Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[post-processing] Refactor MoveFilesPP to respect non-video files #9774

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
35 changes: 18 additions & 17 deletions test/test_YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,42 +874,43 @@ def test_format_note(self):
}), r'^30fps$')

def test_postprocessors(self):
filename = 'post-processor-testfile.mp4'
audiofile = filename + '.mp3'
filename = 'post-processor-testfile'
video_file = filename + '.mp4'
audio_file = filename + '.mp3'

class SimplePP(PostProcessor):
def run(self, info):
with open(audiofile, 'w') as f:
with open(audio_file, 'w') as f:
f.write('EXAMPLE')
return [info['filepath']], info

def run_pp(params, PP):
with open(filename, 'w') as f:
with open(video_file, 'w') as f:
f.write('EXAMPLE')
ydl = YoutubeDL(params)
ydl.add_post_processor(PP())
ydl.post_process(filename, {'filepath': filename})
ydl.post_process(video_file, {'filepath': video_file})

run_pp({'keepvideo': True}, SimplePP)
self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
os.unlink(filename)
os.unlink(audiofile)
run_pp({'keepvideo': True, 'outtmpl': filename}, SimplePP)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calling this out specifically - these changes necessitated an outtmpl (or setting title and id for the default outtmpl to use). I don't think this is an issue, but I wanted to confirm!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To expand, the reason is that the old implementation didn't concern itself with outtmpls at all - it just got a list of source/destination filenames and it moved files based on that.

The new implementation uses prepare_filename which allows it to keep source/destination logic contained entirely within the MoveFilesPP module, but the side effect is that it needs to be able to generate a valid outtmpl somehow

self.assertTrue(os.path.exists(video_file), '%s doesn\'t exist' % video_file)
self.assertTrue(os.path.exists(audio_file), '%s doesn\'t exist' % audio_file)
os.unlink(video_file)
os.unlink(audio_file)

run_pp({'keepvideo': False}, SimplePP)
self.assertFalse(os.path.exists(filename), '%s exists' % filename)
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
os.unlink(audiofile)
run_pp({'keepvideo': False, 'outtmpl': filename}, SimplePP)
self.assertFalse(os.path.exists(video_file), '%s exists' % video_file)
self.assertTrue(os.path.exists(audio_file), '%s doesn\'t exist' % audio_file)
os.unlink(audio_file)

class ModifierPP(PostProcessor):
def run(self, info):
with open(info['filepath'], 'w') as f:
f.write('MODIFIED')
return [], info

run_pp({'keepvideo': False}, ModifierPP)
self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
os.unlink(filename)
run_pp({'keepvideo': False, 'outtmpl': filename}, ModifierPP)
self.assertTrue(os.path.exists(video_file), '%s doesn\'t exist' % video_file)
os.unlink(video_file)

def test_match_filter(self):
first = {
Expand Down
47 changes: 20 additions & 27 deletions yt_dlp/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -3208,7 +3208,6 @@ def replace_info_dict(new_info):
# info_dict['_filename'] needs to be set for backward compatibility
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
temp_filename = self.prepare_filename(info_dict, 'temp')
files_to_move = {}

# Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
Expand Down Expand Up @@ -3236,13 +3235,11 @@ def check_max_downloads():
sub_files = self._write_subtitles(info_dict, temp_filename)
if sub_files is None:
return
files_to_move.update(dict(sub_files))

thumb_files = self._write_thumbnails(
'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
if thumb_files is None:
return
files_to_move.update(dict(thumb_files))

infofn = self.prepare_filename(info_dict, 'infojson')
_infojson_written = self._write_info_json('video', info_dict, infofn)
Expand Down Expand Up @@ -3316,13 +3313,12 @@ def _write_link_file(link_type):
for link_type, should_write in write_links.items()):
return

new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
new_info, _ = self.pre_process(info_dict, 'before_dl')
replace_info_dict(new_info)

if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
info_dict['__files_to_move'] = files_to_move
replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
else:
Expand Down Expand Up @@ -3436,9 +3432,6 @@ def correct_ext(filename, ext=new_ext):
info_dict['__files_to_merge'] = downloaded
# Even if there were no downloads, it is being merged only now
info_dict['__real_download'] = True
else:
for file in downloaded:
files_to_move[file] = None
else:
# Just a single file
dl_filename = existing_video_file(full_filename, temp_filename)
Expand All @@ -3452,7 +3445,6 @@ def correct_ext(filename, ext=new_ext):

dl_filename = dl_filename or temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

except network_exceptions as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
Expand Down Expand Up @@ -3523,7 +3515,7 @@ def ffmpeg_fixup(cndn, msg, cls):

fixup()
try:
replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
replace_info_dict(self.post_process(dl_filename, info_dict))
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
return
Expand Down Expand Up @@ -3644,8 +3636,6 @@ def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
os.remove(filename)
except OSError:
self.report_warning(f'Unable to delete file {filename}')
if filename in info.get('__files_to_move', []): # NB: Delete even if None
del info['__files_to_move'][filename]

@staticmethod
def post_extract(info_dict):
Expand All @@ -3662,8 +3652,7 @@ def actual_post_extract(info_dict):

def run_pp(self, pp, infodict):
files_to_delete = []
if '__files_to_move' not in infodict:
infodict['__files_to_move'] = {}

try:
files_to_delete, infodict = pp.run(infodict)
except PostProcessingError as e:
Expand All @@ -3675,10 +3664,7 @@ def run_pp(self, pp, infodict):

if not files_to_delete:
return infodict
if self.params.get('keepvideo', False):
for f in files_to_delete:
infodict['__files_to_move'].setdefault(f, '')
else:
if not self.params.get('keepvideo', False):
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could I get eyes on this to confirm that this logic inversion works as expected now that I've deleted the files_to_move related stuff?

self._delete_downloaded_files(
*files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
return infodict
Expand All @@ -3691,23 +3677,27 @@ def run_all_pps(self, key, info, *, additional_pps=None):
return info

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
if files_to_move is not None:
self.report_warning('[pre_process] "files_to_move" is deprecated and may be removed in a future version')
Comment on lines +3680 to +3681
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For viz: wanted to make sure this messaging was on-brand for deprecating a parameter rather than an entire method


info = dict(ie_info)
info['__files_to_move'] = files_to_move or {}
try:
info = self.run_all_pps(key, info)
except PostProcessingError as err:
msg = f'Preprocessing: {err}'
info.setdefault('__pending_error', msg)
self.report_error(msg, is_error=False)
return info, info.pop('__files_to_move', None)
return info, files_to_move

def post_process(self, filename, info, files_to_move=None):
"""Run all the postprocessors on the given file."""
if files_to_move is not None:
self.report_warning('[post_process] "files_to_move" is deprecated and may be removed in a future version')

info['filepath'] = filename
info['__files_to_move'] = files_to_move or {}
info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
del info['__files_to_move']
info.pop('__multiple_thumbnails', None)
return self.run_all_pps('after_move', info)

def _make_archive_id(self, info_dict):
Expand Down Expand Up @@ -4294,10 +4284,11 @@ def _write_subtitles(self, info_dict, filename):
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
existing_sub = self.existing_file((sub_filename_final, sub_filename))

if existing_sub:
self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
sub_info['filepath'] = existing_sub
ret.append((existing_sub, sub_filename_final))
ret.append(existing_sub)
continue

self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
Expand All @@ -4308,7 +4299,7 @@ def _write_subtitles(self, info_dict, filename):
with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
ret.append(sub_filename)
continue
except OSError:
self.report_error(f'Cannot write video subtitles file {sub_filename}')
Expand All @@ -4319,7 +4310,7 @@ def _write_subtitles(self, info_dict, filename):
sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
self.dl(sub_filename, sub_copy, subtitle=True)
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
ret.append(sub_filename)
except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
if self.params.get('ignoreerrors') is not True: # False or 'only_download'
Expand All @@ -4339,6 +4330,7 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
self.to_screen(f'[info] There are no {label} thumbnails to download')
return ret
multiple = write_all and len(thumbnails) > 1
info_dict['__multiple_thumbnails'] = multiple
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is how I'm determining whether thumbnails need to use an indexed file "extension". I could use get_param('write_all_thumbnails') and similar in the MoveFilesPP, but I reckon doing it this way lessens the risk of logic here getting out of sync with logic in MoveFilesPP if this is refactored/modified in the future


if thumb_filename_base is None:
thumb_filename_base = filename
Expand All @@ -4360,15 +4352,15 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
self.to_screen('[info] %s is already present' % (
thumb_display_id if multiple else f'{label} thumbnail').capitalize())
t['filepath'] = existing_thumb
ret.append((existing_thumb, thumb_filename_final))
ret.append(existing_thumb)
else:
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
try:
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
ret.append((thumb_filename, thumb_filename_final))
ret.append(thumb_filename)
t['filepath'] = thumb_filename
except network_exceptions as err:
if isinstance(err, HTTPError) and err.status == 404:
Expand All @@ -4378,4 +4370,5 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
thumbnails.pop(idx)
if ret and not write_all:
break

return ret
4 changes: 4 additions & 0 deletions yt_dlp/postprocessor/embedthumbnail.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,4 +224,8 @@ def run(self, info):
thumbnail_filename if converted or not self._already_have_thumbnail else None,
original_thumbnail if converted and not self._already_have_thumbnail else None,
info=info)

if not self._already_have_thumbnail:
info['thumbnails'][idx].pop('filepath', None)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm using the .pop(name, None) pattern a few places to prevent raising an error if a key doesn't exist. While it shouldn't be strictly necessary based on my testing, there is so much I don't know about yt-dlp that I figured this was the safer bet


return [], info
17 changes: 8 additions & 9 deletions yt_dlp/postprocessor/ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,10 @@ def run(self, info):
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.replace(temp_filename, filename)

if not self._already_have_subtitle:
for _, subtitle in subtitles.items():
subtitle.pop('filepath', None)

files_to_delete = [] if self._already_have_subtitle else sub_filenames
return files_to_delete, info

Expand Down Expand Up @@ -698,6 +702,7 @@ def run(self, info):
infojson_filename = info.get('infojson_filename')
options.extend(self._get_infojson_opts(info, infojson_filename))
if not infojson_filename:
info.pop('infojson_filename', None)
files_to_delete.append(info.get('infojson_filename'))
elif self._add_infojson is True:
self.to_screen('The info-json can only be attached to mkv/mka files')
Expand Down Expand Up @@ -1016,9 +1021,6 @@ def run(self, info):
'filepath': new_file,
}

info['__files_to_move'][new_file] = replace_extension(
info['__files_to_move'][sub['filepath']], new_ext)

return sub_filenames, info


Expand Down Expand Up @@ -1083,16 +1085,15 @@ def is_webp(cls, path):
return imghdr.what(path) == 'webp'

def fixup_webp(self, info, idx=-1):
thumbnail_filename = info['thumbnails'][idx]['filepath']
thumbnail = info['thumbnails'][idx]
thumbnail_filename = thumbnail['filepath']
_, thumbnail_ext = os.path.splitext(thumbnail_filename)
if thumbnail_ext:
if thumbnail_ext.lower() != '.webp' and imghdr.what(thumbnail_filename) == 'webp':
self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
webp_filename = replace_extension(thumbnail_filename, 'webp')
os.replace(thumbnail_filename, webp_filename)
info['thumbnails'][idx]['filepath'] = webp_filename
info['__files_to_move'][webp_filename] = replace_extension(
info['__files_to_move'].pop(thumbnail_filename), 'webp')
thumbnail['filepath'] = webp_filename

@staticmethod
def _options(target_ext):
Expand Down Expand Up @@ -1130,8 +1131,6 @@ def run(self, info):
continue
thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, target_ext)
files_to_delete.append(original_thumbnail)
info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
info['__files_to_move'][original_thumbnail], target_ext)

if not has_thumbnail:
self.to_screen('There aren\'t any thumbnails to convert')
Expand Down