Develop (#831)
* Change version to next

* Refactor UI to Gradio

* Refactor UI to Gradio

* Refactor UI to Gradio

* Autoload source and target into UI

* Implement video preview

* Adjust layout

* Add theme

* Fix dependencies

* Share components

* Typing for the globals

* Revert export of components

* Expose components via registry

* Finish basic preview

* Finish basic preview

* Finish basic preview

* Finish basic preview

* Start with settings

* More settings

* More settings

* More settings

* Implement background enhancer

* Rename to frame enhancer

* Introduce list_frame_processors_names()

* Move get_device to core of frame processors

* Add flags to settings

* Sort frame processor choices

* Adjust project structure

* Minor fixes

* Load UI dynamically, change some CLI argument names

* Rename temp_face to crop_frame

* Start with reference component

* Use face position slider

* Use face position slider

* Use face position slider

* Add pre_check() to settings component to autoload models

* Make CI happy

* Make CI happy

* Split render() and listen(), finalize reference component

* Split render() and listen() part2

* Remove limiting choices, Add error handling to start()

* Remove limiting choices, Add error handling to start()
henryruhs committed Aug 7, 2023
1 parent cf7ba1c commit 3d02a32
Showing 27 changed files with 726 additions and 536 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
@@ -8,10 +8,10 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set up Python 3.9
- name: Set up Python 3.10
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: '3.10'
- run: pip install flake8
- run: pip install mypy
- run: flake8 run.py roop
@@ -26,10 +26,10 @@ jobs:
uses: actions/checkout@v2
- name: Set up ffmpeg
uses: FedericoCarboni/setup-ffmpeg@v2
- name: Set up Python 3.9
- name: Set up Python 3.10
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: '3.10'
- run: pip install -r requirements-headless.txt
- run: python run.py -s .github/examples/source.jpg -t .github/examples/target.mp4 -o .github/examples/output.mp4
if: matrix.os != 'windows-latest'
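Note that the workflow now quotes the Python version as '3.10'. Unquoted, YAML parses 3.10 as the float 3.1, which setup-python would resolve to the wrong interpreter. A minimal sketch of the parsing difference, using PyYAML purely for illustration (not part of this repository):

```python
import yaml  # PyYAML

# Unquoted 3.10 is parsed as a YAML float and silently becomes 3.1,
# while the quoted form survives as the intended version string.
print(yaml.safe_load('python-version: 3.10'))    # {'python-version': 3.1}
print(yaml.safe_load("python-version: '3.10'"))  # {'python-version': '3.10'}
```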
15 changes: 6 additions & 9 deletions requirements-headless.txt
@@ -1,13 +1,10 @@
insightface==0.7.3
numpy==1.24.3
opencv-python==4.8.0.74
onnx==1.14.0
insightface==0.7.3
psutil==5.9.5
tk==0.1.0
customtkinter==5.2.0
tkinterdnd2==0.3.0
onnxruntime==1.15.0
tensorflow==2.13.0
onnxruntime==1.15.1
opencv-python==4.8.0.74
opennsfw2==0.10.2
protobuf==4.23.4
tqdm==4.65.0
psutil==5.9.5
tensorflow==2.13.0
tqdm==4.65.0
21 changes: 10 additions & 11 deletions requirements.txt
@@ -1,21 +1,20 @@
--extra-index-url https://download.pytorch.org/whl/cu118

basicsr==1.4.2
gfpgan==1.3.8
gradio==3.39.0
insightface==0.7.3
numpy==1.24.3
opencv-python==4.8.0.74
onnx==1.14.0
insightface==0.7.3
psutil==5.9.5
tk==0.1.0
customtkinter==5.2.0
tkinterdnd2==0.3.0; sys_platform != 'darwin' and platform_machine != 'arm64'
tkinterdnd2-universal==1.7.3; sys_platform == 'darwin' and platform_machine == 'arm64'
pillow==10.0.0
onnxruntime==1.15.1; python_version != '3.9' and sys_platform == 'darwin' and platform_machine != 'arm64'
onnxruntime-coreml==1.13.1; python_version == '3.9' and sys_platform == 'darwin' and platform_machine != 'arm64'
onnxruntime-silicon==1.13.1; sys_platform == 'darwin' and platform_machine == 'arm64'
onnxruntime-gpu==1.15.1; sys_platform != 'darwin'
tensorflow==2.13.0
onnxruntime-silicon==1.13.1; sys_platform == 'darwin' and platform_machine == 'arm64'
opencv-python==4.8.0.74
opennsfw2==0.10.2
pillow==10.0.0
protobuf==4.23.4
psutil==5.9.5
realesrgan==0.3.0
tensorflow==2.13.0
tqdm==4.65.0
gfpgan==1.3.8
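The platform-specific onnxruntime pins rely on PEP 508 environment markers, so pip installs only the variant whose marker matches the current interpreter and platform. A small sketch of how such a marker evaluates, using the `packaging` library as an illustration (the environment dicts here are hypothetical machines, not project configuration):

```python
from packaging.markers import Marker

# Marker string taken from the requirements file above.
marker = Marker("sys_platform == 'darwin' and platform_machine == 'arm64'")

# evaluate() checks the marker against an environment; unspecified keys
# fall back to the current interpreter's values.
print(marker.evaluate({'sys_platform': 'darwin', 'platform_machine': 'arm64'}))  # True
print(marker.evaluate({'sys_platform': 'linux', 'platform_machine': 'x86_64'}))  # False
```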
60 changes: 33 additions & 27 deletions roop/core.py
@@ -17,24 +17,24 @@
import tensorflow
import roop.globals
import roop.metadata
import roop.ui as ui
from roop.predictor import predict_image, predict_video
from roop.processors.frame.core import get_frame_processors_modules
from roop.utilities import has_image_extension, is_image, is_video, detect_fps, create_video, extract_frames, get_temp_frame_paths, restore_audio, create_temp, move_temp, clean_temp, normalize_output_path
from roop.processors.frame.core import get_frame_processors_modules, list_frame_processors_names
from roop.utilities import has_image_extension, is_image, is_video, detect_fps, create_video, extract_frames, get_temp_frame_paths, restore_audio, create_temp, move_temp, clean_temp, normalize_output_path, list_module_names

warnings.filterwarnings('ignore', category=FutureWarning, module='insightface')
warnings.filterwarnings('ignore', category=UserWarning, module='torchvision')


def parse_args() -> None:
signal.signal(signal.SIGINT, lambda signal_number, frame: destroy())
program = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=100))
program = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120))
program.add_argument('-s', '--source', help='select a source image', dest='source_path')
program.add_argument('-t', '--target', help='select a target image or video', dest='target_path')
program.add_argument('-o', '--output', help='select output file or directory', dest='output_path')
program.add_argument('--frame-processor', help='frame processors (choices: face_swapper, face_enhancer, ...)', dest='frame_processor', default=['face_swapper'], nargs='+')
program.add_argument('--frame-processors', help='list of available frame processors', dest='frame_processors', default=['face_swapper'], nargs='+')
program.add_argument('--ui-layouts', help='list of available ui layouts', dest='ui_layouts', default=['default'], nargs='+')
program.add_argument('--keep-fps', help='keep target fps', dest='keep_fps', action='store_true')
program.add_argument('--keep-frames', help='keep temporary frames', dest='keep_frames', action='store_true')
program.add_argument('--keep-temp', help='keep temporary frames', dest='keep_temp', action='store_true')
program.add_argument('--skip-audio', help='skip target audio', dest='skip_audio', action='store_true')
program.add_argument('--many-faces', help='process every face', dest='many_faces', action='store_true')
program.add_argument('--reference-face-position', help='position of the reference face', dest='reference_face_position', type=int, default=0)
@@ -45,7 +45,7 @@ def parse_args() -> None:
program.add_argument('--output-video-encoder', help='encoder used for the output video', dest='output_video_encoder', default='libx264', choices=['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc'])
program.add_argument('--output-video-quality', help='quality used for the output video', dest='output_video_quality', type=int, default=35, choices=range(101), metavar='[0-100]')
program.add_argument('--max-memory', help='maximum amount of RAM in GB', dest='max_memory', type=int)
program.add_argument('--execution-provider', help='available execution provider (choices: cpu, ...)', dest='execution_provider', default=['cpu'], choices=suggest_execution_providers(), nargs='+')
program.add_argument('--execution-providers', help='list of available execution providers (choices: cpu, ...)', dest='execution_providers', default=['cpu'], choices=suggest_execution_providers(), nargs='+')
program.add_argument('--execution-threads', help='number of execution threads', dest='execution_threads', type=int, default=suggest_execution_threads())
program.add_argument('-v', '--version', action='version', version=f'{roop.metadata.name} {roop.metadata.version}')
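The renamed --frame-processors and --execution-providers flags use `nargs='+'`, so several values can follow a single flag and arrive as a Python list. A self-contained sketch of that behaviour (the parser here is illustrative, the flag names mirror the diff):

```python
import argparse

parser = argparse.ArgumentParser()
# nargs='+' collects one or more space-separated values into a list
parser.add_argument('--frame-processors', dest='frame_processors',
                    default=['face_swapper'], nargs='+')
parser.add_argument('--execution-providers', dest='execution_providers',
                    default=['cpu'], nargs='+')

args = parser.parse_args('--frame-processors face_swapper face_enhancer'.split())
print(args.frame_processors)     # ['face_swapper', 'face_enhancer']
print(args.execution_providers)  # ['cpu'] (default when the flag is omitted)
```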

@@ -55,9 +55,10 @@ def parse_args() -> None:
roop.globals.target_path = args.target_path
roop.globals.output_path = normalize_output_path(roop.globals.source_path, roop.globals.target_path, args.output_path)
roop.globals.headless = roop.globals.source_path is not None and roop.globals.target_path is not None and roop.globals.output_path is not None
roop.globals.frame_processors = args.frame_processor
roop.globals.frame_processors = args.frame_processors
roop.globals.ui_layouts = args.ui_layouts
roop.globals.keep_fps = args.keep_fps
roop.globals.keep_frames = args.keep_frames
roop.globals.keep_temp = args.keep_temp
roop.globals.skip_audio = args.skip_audio
roop.globals.many_faces = args.many_faces
roop.globals.reference_face_position = args.reference_face_position
@@ -68,7 +69,7 @@ def parse_args() -> None:
roop.globals.output_video_encoder = args.output_video_encoder
roop.globals.output_video_quality = args.output_video_quality
roop.globals.max_memory = args.max_memory
roop.globals.execution_providers = decode_execution_providers(args.execution_provider)
roop.globals.execution_providers = decode_execution_providers(args.execution_providers)
roop.globals.execution_threads = args.execution_threads
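decode_execution_providers() itself is not part of this diff. A plausible sketch of the helper pair, assuming they simply map between ONNX Runtime provider names ('CUDAExecutionProvider') and the short CLI spellings ('cuda') and keep only providers available in the installed onnxruntime build:

```python
from typing import List

import onnxruntime


def encode_execution_providers(execution_providers: List[str]) -> List[str]:
    # 'CUDAExecutionProvider' -> 'cuda', 'CPUExecutionProvider' -> 'cpu', ...
    return [execution_provider.replace('ExecutionProvider', '').lower() for execution_provider in execution_providers]


def decode_execution_providers(execution_providers: List[str]) -> List[str]:
    # Keep only those available providers whose short name was requested on the CLI
    available_providers = onnxruntime.get_available_providers()
    encoded_providers = encode_execution_providers(available_providers)
    return [provider for provider, encoded in zip(available_providers, encoded_providers)
            if any(requested in encoded for requested in execution_providers)]
```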


@@ -85,6 +86,10 @@ def suggest_execution_providers() -> List[str]:
return encode_execution_providers(onnxruntime.get_available_providers())


def suggest_ui_layouts() -> List[str]:
return list_module_names('roop/uis/__layouts__')


def suggest_execution_threads() -> int:
if 'CUDAExecutionProvider' in onnxruntime.get_available_providers():
return 8
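list_module_names(), imported from roop.utilities above, is not shown in this commit. One way it might work is to scan a package directory for Python files and return their module names, which suggest_ui_layouts() then exposes as CLI choices. A hedged sketch under that assumption:

```python
import os
from typing import List, Optional


def list_module_names(path: str) -> Optional[List[str]]:
    # Return the module names of all .py files in the given directory,
    # skipping dunder files such as __init__.py.
    if os.path.exists(path):
        files = os.listdir(path)
        return [os.path.splitext(file)[0] for file in files
                if file.endswith('.py') and not file.startswith('__')]
    return None
```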
@@ -124,24 +129,22 @@ def pre_check() -> bool:

def update_status(message: str, scope: str = 'ROOP.CORE') -> None:
print(f'[{scope}] {message}')
if not roop.globals.headless:
ui.update_status(message)


def start() -> None:
for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
if not frame_processor.pre_start():
for frame_processor_module in get_frame_processors_modules(roop.globals.frame_processors):
if not frame_processor_module.pre_start():
return
# process image to image
if has_image_extension(roop.globals.target_path):
if predict_image(roop.globals.target_path):
destroy()
shutil.copy2(roop.globals.target_path, roop.globals.output_path)
# process frame
for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
update_status('Progressing...', frame_processor.NAME)
frame_processor.process_image(roop.globals.source_path, roop.globals.output_path, roop.globals.output_path)
frame_processor.post_process()
for frame_processor_module in get_frame_processors_modules(roop.globals.frame_processors):
update_status('Progressing...', frame_processor_module.NAME)
frame_processor_module.process_image(roop.globals.source_path, roop.globals.output_path, roop.globals.output_path)
frame_processor_module.post_process()
# validate image
if is_image(roop.globals.target_path):
update_status('Processing to image succeeded!')
Expand All @@ -164,21 +167,23 @@ def start() -> None:
# process frame
temp_frame_paths = get_temp_frame_paths(roop.globals.target_path)
if temp_frame_paths:
for frame_processor in get_frame_processors_modules(roop.globals.frame_processors):
update_status('Progressing...', frame_processor.NAME)
frame_processor.process_video(roop.globals.source_path, temp_frame_paths)
frame_processor.post_process()
for frame_processor_module in get_frame_processors_modules(roop.globals.frame_processors):
update_status('Progressing...', frame_processor_module.NAME)
frame_processor_module.process_video(roop.globals.source_path, temp_frame_paths)
frame_processor_module.post_process()
else:
update_status('Frames not found...')
update_status('Temporary frames not found...')
return
# create video
if roop.globals.keep_fps:
fps = detect_fps(roop.globals.target_path)
update_status(f'Creating video with {fps} FPS...')
create_video(roop.globals.target_path, fps)
if not create_video(roop.globals.target_path, fps):
update_status('Creating video failed...')
else:
update_status('Creating video with 30 FPS...')
create_video(roop.globals.target_path)
if not create_video(roop.globals.target_path):
update_status('Creating video failed...')
# handle audio
if roop.globals.skip_audio:
move_temp(roop.globals.target_path, roop.globals.output_path)
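start() now checks the return value of create_video(). Its implementation is not part of this diff, but presumably it reports whether the underlying ffmpeg call succeeded. A rough sketch of that pattern (the helper name and ffmpeg arguments are illustrative, not the project's actual command line):

```python
import subprocess
from typing import List


def run_ffmpeg(args: List[str]) -> bool:
    # Report success to the caller instead of raising, so start() can print a status message
    commands = ['ffmpeg', '-hide_banner', '-loglevel', 'error'] + args
    try:
        subprocess.check_output(commands, stderr=subprocess.STDOUT)
        return True
    except subprocess.CalledProcessError:
        return False
```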
@@ -216,5 +221,6 @@ def run() -> None:
if roop.globals.headless:
start()
else:
window = ui.init(start, destroy)
window.mainloop()
import roop.uis.core as ui

ui.init()
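run() now defers the Gradio UI import until it is needed, and ui.init() presumably loads the layouts listed in roop.globals.ui_layouts dynamically. The roop.uis.core module itself is not included in this commit; a hedged sketch of how such dynamic loading could look with importlib, with render() and listen() entry points assumed from the commit messages above:

```python
import importlib
from types import ModuleType
from typing import List


def load_ui_layout_module(ui_layout: str) -> ModuleType:
    # Resolve a layout name such as 'default' to the module roop.uis.__layouts__.default
    return importlib.import_module('roop.uis.__layouts__.' + ui_layout)


def init(ui_layouts: List[str]) -> None:
    for ui_layout in ui_layouts:
        ui_layout_module = load_ui_layout_module(ui_layout)
        ui_layout_module.render()  # assumed layout entry points
        ui_layout_module.listen()
```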
6 changes: 5 additions & 1 deletion roop/face_analyser.py
@@ -39,7 +39,7 @@ def get_one_face(frame: Frame, position: int = 0) -> Optional[Face]:
def get_many_faces(frame: Frame) -> Optional[List[Face]]:
try:
return get_face_analyser().get(frame)
except ValueError:
except (AttributeError, ValueError):
return None


@@ -52,3 +52,7 @@ def find_similar_face(frame: Frame, reference_face: Face) -> Optional[Face]:
if distance < roop.globals.similar_face_distance:
return face
return None


def get_faces_total(frame: Frame) -> int:
return len(get_many_faces(frame))
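Note that get_many_faces() can return None (the except branch above), in which case len() would raise a TypeError. A slightly more defensive variant of get_faces_total() could look like this (a sketch reusing the module's existing imports, not the committed code):

```python
def get_faces_total(frame: Frame) -> int:
    faces = get_many_faces(frame)
    # Treat "no analyser result" the same as "no faces found"
    return len(faces) if faces else 0
```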
3 changes: 2 additions & 1 deletion roop/globals.py
@@ -5,8 +5,9 @@
output_path: Optional[str] = None
headless: Optional[bool] = None
frame_processors: List[str] = []
ui_layouts: List[str] = []
keep_fps: Optional[bool] = None
keep_frames: Optional[bool] = None
keep_temp: Optional[bool] = None
skip_audio: Optional[bool] = None
many_faces: Optional[bool] = None
reference_face_position: Optional[int] = None
2 changes: 1 addition & 1 deletion roop/metadata.py
@@ -1,2 +1,2 @@
name = 'roop'
version = '1.3.2'
version = '2.0.0-next'
Empty file.
roop/processors/frame/face_enhancer.py
@@ -4,7 +4,7 @@
from gfpgan.utils import GFPGANer

import roop.globals
import roop.processors.frame.core
import roop.processors.frame.core as frame_processors
from roop.core import update_status
from roop.face_analyser import get_many_faces
from roop.typing import Frame, Face
@@ -13,7 +13,7 @@
FACE_ENHANCER = None
THREAD_SEMAPHORE = threading.Semaphore()
THREAD_LOCK = threading.Lock()
NAME = 'ROOP.FACE-ENHANCER'
NAME = 'ROOP.PROCESSORS.FRAME.FACE_ENHANCER'


def get_face_enhancer() -> Any:
@@ -23,18 +23,14 @@ def get_face_enhancer() -> Any:
if FACE_ENHANCER is None:
model_path = resolve_relative_path('../models/GFPGANv1.4.pth')
# todo: set models path -> https://github.com/TencentARC/GFPGAN/issues/399
FACE_ENHANCER = GFPGANer(model_path=model_path, upscale=1, device=get_device())
FACE_ENHANCER = GFPGANer(
model_path=model_path,
upscale=1,
device=frame_processors.get_device()
)
return FACE_ENHANCER


def get_device() -> str:
if 'CUDAExecutionProvider' in roop.globals.execution_providers:
return 'cuda'
if 'CoreMLExecutionProvider' in roop.globals.execution_providers:
return 'mps'
return 'cpu'


def clear_face_enhancer() -> None:
global FACE_ENHANCER

@@ -66,14 +62,14 @@ def enhance_face(target_face: Face, temp_frame: Frame) -> Frame:
start_y = max(0, start_y - padding_y)
end_x = max(0, end_x + padding_x)
end_y = max(0, end_y + padding_y)
temp_face = temp_frame[start_y:end_y, start_x:end_x]
if temp_face.size:
crop_frame = temp_frame[start_y:end_y, start_x:end_x]
if crop_frame.size:
with THREAD_SEMAPHORE:
_, _, temp_face = get_face_enhancer().enhance(
temp_face,
_, _, crop_frame = get_face_enhancer().enhance(
crop_frame,
paste_back=True
)
temp_frame[start_y:end_y, start_x:end_x] = temp_face
temp_frame[start_y:end_y, start_x:end_x] = crop_frame
return temp_frame


@@ -88,16 +84,16 @@ def process_frame(source_face: Face, reference_face: Face, temp_frame: Frame) ->
def process_frames(source_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
for temp_frame_path in temp_frame_paths:
temp_frame = cv2.imread(temp_frame_path)
result = process_frame(None, None, temp_frame)
cv2.imwrite(temp_frame_path, result)
result_frame = process_frame(None, None, temp_frame)
cv2.imwrite(temp_frame_path, result_frame)
if update:
update()


def process_image(source_path: str, target_path: str, output_path: str) -> None:
target_frame = cv2.imread(target_path)
result = process_frame(None, None, target_frame)
cv2.imwrite(output_path, result)
result_frame = process_frame(None, None, target_frame)
cv2.imwrite(output_path, result_frame)


def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
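get_device() was removed from this module in favour of frame_processors.get_device(), matching the "Move get_device to core of frame processors" commit. The shared helper in roop/processors/frame/core.py is not shown in this diff, but presumably mirrors the deleted function. A sketch under that assumption:

```python
# roop/processors/frame/core.py (assumed location of the shared helper)
import roop.globals


def get_device() -> str:
    # Map the selected ONNX Runtime execution providers to a torch device string
    if 'CUDAExecutionProvider' in roop.globals.execution_providers:
        return 'cuda'
    if 'CoreMLExecutionProvider' in roop.globals.execution_providers:
        return 'mps'
    return 'cpu'
```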
roop/processors/frame/face_swapper.py
@@ -4,7 +4,7 @@
import threading

import roop.globals
import roop.processors.frame.core
import roop.processors.frame.core as frame_processors
from roop.core import update_status
from roop.face_analyser import get_one_face, get_many_faces, find_similar_face
from roop.face_reference import get_face_reference, set_face_reference, clear_face_reference
@@ -13,7 +13,7 @@

FACE_SWAPPER = None
THREAD_LOCK = threading.Lock()
NAME = 'ROOP.FACE-SWAPPER'
NAME = 'ROOP.PROCESSORS.FRAME.FACE_SWAPPER'


def get_face_swapper() -> Any:
@@ -75,26 +75,26 @@ def process_frame(source_face: Face, reference_face: Face, temp_frame: Frame) ->

def process_frames(source_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
source_face = get_one_face(cv2.imread(source_path))
reference_face = None if roop.globals.many_faces else get_face_reference()
reference_face = get_face_reference() if not roop.globals.many_faces else None
for temp_frame_path in temp_frame_paths:
temp_frame = cv2.imread(temp_frame_path)
result = process_frame(source_face, reference_face, temp_frame)
cv2.imwrite(temp_frame_path, result)
result_frame = process_frame(source_face, reference_face, temp_frame)
cv2.imwrite(temp_frame_path, result_frame)
if update:
update()


def process_image(source_path: str, target_path: str, output_path: str) -> None:
source_face = get_one_face(cv2.imread(source_path))
target_frame = cv2.imread(target_path)
reference_face = None if roop.globals.many_faces else get_one_face(target_frame, roop.globals.reference_face_position)
result = process_frame(source_face, reference_face, target_frame)
cv2.imwrite(output_path, result)
reference_face = get_one_face(target_frame, roop.globals.reference_face_position) if not roop.globals.many_faces else None
result_frame = process_frame(source_face, reference_face, target_frame)
cv2.imwrite(output_path, result_frame)


def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
if not roop.globals.many_faces and not get_face_reference():
reference_frame = cv2.imread(temp_frame_paths[roop.globals.reference_frame_number])
reference_face = get_one_face(reference_frame, roop.globals.reference_face_position)
set_face_reference(reference_face)
roop.processors.frame.core.process_video(source_path, temp_frame_paths, process_frames)
frame_processors.process_video(source_path, temp_frame_paths, process_frames)
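The reference-face handling above relies on roop.face_reference (get_face_reference, set_face_reference, clear_face_reference), which is not included in this diff. A minimal sketch of how such a cache might be structured, assuming a single module-level slot:

```python
from typing import Optional

from roop.typing import Face

FACE_REFERENCE: Optional[Face] = None


def get_face_reference() -> Optional[Face]:
    return FACE_REFERENCE


def set_face_reference(face: Face) -> None:
    # Cache the reference face once so every worker reuses the same target identity
    global FACE_REFERENCE
    FACE_REFERENCE = face


def clear_face_reference() -> None:
    global FACE_REFERENCE
    FACE_REFERENCE = None
```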
