Next (#502)
* Validate the overrides from facefusion.ini

* Break down cli testing

* Remove architecture lookup to support old driver

* Remove architecture lookup to support old driver

* Remove hwaccel auto

* Respect the output video resolution

* Bump next version

* Full directml support (#501)

* Introduce conditional thread management for DML support

* Finish migration to thread helpers

* Introduce dynamic frame colorizer sizes

* Introduce dynamic frame colorizer sizes

* Add 192x192 to frame colorizer

* Fix async audio
henryruhs committed Apr 19, 2024
1 parent 092dfbb commit 4efa5b2
Showing 30 changed files with 350 additions and 191 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -96,6 +96,7 @@ frame processors:
--face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial,uniface_256} choose the model responsible for swapping the face
--frame-colorizer-model {ddcolor,ddcolor_artistic,deoldify,deoldify_artistic,deoldify_stable} choose the model responsible for colorizing the frame
--frame-colorizer-blend [0-100] blend the colorized into the previous frame
--frame-colorizer-size {192x192,256x256,384x384,512x512} specify the size of the frame provided to the frame colorizer
--frame-enhancer-model {lsdir_x4,nomos8k_sc_x4,real_esrgan_x2,real_esrgan_x2_fp16,real_esrgan_x4,real_esrgan_x4_fp16,real_hatgan_x4,span_kendata_x4} choose the model responsible for enhancing the frame
--frame-enhancer-blend [0-100] blend the enhanced into the previous frame
--lip-syncer-model {wav2lip_gan} choose the model responsible for syncing the lips
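For reference, a hedged example invocation exercising the new option; the python run.py entry point, the --frame-processors, -t and -o flags, and the file paths are assumptions drawn from the project's usual CLI, not from this diff:

python run.py --frame-processors frame_colorizer --frame-colorizer-model ddcolor --frame-colorizer-size 192x192 -t target.mp4 -o output.mp4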
1 change: 1 addition & 0 deletions facefusion.ini
@@ -63,6 +63,7 @@ face_enhancer_blend =
face_swapper_model =
frame_colorizer_model =
frame_colorizer_blend =
frame_colorizer_size =
frame_enhancer_model =
frame_enhancer_blend =
lip_syncer_model =
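The same override can be pinned in facefusion.ini; a minimal sketch, assuming the keys live under a [frame_processors] section (the section name is not shown in this hunk). With the validation added in core.py below, a value outside the documented choices now aborts at startup instead of failing mid-run:

[frame_processors]
frame_colorizer_model = ddcolor
frame_colorizer_size = 192x192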
14 changes: 7 additions & 7 deletions facefusion/content_analyser.py
@@ -1,22 +1,21 @@
from typing import Any
from functools import lru_cache
from time import sleep
import threading
import cv2
import numpy
import onnxruntime
from tqdm import tqdm

import facefusion.globals
from facefusion import process_manager, wording
from facefusion.thread_helper import thread_lock, conditional_thread_semaphore
from facefusion.typing import VisionFrame, ModelSet, Fps
from facefusion.execution import apply_execution_provider_options
from facefusion.vision import get_video_frame, count_video_frame_total, read_image, detect_video_fps
from facefusion.filesystem import resolve_relative_path, is_file
from facefusion.download import conditional_download

CONTENT_ANALYSER = None
THREAD_LOCK : threading.Lock = threading.Lock()
MODELS : ModelSet =\
{
'open_nsfw':
@@ -33,7 +32,7 @@
def get_content_analyser() -> Any:
global CONTENT_ANALYSER

with THREAD_LOCK:
with thread_lock():
while process_manager.is_checking():
sleep(0.5)
if CONTENT_ANALYSER is None:
@@ -72,10 +71,11 @@ def analyse_stream(vision_frame : VisionFrame, video_fps : Fps) -> bool:
def analyse_frame(vision_frame : VisionFrame) -> bool:
content_analyser = get_content_analyser()
vision_frame = prepare_frame(vision_frame)
probability = content_analyser.run(None,
{
content_analyser.get_inputs()[0].name: vision_frame
})[0][0][1]
with conditional_thread_semaphore(facefusion.globals.execution_providers):
probability = content_analyser.run(None,
{
content_analyser.get_inputs()[0].name: vision_frame
})[0][0][1]
return probability > PROBABILITY_LIMIT


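The new facefusion/thread_helper.py is among the 30 changed files but not shown in this excerpt. Here is a plausible sketch of its contract, inferred from the call sites above; the DmlExecutionProvider check is an assumption based on the 'Full directml support' commit message:

import threading
from contextlib import nullcontext
from typing import List, Union, ContextManager

THREAD_LOCK : threading.Lock = threading.Lock()
THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore()
NULL_CONTEXT : ContextManager[None] = nullcontext()


def thread_lock() -> threading.Lock:
    # shared lock guarding lazy model initialization
    return THREAD_LOCK


def thread_semaphore() -> threading.Semaphore:
    # shared semaphore that always serializes callers
    return THREAD_SEMAPHORE


def conditional_thread_semaphore(execution_providers : List[str]) -> Union[threading.Semaphore, ContextManager[None]]:
    # serialize inference only for providers assumed unsafe to run
    # sessions concurrently; everyone else gets a no-op context manager
    if 'DmlExecutionProvider' in execution_providers:
        return THREAD_SEMAPHORE
    return NULL_CONTEXT

Under this reading, wrapping each session run in conditional_thread_semaphore(facefusion.globals.execution_providers) costs CUDA and CPU users nothing, while DirectML users are protected from concurrent inference. The same pattern recurs in the face_analyser and face_masker diffs below.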
14 changes: 14 additions & 0 deletions facefusion/core.py
@@ -108,6 +108,19 @@ def cli() -> None:
run(program)


def validate_args(program : ArgumentParser) -> None:
try:
for action in program._actions:
if action.default:
if isinstance(action.default, list):
for default in action.default:
program._check_value(action, default)
else:
program._check_value(action, action.default)
except Exception as exception:
program.error(str(exception))


def apply_args(program : ArgumentParser) -> None:
args = program.parse_args()
# general
@@ -185,6 +198,7 @@ def apply_args(program : ArgumentParser) -> None:


def run(program : ArgumentParser) -> None:
validate_args(program)
apply_args(program)
logger.init(facefusion.globals.log_level)

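To make the new check concrete, a standalone sketch of the failure it catches; the flag value is hypothetical, and _check_value is the same private argparse helper that validate_args() relies on above:

from argparse import ArgumentParser

program = ArgumentParser()
# a bad facefusion.ini override would land here as the argparse default
program.add_argument('--frame-colorizer-size', default = '1024x1024', choices = [ '192x192', '256x256', '384x384', '512x512' ])

for action in program._actions:
    if action.default and not isinstance(action.default, list):
        program._check_value(action, action.default)  # raises ArgumentError: invalid choice: '1024x1024'

Because run() calls validate_args() before apply_args(), such an override now aborts with a usage error instead of reaching the frame processors.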
9 changes: 4 additions & 5 deletions facefusion/execution.py
@@ -11,14 +11,14 @@ def encode_execution_providers(execution_providers : List[str]) -> List[str]:
return [ execution_provider.replace('ExecutionProvider', '').lower() for execution_provider in execution_providers ]


def decode_execution_providers(execution_providers: List[str]) -> List[str]:
def decode_execution_providers(execution_providers : List[str]) -> List[str]:
available_execution_providers = onnxruntime.get_available_providers()
encoded_execution_providers = encode_execution_providers(available_execution_providers)

return [ execution_provider for execution_provider, encoded_execution_provider in zip(available_execution_providers, encoded_execution_providers) if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers) ]


def apply_execution_provider_options(execution_providers: List[str]) -> List[Any]:
def apply_execution_provider_options(execution_providers : List[str]) -> List[Any]:
execution_providers_with_options : List[Any] = []

for execution_provider in execution_providers:
@@ -64,13 +64,12 @@ def detect_execution_devices() -> List[ExecutionDevice]:
'framework':
{
'name': 'CUDA',
'version': root_element.find('cuda_version').text,
'version': root_element.find('cuda_version').text
},
'product':
{
'vendor': 'NVIDIA',
'name': gpu_element.find('product_name').text.replace('NVIDIA ', ''),
'architecture': gpu_element.find('product_architecture').text,
'name': gpu_element.find('product_name').text.replace('NVIDIA ', '')
},
'video_memory':
{
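A quick sketch of the round trip the two provider helpers give you (actual output depends on the local onnxruntime build):

import onnxruntime
from facefusion.execution import encode_execution_providers, decode_execution_providers

# e.g. [ 'CUDAExecutionProvider', 'CPUExecutionProvider' ] -> [ 'cuda', 'cpu' ]
print(encode_execution_providers(onnxruntime.get_available_providers()))
# user-facing tokens back to provider names, e.g. [ 'cuda' ] -> [ 'CUDAExecutionProvider' ]
print(decode_execution_providers([ 'cuda' ]))

The other change in this file drops the product_architecture lookup from detect_execution_devices(); per the 'Remove architecture lookup to support old driver' commit, older NVIDIA drivers apparently do not report that field.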
50 changes: 26 additions & 24 deletions facefusion/face_analyser.py
@@ -1,6 +1,5 @@
from typing import Any, Optional, List, Tuple
from time import sleep
import threading
import cv2
import numpy
import onnxruntime
@@ -13,12 +12,11 @@
from facefusion.execution import apply_execution_provider_options
from facefusion.download import conditional_download
from facefusion.filesystem import resolve_relative_path, is_file
from facefusion.thread_helper import thread_lock, thread_semaphore, conditional_thread_semaphore
from facefusion.typing import VisionFrame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, BoundingBox, FaceLandmarkSet, FaceLandmark5, FaceLandmark68, Score, FaceScoreSet, Embedding
from facefusion.vision import resize_frame_resolution, unpack_resolution

FACE_ANALYSER = None
THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore()
THREAD_LOCK : threading.Lock = threading.Lock()
MODELS : ModelSet =\
{
'face_detector_retinaface':
@@ -85,7 +83,7 @@ def get_face_analyser() -> Any:
face_detectors = {}
face_landmarkers = {}

with THREAD_LOCK:
with thread_lock():
while process_manager.is_checking():
sleep(0.5)
if FACE_ANALYSER is None:
@@ -185,7 +183,7 @@ def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str)
score_list = []

detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size)
with THREAD_SEMAPHORE:
with thread_semaphore():
detections = face_detector.run(None,
{
face_detector.get_inputs()[0].name: detect_vision_frame
@@ -227,7 +225,7 @@ def detect_with_scrfd(vision_frame : VisionFrame, face_detector_size : str) -> T
score_list = []

detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size)
with THREAD_SEMAPHORE:
with thread_semaphore():
detections = face_detector.run(None,
{
face_detector.get_inputs()[0].name: detect_vision_frame
@@ -266,7 +264,7 @@ def detect_with_yoloface(vision_frame : VisionFrame, face_detector_size : str) -
score_list = []

detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size)
with THREAD_SEMAPHORE:
with thread_semaphore():
detections = face_detector.run(None,
{
face_detector.get_inputs()[0].name: detect_vision_frame
@@ -304,7 +302,7 @@ def detect_with_yunet(vision_frame : VisionFrame, face_detector_size : str) -> T

face_detector.setInputSize((temp_vision_frame.shape[1], temp_vision_frame.shape[0]))
face_detector.setScoreThreshold(facefusion.globals.face_detector_score)
with THREAD_SEMAPHORE:
with thread_semaphore():
_, detections = face_detector.detect(temp_vision_frame)
if numpy.any(detections):
for detection in detections:
@@ -380,10 +378,11 @@ def calc_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandma
crop_vision_frame = crop_vision_frame / 127.5 - 1
crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32)
crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0)
embedding = face_recognizer.run(None,
{
face_recognizer.get_inputs()[0].name: crop_vision_frame
})[0]
with conditional_thread_semaphore(facefusion.globals.execution_providers):
embedding = face_recognizer.run(None,
{
face_recognizer.get_inputs()[0].name: crop_vision_frame
})[0]
embedding = embedding.ravel()
normed_embedding = embedding / numpy.linalg.norm(embedding)
return embedding, normed_embedding
@@ -399,10 +398,11 @@ def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : Boun
crop_vision_frame[:, :, 0] = cv2.createCLAHE(clipLimit = 2).apply(crop_vision_frame[:, :, 0])
crop_vision_frame = cv2.cvtColor(crop_vision_frame, cv2.COLOR_Lab2RGB)
crop_vision_frame = crop_vision_frame.transpose(2, 0, 1).astype(numpy.float32) / 255.0
face_landmark_68, face_heatmap = face_landmarker.run(None,
{
face_landmarker.get_inputs()[0].name: [ crop_vision_frame ]
})
with conditional_thread_semaphore(facefusion.globals.execution_providers):
face_landmark_68, face_heatmap = face_landmarker.run(None,
{
face_landmarker.get_inputs()[0].name: [ crop_vision_frame ]
})
face_landmark_68 = face_landmark_68[:, :, :2][0] / 64
face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256
face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix))
@@ -416,10 +416,11 @@ def expand_face_landmark_68_from_5(face_landmark_5 : FaceLandmark5) -> FaceLandm
face_landmarker = get_face_analyser().get('face_landmarkers').get('68_5')
affine_matrix = estimate_matrix_by_face_landmark_5(face_landmark_5, 'ffhq_512', (1, 1))
face_landmark_5 = cv2.transform(face_landmark_5.reshape(1, -1, 2), affine_matrix).reshape(-1, 2)
face_landmark_68_5 = face_landmarker.run(None,
{
face_landmarker.get_inputs()[0].name: [ face_landmark_5 ]
})[0][0]
with conditional_thread_semaphore(facefusion.globals.execution_providers):
face_landmark_68_5 = face_landmarker.run(None,
{
face_landmarker.get_inputs()[0].name: [ face_landmark_5 ]
})[0][0]
face_landmark_68_5 = cv2.transform(face_landmark_68_5.reshape(1, -1, 2), cv2.invertAffineTransform(affine_matrix)).reshape(-1, 2)
return face_landmark_68_5

@@ -432,10 +433,11 @@ def detect_gender_age(temp_vision_frame : VisionFrame, bounding_box : BoundingBo
crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (96, 96))
crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32)
crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0)
prediction = gender_age.run(None,
{
gender_age.get_inputs()[0].name: crop_vision_frame
})[0][0]
with conditional_thread_semaphore(facefusion.globals.execution_providers):
prediction = gender_age.run(None,
{
gender_age.get_inputs()[0].name: crop_vision_frame
})[0][0]
gender = int(numpy.argmax(prediction[:2]))
age = int(numpy.round(prediction[2] * 100))
return gender, age
25 changes: 13 additions & 12 deletions facefusion/face_masker.py
@@ -2,21 +2,20 @@
from cv2.typing import Size
from functools import lru_cache
from time import sleep
import threading
import cv2
import numpy
import onnxruntime

import facefusion.globals
from facefusion import process_manager
from facefusion.thread_helper import thread_lock, conditional_thread_semaphore
from facefusion.typing import FaceLandmark68, VisionFrame, Mask, Padding, FaceMaskRegion, ModelSet
from facefusion.execution import apply_execution_provider_options
from facefusion.filesystem import resolve_relative_path, is_file
from facefusion.download import conditional_download

FACE_OCCLUDER = None
FACE_PARSER = None
THREAD_LOCK : threading.Lock = threading.Lock()
MODELS : ModelSet =\
{
'face_occluder':
@@ -48,7 +47,7 @@
def get_face_occluder() -> Any:
global FACE_OCCLUDER

with THREAD_LOCK:
with thread_lock():
while process_manager.is_checking():
sleep(0.5)
if FACE_OCCLUDER is None:
@@ -60,7 +59,7 @@ def get_face_occluder() -> Any:
def get_face_parser() -> Any:
global FACE_PARSER

with THREAD_LOCK:
with thread_lock():
while process_manager.is_checking():
sleep(0.5)
if FACE_PARSER is None:
@@ -120,10 +119,11 @@ def create_occlusion_mask(crop_vision_frame : VisionFrame) -> Mask:
prepare_vision_frame = cv2.resize(crop_vision_frame, face_occluder.get_inputs()[0].shape[1:3][::-1])
prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32) / 255
prepare_vision_frame = prepare_vision_frame.transpose(0, 1, 2, 3)
occlusion_mask : Mask = face_occluder.run(None,
{
face_occluder.get_inputs()[0].name: prepare_vision_frame
})[0][0]
with conditional_thread_semaphore(facefusion.globals.execution_providers):
occlusion_mask : Mask = face_occluder.run(None,
{
face_occluder.get_inputs()[0].name: prepare_vision_frame
})[0][0]
occlusion_mask = occlusion_mask.transpose(0, 1, 2).clip(0, 1).astype(numpy.float32)
occlusion_mask = cv2.resize(occlusion_mask, crop_vision_frame.shape[:2][::-1])
occlusion_mask = (cv2.GaussianBlur(occlusion_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2
@@ -135,10 +135,11 @@ def create_region_mask(crop_vision_frame : VisionFrame, face_mask_regions : List
prepare_vision_frame = cv2.flip(cv2.resize(crop_vision_frame, (512, 512)), 1)
prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32)[:, :, ::-1] / 127.5 - 1
prepare_vision_frame = prepare_vision_frame.transpose(0, 3, 1, 2)
region_mask : Mask = face_parser.run(None,
{
face_parser.get_inputs()[0].name: prepare_vision_frame
})[0][0]
with conditional_thread_semaphore(facefusion.globals.execution_providers):
region_mask : Mask = face_parser.run(None,
{
face_parser.get_inputs()[0].name: prepare_vision_frame
})[0][0]
region_mask = numpy.isin(region_mask.argmax(0), [ FACE_MASK_REGIONS[region] for region in face_mask_regions ])
region_mask = cv2.resize(region_mask.astype(numpy.float32), crop_vision_frame.shape[:2][::-1])
region_mask = (cv2.GaussianBlur(region_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2
20 changes: 10 additions & 10 deletions facefusion/ffmpeg.py
@@ -44,16 +44,16 @@ def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fp
trim_frame_start = facefusion.globals.trim_frame_start
trim_frame_end = facefusion.globals.trim_frame_end
temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d')
commands = [ '-hwaccel', 'auto', '-i', target_path, '-q:v', '0' ]
commands = [ '-i', target_path, '-s', str(temp_video_resolution), '-q:v', '0' ]

if trim_frame_start is not None and trim_frame_end is not None:
commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',scale=' + str(temp_video_resolution) + ',fps=' + str(temp_video_fps) ])
commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(temp_video_fps) ])
elif trim_frame_start is not None:
commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',scale=' + str(temp_video_resolution) + ',fps=' + str(temp_video_fps) ])
commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(temp_video_fps) ])
elif trim_frame_end is not None:
commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',scale=' + str(temp_video_resolution) + ',fps=' + str(temp_video_fps) ])
commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(temp_video_fps) ])
else:
commands.extend([ '-vf', 'scale=' + str(temp_video_resolution) + ',fps=' + str(temp_video_fps) ])
commands.extend([ '-vf', 'fps=' + str(temp_video_fps) ])
commands.extend([ '-vsync', '0', temp_frames_pattern ])
return run_ffmpeg(commands)

@@ -62,7 +62,7 @@ def merge_video(target_path : str, output_video_resolution : str, output_video_f
temp_video_fps = restrict_video_fps(target_path, output_video_fps)
temp_output_video_path = get_temp_output_video_path(target_path)
temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d')
commands = [ '-hwaccel', 'auto', '-s', str(output_video_resolution), '-r', str(temp_video_fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ]
commands = [ '-r', str(temp_video_fps), '-i', temp_frames_pattern, '-s', str(output_video_resolution), '-c:v', facefusion.globals.output_video_encoder ]

if facefusion.globals.output_video_encoder in [ 'libx264', 'libx265' ]:
output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51))
@@ -83,13 +83,13 @@ def copy_image(target_path : str, output_path : str, temp_image_resolution : str) -> bool:
def copy_image(target_path : str, output_path : str, temp_image_resolution : str) -> bool:
is_webp = filetype.guess_mime(target_path) == 'image/webp'
temp_image_compression = 100 if is_webp else 0
commands = [ '-i', target_path, '-q:v', str(temp_image_compression), '-vf', 'scale=' + str(temp_image_resolution), '-y', output_path ]
commands = [ '-i', target_path, '-s', str(temp_image_resolution), '-q:v', str(temp_image_compression), '-y', output_path ]
return run_ffmpeg(commands)


def finalize_image(output_path : str, output_image_resolution : str) -> bool:
output_image_compression = round(31 - (facefusion.globals.output_image_quality * 0.31))
commands = [ '-i', output_path, '-q:v', str(output_image_compression), '-vf', 'scale=' + str(output_image_resolution), '-y', output_path ]
commands = [ '-i', output_path, '-s', str(output_image_resolution), '-q:v', str(output_image_compression), '-y', output_path ]
return run_ffmpeg(commands)


@@ -106,7 +106,7 @@ def restore_audio(target_path : str, output_path : str, output_video_fps : Fps)
trim_frame_start = facefusion.globals.trim_frame_start
trim_frame_end = facefusion.globals.trim_frame_end
temp_output_video_path = get_temp_output_video_path(target_path)
commands = [ '-hwaccel', 'auto', '-i', temp_output_video_path ]
commands = [ '-i', temp_output_video_path ]

if trim_frame_start is not None:
start_time = trim_frame_start / output_video_fps
@@ -120,7 +120,7 @@

def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool:
temp_output_path = get_temp_output_video_path(target_path)
commands = [ '-hwaccel', 'auto', '-i', temp_output_path, '-i', audio_path, '-af', 'apad', '-shortest', '-y', output_path ]
commands = [ '-i', temp_output_path, '-i', audio_path, '-af', 'apad', '-shortest', '-y', output_path ]
return run_ffmpeg(commands)
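To illustrate the net effect on extract_frames, the argument list it now assembles for a trimmed clip; all values are hypothetical, and run_ffmpeg() is assumed to prepend the ffmpeg binary and its global flags:

commands = [ '-i', 'target.mp4', '-s', '1280x720', '-q:v', '0' ]
commands.extend([ '-vf', 'trim=start_frame=10:end_frame=250,fps=25.0' ])
commands.extend([ '-vsync', '0', 'frames/%04d.jpg' ])
# roughly: ffmpeg -i target.mp4 -s 1280x720 -q:v 0 -vf trim=start_frame=10:end_frame=250,fps=25.0 -vsync 0 frames/%04d.jpg

Two changes run through every hunk in this file: '-hwaccel auto' is gone (the 'Remove hwaccel auto' commit), and scaling moves from the '-vf' filter chain to the '-s' option, matching the 'Respect the output video resolution' commit.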

