diff --git a/.gitignore b/.gitignore index f4fd44a..1382e22 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ !plugins/* !plugins/extract/* !plugins/train/* +!plugins/convert/* !tools !tools/lib* *.ini diff --git a/lib/cli.py b/lib/cli.py index 93b2121..184a4e6 100644 --- a/lib/cli.py +++ b/lib/cli.py @@ -4,12 +4,15 @@ import argparse import logging import os import platform +import re import sys +import textwrap from importlib import import_module from lib.logger import crash_log, log_setup from lib.utils import safe_shutdown +from lib.model.masks import get_available_masks, get_default_mask from plugins.plugin_loader import PluginLoader logger = logging.getLogger(__name__) # pylint: disable=invalid-name @@ -278,17 +281,34 @@ class FullHelpArgumentParser(argparse.ArgumentParser): class SmartFormatter(argparse.HelpFormatter): """ Smart formatter for allowing raw formatting in help - text. + text and lists in the helptext - To use prefix the help item with "R|" to overide - default formatting + To use: prefix the help item with "R|" to overide + default formatting. 
List items can be marked with "L|" + at the start of a newline - from: https://stackoverflow.com/questions/3853722 """ + adapted from: https://stackoverflow.com/questions/3853722 """ + + def __init__(self, + prog, + indent_increment=2, + max_help_position=24, + width=None): + + super().__init__(prog, indent_increment, max_help_position, width) + self._whitespace_matcher_limited = re.compile(r'[ \r\f\v]+', re.ASCII) def _split_lines(self, text, width): if text.startswith("R|"): - return text[2:].splitlines() - # this is the RawTextHelpFormatter._split_lines + text = self._whitespace_matcher_limited.sub(' ', text).strip()[2:] + output = list() + for txt in text.splitlines(): + indent = "" + if txt.startswith("L|"): + indent = " " + txt = " - {}".format(txt[2:]) + output.extend(textwrap.wrap(txt, width, subsequent_indent=indent)) + return output return argparse.HelpFormatter._split_lines(self, text, width) @@ -396,18 +416,16 @@ class ExtractConvertArgs(FaceSwapArgs): "action": DirOrFileFullPaths, "filetypes": "video", "dest": "input_dir", - "default": "input", - "help": "Input directory or video. Either a " - "directory containing the image files " - "you wish to process or path to a " - "video file. Defaults to 'input'"}) + "required": True, + "help": "Input directory or video. Either a directory containing " + "the image files you wish to process or path to a video " + "file."}) argument_list.append({"opts": ("-o", "--output-dir"), "action": DirFullPaths, "dest": "output_dir", - "default": "output", - "help": "Output directory. This is where the " - "converted files will be stored. " - "Defaults to 'output'"}) + "required": True, + "help": "Output directory. 
This is where the converted files will " + "be saved."}) argument_list.append({"opts": ("-al", "--alignments"), "action": FileFullPaths, "filetypes": "alignments", @@ -469,32 +487,30 @@ class ExtractArgs(ExtractConvertArgs): "opts": ("-D", "--detector"), "action": Radio, "type": str.lower, - "choices": PluginLoader.get_available_extractors( - "detect"), + "choices": PluginLoader.get_available_extractors("detect"), "default": "mtcnn", "help": "R|Detector to use." - "\n'dlib-hog': uses least resources, but is the" - "\n\tleast reliable." - "\n'dlib-cnn': faster than mtcnn but detects" - "\n\tfewer faces and fewer false positives." - "\n'mtcnn': slower than dlib, but uses fewer" - "\n\tresources whilst detecting more faces and" - "\n\tmore false positives. Has superior" - "\n\talignment to dlib" - "\n's3fd': Can detect more faces than mtcnn, but" - "\n\t is a lot more resource intensive"}) + "\nL|'dlib-hog': uses least resources, but is the " + "least reliable." + "\nL|'dlib-cnn': faster than mtcnn but detects " + "fewer faces and fewer false positives." + "\nL|'mtcnn': slower than dlib, but uses fewer " + "resources whilst detecting more faces and " + "more false positives. Has superior " + "alignment to dlib" + "\nL|'s3fd': Can detect more faces than mtcnn, but " + "is a lot more resource intensive"}) argument_list.append({ "opts": ("-A", "--aligner"), "action": Radio, "type": str.lower, - "choices": PluginLoader.get_available_extractors( - "align"), + "choices": PluginLoader.get_available_extractors("align"), "default": "fan", "help": "R|Aligner to use." - "\n'dlib': Dlib Pose Predictor. Faster, less " - "\n\tresource intensive, but less accurate." - "\n'fan': Face Alignment Network. Best aligner." - "\n\tGPU heavy, slow when not running on GPU"}) + "\nL|'dlib': Dlib Pose Predictor. Faster, less " + "resource intensive, but less accurate." + "\nL|'fan': Face Alignment Network. Best aligner. 
" + "GPU heavy, slow when not running on GPU"}) argument_list.append({"opts": ("-r", "--rotate-images"), "type": str, "dest": "rotate_images", @@ -611,10 +627,9 @@ class ConvertArgs(ExtractConvertArgs): argument_list.append({"opts": ("-m", "--model-dir"), "action": DirFullPaths, "dest": "model_dir", - "default": "models", - "help": "Model directory. A directory " - "containing the trained model you wish " - "to process. Defaults to 'models'"}) + "required": True, + "help": "Model directory. A directory containing the trained model " + "you wish to process."}) argument_list.append({"opts": ("-a", "--input-aligned-dir"), "action": DirFullPaths, "dest": "input_aligned_dir", @@ -627,56 +642,91 @@ class ConvertArgs(ExtractConvertArgs): "conversion. If no aligned dir is " "specified, all faces will be " "converted"}) - argument_list.append({"opts": ("-t", "--trainer"), - "action": Radio, - "type": str.lower, - "choices": PluginLoader.get_available_models(), - "default": PluginLoader.get_default_model(), - "help": "Select the trainer that was used to " - "create the model"}) - argument_list.append({"opts": ("-c", "--converter"), - "action": Radio, - "type": str.lower, - "choices": PluginLoader.get_available_converters(), - "default": "masked", - "help": "Converter to use"}) + argument_list.append({"opts": ("-ref", "--reference-video"), + "action": FileFullPaths, + "dest": "reference_video", + "filetypes": "video", + "type": str, + "help": "Only required if converting from images to video. Provide " + "The original video that the source frames were extracted " + "from (for extracting the fps and audio)."}) + argument_list.append({ + "opts": ("-c", "--color-adjustment"), + "action": Radio, + "type": str.lower, + "dest": "color_adjustment", + "choices": PluginLoader.get_available_convert_plugins("color", True), + "default": "avg-color", + "help": "R|Performs color adjustment to the swapped face. 
Some of these options have " + "configurable settings in '/config/convert.ini' or 'Edit > Configure " + "Convert Plugins':" + "\nL|avg-color: Adjust the mean of each color channel in the swapped " + "reconstruction to equal the mean of the masked area in the orginal image." + "\nL|color-transfer: Transfers the color distribution from the source to the " + "target image using the mean and standard deviations of the L*a*b* " + "color space." + "\nL|match-hist: Adjust the histogram of each color channel in the swapped " + "reconstruction to equal the histogram of the masked area in the orginal " + "image." + "\nL|seamless-clone: Use cv2's seamless clone function to remove extreme " + "gradients at the mask seam by smoothing colors. Generally does not give " + "very satisfactory results." + "\nL|none: Don't perform color adjustment."}) + argument_list.append({ + "opts": ("-sc", "--scaling"), + "action": Radio, + "type": str.lower, + "choices": PluginLoader.get_available_convert_plugins("scaling", True), + "default": "none", + "help": "R|Performs a scaling process to attempt to get better definition on the " + "final swap. Some of these options have configurable settings in " + "'/config/convert.ini' or 'Edit > Configure Convert Plugins':" + "\nL|sharpen: Perform sharpening on the final face." + "\nL|none: Don't perform any scaling operations."}) argument_list.append({ "opts": ("-M", "--mask-type"), "action": Radio, "type": str.lower, "dest": "mask_type", - "choices": ["ellipse", - "facehull", - "dfl", - # "cnn", Removed until implemented - "none"], - "default": "facehull", - "help": "R|Mask to use to replace faces." - "\nellipse: Oval around face." - "\nfacehull: Face cutout based on landmarks." - "\ndfl: A Face Hull mask from DeepFaceLabs." - # "\ncnn: Not yet implemented" Removed until implemented - "\nnone: No mask. 
Can still use blur and erode on the edges of the swap box."}) - argument_list.append({"opts": ("-b", "--blur-size"), - "type": float, + "choices": get_available_masks() + ["predicted"], + "default": "predicted", + "help": "R|Mask to use to replace faces. Blending of the masks can be adjusted in " + "'/config/convert.ini' or 'Edit > Configure Convert Plugins':" + "\nL|components: An improved face hull mask using a facehull of 8 facial " + "parts." + "\nL|dfl_full: An improved face hull mask using a facehull of 3 facial parts." + "\nL|facehull: Face cutout based on landmarks." + "\nL|predicted: The predicted mask generated from the model. If the model was " + "not trained with a mask then this will fallback to " + "'{}'".format(get_default_mask()) + + "\nL|none: Don't use a mask."}) + argument_list.append({"opts": ("-w", "--writer"), + "action": Radio, + "type": str, + "choices": PluginLoader.get_available_convert_plugins("writer", + False), + "default": "opencv", + "help": "R|The plugin to use to output the converted images. The " + "writers are configurable in '/config/convert.ini' or `Edit " + "> Configure Convert Plugins:'" + "\nL|ffmpeg: [video] Writes out the convert straight to " + "video. When the input is a series of images then the " + "'-ref' (--reference-video) parameter must be set." + "\nL|gif: [animated image] Create an animated gif." + "\nL|opencv: [images] The fastest image writer, but less " + "options and formats than other plugins." + "\nL|pillow: [images] Slower than opencv, but has more " + "options and supports more formats."}) + argument_list.append({"opts": ("-osc", "--output-scale"), + "dest": "output_scale", "action": Slider, - "min_max": (0.0, 100.0), - "rounding": 2, - "default": 5.0, - "help": "Blur kernel size as a percentage of the swap area. 
Smooths " - "the transition between the swapped face and the background " - "image."}) - argument_list.append({"opts": ("-e", "--erosion-size"), - "dest": "erosion_size", - "type": float, - "action": Slider, - "min_max": (-100.0, 100.0), - "rounding": 2, - "default": 0.0, - "help": "Erosion kernel size as a percentage of the mask radius " - "area. Positive values apply erosion which reduces the size " - "of the swapped area. Negative values apply dilation which " - "increases the swapped area"}) + "type": int, + "default": 100, + "min_max": (25, 400), + "rounding": 1, + "help": "Scale the final output frames by this amount. 100%% will " + "output the frames at source dimensions. 50%% at half size " + "200%% at double size"}) argument_list.append({"opts": ("-g", "--gpus"), "type": int, "action": Slider, @@ -684,74 +734,38 @@ class ConvertArgs(ExtractConvertArgs): "rounding": 1, "default": 1, "help": "Number of GPUs to use for conversion"}) - argument_list.append({"opts": ("-sh", "--sharpen"), - "action": Radio, - "type": str.lower, - "dest": "sharpen_image", - "choices": ["box_filter", "gaussian_filter", "none"], - "default": "none", - "help": "Sharpen the masked facial region of " - "the converted images. Choice of filter " - "to use in sharpening process -- box" - "filter or gaussian filter."}) argument_list.append({"opts": ("-fr", "--frame-ranges"), "nargs": "+", "type": str, - "help": "frame ranges to apply transfer to e.g. " - "For frames 10 to 50 and 90 to 100 use " - "--frame-ranges 10-50 90-100. Files " - "must have the frame-number as the last " - "number in the name!"}) - argument_list.append({"opts": ("-d", "--discard-frames"), + "help": "frame ranges to apply transfer to e.g. For frames 10 to 50 " + "and 90 to 100 use --frame-ranges 10-50 90-100. Files " + "must have the frame-number as the last number in the name! 
" + "Frames falling outside of the selected range will be " + "discarded unless '-k' (--keep-unchanged) is selected."}) + argument_list.append({"opts": ("-k", "--keep-unchanged"), "action": "store_true", - "dest": "discard_frames", + "dest": "keep_unchanged", "default": False, - "help": "When used with --frame-ranges discards " - "frames that are not processed instead " - "of writing them out unchanged"}) + "help": "When used with --frame-ranges outputs the unchanged frames " + "that are not processed instead of discarding them."}) argument_list.append({"opts": ("-s", "--swap-model"), "action": "store_true", "dest": "swap_model", "default": False, "help": "Swap the model. Instead of A -> B, " "swap B -> A"}) - argument_list.append({"opts": ("-S", "--seamless"), + argument_list.append({"opts": ("-sp", "--singleprocess"), "action": "store_true", - "dest": "seamless_clone", "default": False, - "help": "Use cv2's seamless clone function to " - "remove extreme gradients at the mask " - "seam by smoothing colors."}) - argument_list.append({"opts": ("-mh", "--match-histogram"), - "action": "store_true", - "dest": "match_histogram", - "default": False, - "help": "Adjust the histogram of each color " - "channel in the swapped reconstruction " - "to equal the histogram of the masked " - "area in the orginal image"}) - argument_list.append({"opts": ("-aca", "--avg-color-adjust"), - "action": "store_true", - "dest": "avg_color_adjust", - "default": False, - "help": "Adjust the mean of each color channel " - " in the swapped reconstruction to " - "equal the mean of the masked area in " - "the orginal image"}) - argument_list.append({"opts": ("-sb", "--smooth-box"), - "action": "store_true", - "dest": "smooth_box", - "default": False, - "help": "Perform a Gaussian blur on the edges of the face box " - "received from the model. 
Helps reduce pronounced edges " - "of the swap area"}) - argument_list.append({"opts": ("-dt", "--draw-transparent"), - "action": "store_true", - "dest": "draw_transparent", - "default": False, - "help": "Place the swapped face on a " - "transparent layer rather than the " - "original frame."}) + "help": "Disable multiprocessing. Slower but less resource " + "intensive."}) + argument_list.append({"opts": ("-t", "--trainer"), + "type": str.lower, + "choices": PluginLoader.get_available_models(), + "help": "[LEGACY] This only needs to be selected if a legacy " + "model is being loaded or if there are multiple models in " + "the model folder"}) + return argument_list @@ -766,17 +780,15 @@ class TrainArgs(FaceSwapArgs): argument_list.append({"opts": ("-A", "--input-A"), "action": DirFullPaths, "dest": "input_a", - "default": "input_a", - "help": "Input directory. A directory " - "containing training images for face A. " - "Defaults to 'input'"}) + "required": True, + "help": "Input directory. A directory containing training images " + "for face A."}) argument_list.append({"opts": ("-B", "--input-B"), "action": DirFullPaths, "dest": "input_b", - "default": "input_b", - "help": "Input directory. A directory " - "containing training images for face B. " - "Defaults to 'input'"}) + "required": True, + "help": "Input directory. A directory containing training images " + "for face B."}) argument_list.append({"opts": ("-ala", "--alignments-A"), "action": FileFullPaths, "filetypes": 'alignments', @@ -800,32 +812,34 @@ class TrainArgs(FaceSwapArgs): argument_list.append({"opts": ("-m", "--model-dir"), "action": DirFullPaths, "dest": "model_dir", - "default": "models", - "help": "Model directory. This is where the " - "training data will be stored. " - "Defaults to 'model'"}) + "required": True, + "help": "Model directory. 
This is where the training data will be " + "stored."}) argument_list.append({"opts": ("-t", "--trainer"), "action": Radio, "type": str.lower, "choices": PluginLoader.get_available_models(), "default": PluginLoader.get_default_model(), "help": "R|Select which trainer to use. Trainers can be" - "\nconfigured from the edit menu or the config folder." - "\n'original': The original model created by /u/deepfakes." - "\n'dfaker': 64px in/128px out model from dfaker." - "\n\tEnable 'warp-to-landmarks' for full dfaker method." - "\n'dfl-h128'. 128px in/out model from deepfacelab" - "\n'iae': A model that uses intermediate layers to try to" - "\n\tget better details" - "\n'lightweight': A lightweight model for low-end cards." - "\n\tDon't expect great results. Can train as low as 1.6GB" - "\n\twith batch size 8." - "\n'unbalanced': 128px in/out model from andenixa. The" - "\n\tautoencoders are unbalanced so B>A swaps won't work so" - "\n\twell. Very configurable," - "\n'villain': 128px in/out model from villainguy. Very" - "\n\tresource hungry (11GB for batchsize 16). Good for" - "\n\tdetails, but more susceptible to color differences"}) + "configured from the edit menu or the config folder." + "\nL|original: The original model created by /u/deepfakes." + "\nL|dfaker: 64px in/128px out model from dfaker. " + "Enable 'warp-to-landmarks' for full dfaker method." + "\nL|dfl-h128. 128px in/out model from deepfacelab" + "\nL|iae: A model that uses intermediate layers to try to " + "get better details" + "\nL|lightweight: A lightweight model for low-end cards. " + "Don't expect great results. Can train as low as 1.6GB " + "with batch size 8." + "\nL|realface: Customizable in/out resolution model " + "from andenixa. The autoencoders are unbalanced so B>A " + "swaps won't work so well. Very configurable." + "\nL|unbalanced: 128px in/out model from andenixa. The " + "autoencoders are unbalanced so B>A swaps won't work so " + "well. Very configurable." 
+ "\nL|villain: 128px in/out model from villainguy. Very " + "resource hungry (11GB for batchsize 16). Good for " + "details, but more susceptible to color differences."}) argument_list.append({"opts": ("-s", "--save-interval"), "type": int, "action": Slider, diff --git a/lib/config.py b/lib/config.py index 6a26a90..e3be3e5 100644 --- a/lib/config.py +++ b/lib/config.py @@ -28,6 +28,20 @@ class FaceswapConfig(): self.handle_config() logger.debug("Initialized: %s", self.__class__.__name__) + @property + def changeable_items(self): + """ Training only. + Return a dict of config items with their set values for items + that can be altered after the model has been created """ + retval = dict() + for sect in ("global", self.section): + for key, val in self.defaults[sect].items(): + if key == "helptext" or val["fixed"]: + continue + retval[key] = self.get(sect, key) + logger.debug("Alterable for existing models: %s", retval) + return retval + def set_defaults(self): """ Override for plugin specific config defaults @@ -61,20 +75,6 @@ class FaceswapConfig(): conf[key] = self.get(sect, key) return conf - @property - def changeable_items(self): - """ Training only. 
- Return a dict of config items with their set values for items - that can be altered after the model has been created """ - retval = dict() - for sect in ("global", self.section): - for key, val in self.defaults[sect].items(): - if key == "helptext" or val["fixed"]: - continue - retval[key] = self.get(sect, key) - logger.debug("Alterable for existing models: %s", retval) - return retval - def get(self, section, option): """ Return a config item in it's correct format """ logger.debug("Getting config item: (section: '%s', option: '%s')", section, option) @@ -236,7 +236,7 @@ class FaceswapConfig(): def load_config(self): """ Load values from config """ - logger.info("Loading config: '%s'", self.configfile) + logger.verbose("Loading config: '%s'", self.configfile) self.config.read(self.configfile) def save_config(self): diff --git a/lib/convert.py b/lib/convert.py new file mode 100644 index 0000000..c9f617a --- /dev/null +++ b/lib/convert.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +""" Converter for faceswap.py + Based on: https://gist.github.com/anonymous/d3815aba83a8f79779451262599b0955 + found on https://www.reddit.com/r/deepfakes/ """ + +import logging + +import cv2 +import numpy as np +from lib.model import masks as model_masks + +from plugins.plugin_loader import PluginLoader + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +class Converter(): + """ Swap a source face with a target """ + def __init__(self, output_dir, output_size, output_has_mask, + draw_transparent, pre_encode, arguments): + logger.debug("Initializing %s: (output_dir: '%s', output_size: %s, output_has_mask: %s, " + "draw_transparent: %s, pre_encode: %s, arguments: %s)", + self.__class__.__name__, output_dir, output_size, output_has_mask, + draw_transparent, pre_encode, arguments) + self.output_dir = output_dir + self.draw_transparent = draw_transparent + self.writer_pre_encode = pre_encode + self.scale = arguments.output_scale / 100 + self.args = arguments + 
self.adjustments = dict(box=None, mask=None, color=None, seamless=None, scaling=None) + self.load_plugins(output_size, output_has_mask) + logger.debug("Initialized %s", self.__class__.__name__) + + def load_plugins(self, output_size, output_has_mask): + """ Load the requested adjustment plugins """ + logger.debug("Loading plugins") + self.adjustments["box"] = PluginLoader.get_converter("mask", "box_blend")( + "none", + output_size) + + self.adjustments["mask"] = PluginLoader.get_converter("mask", "mask_blend")( + self.args.mask_type, + output_size, + output_has_mask) + + if self.args.color_adjustment != "none" and self.args.color_adjustment is not None: + self.adjustments["color"] = PluginLoader.get_converter("color", + self.args.color_adjustment)() + + if self.args.scaling != "none" and self.args.scaling is not None: + self.adjustments["scaling"] = PluginLoader.get_converter("scaling", + self.args.scaling)() + logger.debug("Loaded plugins: %s", self.adjustments) + + def process(self, in_queue, out_queue): + """ Process items from the queue """ + logger.debug("Starting convert process. (in_queue: %s, out_queue: %s)", + in_queue, out_queue) + while True: + item = in_queue.get() + if item == "EOF": + logger.debug("Patch queue finished") + # Signal EOF to other processes in pool + in_queue.put(item) + break + logger.trace("Patch queue got: '%s'", item["filename"]) + + try: + image = self.patch_image(item) + except Exception as err: # pylint: disable=broad-except + # Log error and output original frame + logger.error("Failed to convert image: '%s'. 
Reason: %s", + item["filename"], str(err)) + image = item["image"] + # UNCOMMENT THIS CODE BLOCK TO PRINT TRACEBACK ERRORS + # import sys + # import traceback + # exc_info = sys.exc_info() + # traceback.print_exception(*exc_info) + + logger.trace("Out queue put: %s", item["filename"]) + out_queue.put((item["filename"], image)) + logger.debug("Completed convert process") + + def patch_image(self, predicted): + """ Patch the image """ + logger.trace("Patching image: '%s'", predicted["filename"]) + frame_size = (predicted["image"].shape[1], predicted["image"].shape[0]) + new_image = self.get_new_image(predicted, frame_size) + patched_face = self.post_warp_adjustments(predicted, new_image) + patched_face = self.scale_image(patched_face) + patched_face = np.rint(patched_face * 255.0).astype("uint8") + if self.writer_pre_encode is not None: + patched_face = self.writer_pre_encode(patched_face) + logger.trace("Patched image: '%s'", predicted["filename"]) + return patched_face + + def get_new_image(self, predicted, frame_size): + """ Get the new face from the predictor and apply box manipulations """ + logger.trace("Getting: (filename: '%s', faces: %s)", + predicted["filename"], len(predicted["swapped_faces"])) + + placeholder = predicted["image"] / 255.0 + placeholder = np.concatenate((placeholder, + np.zeros((frame_size[1], frame_size[0], 1))), + axis=-1).astype("float32") + for new_face, detected_face in zip(predicted["swapped_faces"], + predicted["detected_faces"]): + predicted_mask = new_face[:, :, -1] if new_face.shape[2] == 4 else None + new_face = new_face[:, :, :3] + src_face = detected_face.reference_face + interpolator = detected_face.reference_interpolators[1] + + new_face = self.pre_warp_adjustments(src_face, new_face, detected_face, predicted_mask) + + # Warp face with the mask + placeholder = cv2.warpAffine( # pylint: disable=no-member + new_face, + detected_face.reference_matrix, + frame_size, + placeholder, + flags=cv2.WARP_INVERSE_MAP | interpolator, # 
pylint: disable=no-member + borderMode=cv2.BORDER_TRANSPARENT) # pylint: disable=no-member + + placeholder = np.clip(placeholder, 0.0, 1.0) + logger.trace("Got filename: '%s'. (placeholders: %s)", + predicted["filename"], placeholder.shape) + + return placeholder + + def pre_warp_adjustments(self, old_face, new_face, detected_face, predicted_mask): + """ Run the pre-warp adjustments """ + logger.trace("old_face shape: %s, new_face shape: %s, predicted_mask shape: %s", + old_face.shape, new_face.shape, + predicted_mask.shape if predicted_mask is not None else None) + new_face = self.adjustments["box"].run(new_face) + new_face, raw_mask = self.get_image_mask(new_face, detected_face, predicted_mask) + if self.adjustments["color"] is not None: + new_face = self.adjustments["color"].run(old_face, new_face, raw_mask) + if self.adjustments["seamless"] is not None: + new_face = self.adjustments["seamless"].run(old_face, new_face, raw_mask) + logger.trace("returning: new_face shape %s", new_face.shape) + return new_face + + def get_image_mask(self, new_face, detected_face, predicted_mask): + """ Get the image mask """ + logger.trace("Getting mask. Image shape: %s", new_face.shape) + mask, raw_mask = self.adjustments["mask"].run(detected_face, predicted_mask) + if new_face.shape[2] == 4: + logger.trace("Combining mask with alpha channel box mask") + new_face[:, :, -1] = np.minimum(new_face[:, :, -1], mask.squeeze()) + else: + logger.trace("Adding mask to alpha channel") + new_face = np.concatenate((new_face, mask), -1) + new_face = np.clip(new_face, 0.0, 1.0) + logger.trace("Got mask. 
Image shape: %s", new_face.shape) + return new_face, raw_mask + + def post_warp_adjustments(self, predicted, new_image): + """ Apply fixes to the image after warping """ + if self.adjustments["scaling"] is not None: + new_image = self.adjustments["scaling"].run(new_image) + + mask = np.repeat(new_image[:, :, -1][:, :, np.newaxis], 3, axis=-1) + foreground = new_image[:, :, :3] + background = (predicted["image"][:, :, :3] / 255.0) * (1.0 - mask) + + foreground *= mask + frame = foreground + background + frame = self.add_alpha_mask(frame, predicted) + + np.clip(frame, 0.0, 1.0, out=frame) + return frame + + def add_alpha_mask(self, frame, predicted): + """ Adding a 4th channel should happen after all other channel operations + Add the default mask as 4th channel for saving as png with alpha channel """ + if not self.draw_transparent: + return frame + logger.trace("Creating transparent image: '%s'", predicted["filename"]) + mask_type = getattr(model_masks, model_masks.get_default_mask()) + final_mask = np.zeros(frame.shape[:2] + (1, ), dtype="float32") + + for detected_face in predicted["detected_faces"]: + landmarks = detected_face.landmarks_as_xy + final_mask = cv2.bitwise_or(final_mask, # pylint: disable=no-member + mask_type(landmarks, frame, channels=1).mask) + frame = np.concatenate((frame, np.expand_dims(final_mask, axis=-1)), axis=-1) + logger.trace("Created transparent image: '%s'", predicted["filename"]) + return frame + + def scale_image(self, frame): + """ Scale the image if requested """ + if self.scale == 1: + return frame + logger.trace("source frame: %s", frame.shape) + interp = cv2.INTER_CUBIC if self.scale > 1 else cv2.INTER_AREA # pylint: disable=no-member + dims = (round((frame.shape[1] / 2 * self.scale) * 2), + round((frame.shape[0] / 2 * self.scale) * 2)) + frame = cv2.resize(frame, dims, interpolation=interp) # pylint: disable=no-member + logger.trace("resized frame: %s", frame.shape) + return frame diff --git a/lib/faces_detect.py 
b/lib/faces_detect.py index c060a71..80493a4 100644 --- a/lib/faces_detect.py +++ b/lib/faces_detect.py @@ -2,6 +2,8 @@ """ Face and landmarks detection for faceswap.py """ import logging +import numpy as np + from dlib import rectangle as d_rectangle # pylint: disable=no-name-in-module from lib.aligner import Extract as AlignerExtract, get_align_mat, get_matrix_scaling @@ -23,8 +25,15 @@ class DetectedFace(): self.hash = None # Hash must be set when the file is saved due to image compression self.aligned = dict() + self.feed = dict() + self.reference = dict() logger.trace("Initialized %s", self.__class__.__name__) + @property + def extract_ratio(self): + """ The ratio of padding to add for training images """ + return 0.375 + @property def landmarks_as_xy(self): """ Landmarks as XY """ @@ -91,12 +100,13 @@ class DetectedFace(): self.x, self.w, self.y, self.h, self.landmarksXY) # <<< Aligned Face methods and properties >>> # - def load_aligned(self, image, size=256, align_eyes=False): + def load_aligned(self, image, size=256, align_eyes=False, dtype=None): """ No need to load aligned information for all uses of this class, so only call this to load the information for easy reference to aligned properties for this face """ - logger.trace("Loading aligned face: (size: %s, align_eyes: %s)", size, align_eyes) - padding = int(size * 0.1875) + logger.trace("Loading aligned face: (size: %s, align_eyes: %s, dtype: %s)", + size, align_eyes, dtype) + padding = int(size * self.extract_ratio) // 2 self.aligned["size"] = size self.aligned["padding"] = padding self.aligned["align_eyes"] = align_eyes @@ -104,15 +114,69 @@ class DetectedFace(): if image is None: self.aligned["face"] = None else: - self.aligned["face"] = AlignerExtract().transform( + face = AlignerExtract().transform( image, self.aligned["matrix"], size, padding) + self.aligned["face"] = face if dtype is None else face.astype(dtype) + logger.trace("Loaded aligned face: %s", {key: val for key, val in 
self.aligned.items() if key != "face"}) + def padding_from_coverage(self, size, coverage_ratio): + """ Return the image padding for a face from coverage_ratio set against a + pre-padded training image """ + adjusted_ratio = coverage_ratio - (1 - self.extract_ratio) + padding = round((size * adjusted_ratio) / 2) + logger.trace(padding) + return padding + + def load_feed_face(self, image, size=64, coverage_ratio=0.625, dtype=None): + """ Return a face in the correct dimensions for feeding into a NN + + Coverage ratio should be the ratio of the extracted image that was used for + training """ + logger.trace("Loading feed face: (size: %s, coverage_ratio: %s, dtype: %s)", + size, coverage_ratio, dtype) + + self.feed["size"] = size + self.feed["padding"] = self.padding_from_coverage(size, coverage_ratio) + self.feed["matrix"] = get_align_mat(self, size, should_align_eyes=False) + + face = np.clip(AlignerExtract().transform(image, + self.feed["matrix"], + size, + self.feed["padding"])[:, :, :3] / 255.0, + 0.0, 1.0) + self.feed["face"] = face if dtype is None else face.astype(dtype) + + logger.trace("Loaded feed face. 
(face_shape: %s, matrix: %s)", + self.feed_face.shape, self.feed_matrix) + + def load_reference_face(self, image, size=64, coverage_ratio=0.625, dtype=None): + """ Return a face in the correct dimensions for reference to the output from a NN + + Coverage ratio should be the ratio of the extracted image that was used for + training """ + logger.trace("Loading reference face: (size: %s, coverage_ratio: %s, dtype: %s)", + size, coverage_ratio, dtype) + + self.reference["size"] = size + self.reference["padding"] = self.padding_from_coverage(size, coverage_ratio) + self.reference["matrix"] = get_align_mat(self, size, should_align_eyes=False) + + face = np.clip(AlignerExtract().transform(image, + self.reference["matrix"], + size, + self.reference["padding"])[:, :, :3] / 255.0, + 0.0, 1.0) + self.reference["face"] = face if dtype is None else face.astype(dtype) + + logger.trace("Loaded reference face. (face_shape: %s, matrix: %s)", + self.reference_face.shape, self.reference_matrix) + @property def original_roi(self): """ Return the square aligned box location on the original @@ -151,3 +215,51 @@ class DetectedFace(): def adjusted_interpolators(self): """ Return the interpolator and reverse interpolator for the adjusted matrix """ return get_matrix_scaling(self.adjusted_matrix) + + @property + def feed_face(self): + """ Return face for feeding into NN """ + return self.feed["face"] + + @property + def feed_matrix(self): + """ Return matrix for transforming feed face back to image """ + mat = AlignerExtract().transform_matrix(self.feed["matrix"], + self.feed["size"], + self.feed["padding"]) + logger.trace("Returning: %s", mat) + return mat + + @property + def feed_interpolators(self): + """ Return the interpolators for an input face """ + return get_matrix_scaling(self.feed_matrix) + + @property + def reference_face(self): + """ Return source face at size of output from NN for reference """ + return self.reference["face"] + + @property + def reference_landmarks(self): + 
""" Return the landmarks location transposed to reference face """ + landmarks = AlignerExtract().transform_points(self.landmarksXY, + self.reference["matrix"], + self.reference["size"], + self.reference["padding"]) + logger.trace("Returning: %s", landmarks) + return landmarks + + @property + def reference_matrix(self): + """ Return matrix for transforming output face back to image """ + mat = AlignerExtract().transform_matrix(self.reference["matrix"], + self.reference["size"], + self.reference["padding"]) + logger.trace("Returning: %s", mat) + return mat + + @property + def reference_interpolators(self): + """ Return the interpolators for an output face """ + return get_matrix_scaling(self.reference_matrix) diff --git a/lib/gui/command.py b/lib/gui/command.py index 47306c9..32ed06c 100644 --- a/lib/gui/command.py +++ b/lib/gui/command.py @@ -225,7 +225,7 @@ class OptionControl(): logger.debug("Format control help: '%s'", ctltitle) ctlhelp = self.option.get("help", "") if ctlhelp.startswith("R|"): - ctlhelp = ctlhelp[2:].replace("\n\t", " ").replace("\n'", "\n\n'") + ctlhelp = ctlhelp[2:].replace("\nL|", "\n - ").replace("\n", "\n\n") else: ctlhelp = " ".join(ctlhelp.split()) ctlhelp = ctlhelp.replace("%%", "%") diff --git a/lib/gui/popup_configure.py b/lib/gui/popup_configure.py index 3666406..df40df1 100644 --- a/lib/gui/popup_configure.py +++ b/lib/gui/popup_configure.py @@ -8,7 +8,7 @@ import tkinter as tk from tkinter import ttk from .tooltip import Tooltip -from .utils import get_config, get_images, ContextMenu, set_slider_rounding +from .utils import adjust_wraplength, get_config, get_images, ContextMenu, set_slider_rounding logger = logging.getLogger(__name__) # pylint: disable=invalid-name POPUP = dict() @@ -238,6 +238,7 @@ class ConfigFrame(ttk.Frame): # pylint: disable=too-many-ancestors lbl.pack(padx=5, pady=5, side=tk.LEFT, anchor=tk.N) info = ttk.Label(info_frame, text=self.plugin_info) info.pack(padx=5, pady=5, fill=tk.X, expand=True) + info.bind("", 
adjust_wraplength) class OptionControl(): diff --git a/lib/gui/utils.py b/lib/gui/utils.py index d0ea108..a1900a4 100644 --- a/lib/gui/utils.py +++ b/lib/gui/utils.py @@ -55,6 +55,12 @@ def set_slider_rounding(value, var, d_type, round_to, min_max): var.set(value) +def adjust_wraplength(event): + """ dynamically adjust the wraplength of a label on event """ + label = event.widget + label.configure(wraplength=event.width - 1) + + class FileHandler(): """ Raise a filedialog box and capture input """ diff --git a/lib/model/masks.py b/lib/model/masks.py index 346b8c0..e998908 100644 --- a/lib/model/masks.py +++ b/lib/model/masks.py @@ -1,115 +1,131 @@ #!/usr/bin/env python3 -""" Masks functions for faceswap.py - Masks from: - dfaker: https://github.com/dfaker/df""" +""" Masks functions for faceswap.py """ +import inspect import logging +import sys + import cv2 import numpy as np -from lib.umeyama import umeyama - logger = logging.getLogger(__name__) # pylint: disable=invalid-name -def dfaker(landmarks, face, channels=4): - """ Dfaker model mask - Embeds the mask into the face alpha channel +def get_available_masks(): + """ Return a list of the available masks for cli """ + masks = sorted([name for name, obj in inspect.getmembers(sys.modules[__name__]) + if inspect.isclass(obj) and name != "Mask"]) + masks.append("none") + logger.debug(masks) + return masks + +def get_default_mask(): + """ Set the default mask for cli """ + masks = get_available_masks() + default = "dfl_full" + default = default if default in masks else masks[0] + logger.debug(default) + return default + + +class Mask(): + """ Parent class for masks + + the output mask will be .mask channels: 1, 3 or 4: - 1 - Return a single channel mask - 3 - Return a 3 channel mask - 4 - Return the original image with the mask in the alpha channel - """ - padding = int(face.shape[0] * 0.1875) - coverage = face.shape[0] - (padding * 2) - logger.trace("face_shape: %s, coverage: %s, landmarks: %s", face.shape, coverage, 
landmarks) + 1 - Returns a single channel mask + 3 - Returns a 3 channel mask + 4 - Returns the original image with the mask in the alpha channel """ - mat = umeyama(landmarks[17:], True)[0:2] - mat = mat.reshape(-1).reshape(2, 3) - mat = mat * coverage - mat[:, 2] += padding + def __init__(self, landmarks, face, channels=4): + logger.trace("Initializing %s: (face_shape: %s, channels: %s, landmarks: %s)", + self.__class__.__name__, face.shape, channels, landmarks) + self.landmarks = landmarks + self.face = face + self.channels = channels - mask = np.zeros(face.shape[0:2] + (1, ), dtype=np.float32) - hull = cv2.convexHull(landmarks).reshape(1, -1, 2) # pylint: disable=no-member - hull = cv2.transform(hull, mat).reshape(-1, 2) # pylint: disable=no-member - cv2.fillConvexPoly(mask, hull, 255.) # pylint: disable=no-member + mask = self.build_mask() + self.mask = self.merge_mask(mask) + logger.trace("Initialized %s", self.__class__.__name__) - kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15)) # pylint: disable=no-member - mask = cv2.dilate(mask, kernel, borderType=cv2.BORDER_REFLECT) # pylint: disable=no-member - mask = np.expand_dims(mask, axis=-1) + def build_mask(self): + """ Override to build the mask """ + raise NotImplementedError - return merge_mask(face, mask, channels) + def merge_mask(self, mask): + """ Return the mask in requested shape """ + logger.trace("mask_shape: %s", mask.shape) + assert self.channels in (1, 3, 4), "Channels should be 1, 3 or 4" + assert mask.shape[2] == 1 and mask.ndim == 3, "Input mask be 3 dimensions with 1 channel" + + if self.channels == 3: + retval = np.tile(mask, 3) + elif self.channels == 4: + retval = np.concatenate((self.face, mask), -1) + else: + retval = mask + + logger.trace("Final mask shape: %s", retval.shape) + return retval -def dfl_full(landmarks, face, channels=4): - """ DFL facial mask +class dfl_full(Mask): # pylint: disable=invalid-name + """ DFL facial mask """ + def build_mask(self): + mask = 
np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32) - channels: 1, 3 or 4: - 1 - Return a single channel mask - 3 - Return a 3 channel mask - 4 - Return the original image with the mask in the alpha channel - """ - logger.trace("face_shape: %s, landmarks: %s", face.shape, landmarks) - mask = np.zeros(face.shape[0:2] + (1, ), dtype=np.float32) + nose_ridge = (self.landmarks[27:31], self.landmarks[33:34]) + jaw = (self.landmarks[0:17], + self.landmarks[48:68], + self.landmarks[0:1], + self.landmarks[8:9], + self.landmarks[16:17]) + eyes = (self.landmarks[17:27], + self.landmarks[0:1], + self.landmarks[27:28], + self.landmarks[16:17], + self.landmarks[33:34]) + parts = [jaw, nose_ridge, eyes] - nose_ridge = (landmarks[27:31], landmarks[33:34]) - jaw = (landmarks[0:17], landmarks[48:68], landmarks[0:1], - landmarks[8:9], landmarks[16:17]) - eyes = (landmarks[17:27], landmarks[0:1], landmarks[27:28], - landmarks[16:17], landmarks[33:34]) - parts = [jaw, nose_ridge, eyes] - - for item in parts: - merged = np.concatenate(item) - cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member - - return merge_mask(face, mask, channels) + for item in parts: + merged = np.concatenate(item) + cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) 
# pylint: disable=no-member + return mask -def components(landmarks, face, channels=4): - """ Component model mask +class components(Mask): # pylint: disable=invalid-name + """ Component model mask """ + def build_mask(self): + mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32) - channels: 1, 3 or 4: - 1 - Return a single channel mask - 3 - Return a 3 channel mask - 4 - Return the original image with the mask in the alpha channel - """ - logger.trace("face_shape: %s, landmarks: %s", face.shape, landmarks) - mask = np.zeros(face.shape[0:2] + (1, ), dtype=np.float32) + r_jaw = (self.landmarks[0:9], self.landmarks[17:18]) + l_jaw = (self.landmarks[8:17], self.landmarks[26:27]) + r_cheek = (self.landmarks[17:20], self.landmarks[8:9]) + l_cheek = (self.landmarks[24:27], self.landmarks[8:9]) + nose_ridge = (self.landmarks[19:25], self.landmarks[8:9],) + r_eye = (self.landmarks[17:22], + self.landmarks[27:28], + self.landmarks[31:36], + self.landmarks[8:9]) + l_eye = (self.landmarks[22:27], + self.landmarks[27:28], + self.landmarks[31:36], + self.landmarks[8:9]) + nose = (self.landmarks[27:31], self.landmarks[31:36]) + parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose] - r_jaw = (landmarks[0:9], landmarks[17:18]) - l_jaw = (landmarks[8:17], landmarks[26:27]) - r_cheek = (landmarks[17:20], landmarks[8:9]) - l_cheek = (landmarks[24:27], landmarks[8:9]) - nose_ridge = (landmarks[19:25], landmarks[8:9],) - r_eye = (landmarks[17:22], landmarks[27:28], - landmarks[31:36], landmarks[8:9]) - l_eye = (landmarks[22:27], landmarks[27:28], - landmarks[31:36], landmarks[8:9]) - nose = (landmarks[27:31], landmarks[31:36]) - parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose] - - for item in parts: - merged = np.concatenate(item) - cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) 
# pylint: disable=no-member - - return merge_mask(face, mask, channels) + for item in parts: + merged = np.concatenate(item) + cv2.fillConvexPoly(mask, cv2.convexHull(merged), 255.) # pylint: disable=no-member + return mask -def merge_mask(image, mask, channels): - """ Return the mask in requested shape """ - logger.trace("image_shape: %s, mask_shape: %s, channels: %s", - image.shape, mask.shape, channels) - assert channels in (1, 3, 4), "Channels should be 1, 3 or 4" - assert mask.shape[2] == 1 and mask.ndim == 3, "Input mask be 3 dimensions with 1 channel" - - if channels == 3: - retval = np.tile(mask, 3) - elif channels == 4: - retval = np.concatenate((image, mask), -1) - else: - retval = mask - - logger.trace("Final mask shape: %s", retval.shape) - return retval +class facehull(Mask): # pylint: disable=invalid-name + """ Basic face hull mask """ + def build_mask(self): + mask = np.zeros(self.face.shape[0:2] + (1, ), dtype=np.float32) + hull = cv2.convexHull( # pylint: disable=no-member + np.array(self.landmarks).reshape((-1, 2))) + cv2.fillConvexPoly(mask, hull, 1.0, lineType=cv2.LINE_AA) # pylint: disable=no-member + return mask diff --git a/lib/multithreading.py b/lib/multithreading.py index 4851f6d..08e5a74 100644 --- a/lib/multithreading.py +++ b/lib/multithreading.py @@ -16,6 +16,11 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name _launched_processes = set() # pylint: disable=invalid-name +def total_cpus(): + """ Return total number of cpus """ + return mp.cpu_count() + + class ConsumerBuffer(): """ Memory buffer for consuming """ def __init__(self, dispatcher, index, data): @@ -293,9 +298,9 @@ class PoolProcess(): def set_procs(self, processes): """ Set the number of processes to use """ - if processes is None: - running_processes = len(mp.active_children()) - processes = max(mp.cpu_count() - running_processes, 1) + running_processes = len(mp.active_children()) + avail_processes = max(mp.cpu_count() - running_processes, 1) + 
processes = min(avail_processes, processes) logger.verbose("Processing '%s' in %s processes", self._name, processes) return processes @@ -307,6 +312,7 @@ class PoolProcess(): logger.debug("Adding process %s of %s to mp.Pool '%s'", idx + 1, self.procs, self._name) self.pool.apply_async(self._method, args=self._args, kwds=self._kwargs) + _launched_processes.add(self.pool) logging.debug("Pooled Processes: '%s'", self._name) def join(self): @@ -314,6 +320,7 @@ class PoolProcess(): logger.debug("Joining Pooled Process: '%s'", self._name) self.pool.close() self.pool.join() + _launched_processes.remove(self.pool) logger.debug("Joined Pooled Process: '%s'", self._name) @@ -415,6 +422,14 @@ class MultiThread(): """ Return a list of thread errors """ return [thread.err for thread in self._threads] + def check_and_raise_error(self): + """ Checks for errors in thread and raises them in caller """ + if not self.has_error: + return + logger.debug("Thread error caught: %s", self.errors) + error = self.errors[0] + raise error[1].with_traceback(error[2]) + def start(self): """ Start a thread with the given method and args """ logger.debug("Starting thread(s): '%s'", self._name) @@ -480,9 +495,13 @@ def terminate_processes(): have a mechanism in place to terminate this work to avoid long blocks """ - logger.debug("Processes to join: %s", [process.name + + logger.debug("Processes to join: %s", [process for process in _launched_processes - if process.is_alive()]) + if isinstance(process, mp.pool.Pool) + or process.is_alive()]) for process in list(_launched_processes): - if process.is_alive(): + if isinstance(process, mp.pool.Pool): + process.terminate() + if isinstance(process, mp.pool.Pool) or process.is_alive(): process.join() diff --git a/lib/training_data.py b/lib/training_data.py index fa15a45..c466a24 100644 --- a/lib/training_data.py +++ b/lib/training_data.py @@ -30,7 +30,7 @@ class TrainingDataGenerator(): self.model_input_size = model_input_size self.model_output_size = 
model_output_size self.training_opts = training_opts - self.mask_function = self.set_mask_function() + self.mask_class = self.set_mask_class() self.landmarks = self.training_opts.get("landmarks", None) self._nearest_landmarks = None self.processing = ImageManipulation(model_input_size, @@ -38,16 +38,16 @@ class TrainingDataGenerator(): training_opts.get("coverage_ratio", 0.625)) logger.debug("Initialized %s", self.__class__.__name__) - def set_mask_function(self): + def set_mask_class(self): """ Set the mask function to use if using mask """ mask_type = self.training_opts.get("mask_type", None) if mask_type: logger.debug("Mask type: '%s'", mask_type) - mask_func = getattr(masks, mask_type) + mask_class = getattr(masks, mask_type) else: - mask_func = None - logger.debug("Mask function: %s", mask_func) - return mask_func + mask_class = None + logger.debug("Mask class: %s", mask_class) + return mask_class def minibatch_ab(self, images, batchsize, side, do_shuffle=True, is_timelapse=False): """ Keep a queue filled to 8x Batch Size """ @@ -60,7 +60,7 @@ class TrainingDataGenerator(): (batchsize, training_size, training_size, 3), # sample images (batchsize, self.model_input_size, self.model_input_size, 3), (batchsize, self.model_output_size, self.model_output_size, 3))) - if self.mask_function: + if self.mask_class: batch_shape.append((self.batchsize, self.model_output_size, self.model_output_size, 1)) load_process = FixedProducerDispatcher( @@ -150,10 +150,10 @@ class TrainingDataGenerator(): except TypeError: raise Exception("Error while reading image", filename) - if self.mask_function or self.training_opts["warp_to_landmarks"]: + if self.mask_class or self.training_opts["warp_to_landmarks"]: src_pts = self.get_landmarks(filename, image, side) - if self.mask_function: - image = self.mask_function(src_pts, image, channels=4) + if self.mask_class: + image = self.mask_class(src_pts, image, channels=4).mask image = self.processing.color_adjust(image) diff --git 
a/lib/utils.py b/lib/utils.py index 72d5dd2..97ad380 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -27,10 +27,13 @@ _video_extensions = [ # pylint: disable=invalid-name ".avi", ".flv", ".mkv", ".mov", ".mp4", ".mpeg", ".webm"] -def get_folder(path): +def get_folder(path, make_folder=True): """ Return a path to a folder, creating it if it doesn't exist """ logger.debug("Requested path: '%s'", path) output_dir = Path(path) + if not make_folder and not output_dir.exists(): + logger.debug("%s does not exist", path) + return None output_dir.mkdir(parents=True, exist_ok=True) logger.debug("Returning: '%s'", output_dir) return output_dir diff --git a/plugins/convert/_config.py b/plugins/convert/_config.py new file mode 100644 index 0000000..2037e42 --- /dev/null +++ b/plugins/convert/_config.py @@ -0,0 +1,328 @@ +#!/usr/bin/env python3 +""" Default configurations for convert """ + +import logging + +from lib.config import FaceswapConfig +from lib.utils import _video_extensions + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + +BLUR_TYPES = ["gaussian", "normalized", "none"] +BLUR_INFO = ("The type of blending to use:" + "\n\t gaussian: Blend with Gaussian filter. Slower, but often better than Normalized" + "\n\t normalized: Blend with Normalized box filter. 
Faster than Gaussian" + "\n\t none: Don't perform blending") + + +class Config(FaceswapConfig): + """ Config File for Convert """ + + def set_defaults(self): + """ Set the default values for config """ + logger.debug("Setting defaults") + + # << GLOBAL OPTIONS >> # +# section = "global" +# self.add_section(title=section, +# info="Options that apply to all models") + + # << MASK OPTIONS >> # + section = "mask.box_blend" + self.add_section(title=section, + info="Options for blending the edges of the swapped box with the " + "background image") + self.add_item( + section=section, title="type", datatype=str, choices=BLUR_TYPES, default="gaussian", + info=BLUR_INFO) + self.add_item( + section=section, title="distance", datatype=float, default=11.0, rounding=1, + min_max=(0.1, 25.0), + info="The distance from the edges of the swap box to start blending. " + "\nThe distance is set as percentage of the swap box size to give the number of " + "pixels from the edge of the box. Eg: For a swap area of 256px and a percentage " + "of 4%, blending would commence 10 pixels from the edge." + "\nHigher percentages start the blending from closer to the center of the face, " + "so will reveal more of the source face.") + self.add_item( + section=section, title="radius", datatype=float, default=5.0, rounding=1, + min_max=(0.1, 25.0), + info="Radius dictates how much blending should occur, or more specifically, how far " + "the blending will spread away from the 'distance' parameter." + "\nThis figure is set as a percentage of the swap box size to give the radius in " + "pixels. Eg: For a swap area of 256px and a percentage of 5%, the radius would " + "be 13 pixels" + "\nNB: Higher percentage means more blending, but too high may reveal more of " + "the source face, or lead to hard lines at the border.") + self.add_item( + section=section, title="passes", datatype=int, default=1, rounding=1, + min_max=(1, 8), + info="The number of passes to perform. 
Additional passes of the blending " + "algorithm can improve smoothing at a time cost. This is more useful for 'box' " + "type blending." + "\nAdditional passes have exponentially less effect so it's not worth setting " + "this too high") + + section = "mask.mask_blend" + self.add_section(title=section, + info="Options for blending the edges between the mask and the " + "background image") + self.add_item( + section=section, title="type", datatype=str, choices=BLUR_TYPES, default="normalized", + info=BLUR_INFO) + self.add_item( + section=section, title="radius", datatype=float, default=3.0, rounding=1, + min_max=(0.1, 25.0), + info="Radius dictates how much blending should occur." + "\nThis figure is set as a percentage of the mask diameter to give the radius in " + "pixels. Eg: for a mask with diameter 200px, a percentage of 6% would give a " + "final radius of 3px." + "\nHigher percentage means more blending") + self.add_item( + section=section, title="passes", datatype=int, default=4, rounding=1, + min_max=(1, 8), + info="The number of passes to perform. Additional passes of the blending " + "algorithm can improve smoothing at a time cost. This is more useful for 'box' " + "type blending." 
+ "\nAdditional passes have exponentially less effect so it's not worth setting " + "this too high") + self.add_item( + section=section, title="erosion", datatype=float, default=0.0, rounding=1, + min_max=(-100.0, 100.0), + info="Erosion kernel size as a percentage of the mask radius area.\n" + "Positive values apply erosion which reduces the size of the swapped area.\n" + "Negative values apply dilation which increases the swapped area") + + # <<<<<< COLOUR OPTIONS >>>>>> # + section = "color.color_transfer" + self.add_section(title=section, + info="Options for transfering the color distribution from the source to " + "the target image using the mean and standard deviations of the " + "L*a*b* color space.\n" + "This implementation is (loosely) based on to the 'Color Transfer " + "between Images' paper by Reinhard et al., 2001. matching the " + "histograms between the source and destination faces.") + self.add_item( + section=section, title="clip", datatype=bool, default=True, + info="Should components of L*a*b* image be scaled by np.clip before converting back " + "to BGR color space?\n" + "If False then components will be min-max scaled appropriately.\n" + "Clipping will keep target image brightness truer to the input.\n" + "Scaling will adjust image brightness to avoid washed out portions in the " + "resulting color transfer that can be caused by clipping.") + self.add_item( + section=section, title="preserve_paper", datatype=bool, default=True, + info="Should color transfer strictly follow methodology layed out in original paper?\n" + "The method does not always produce aesthetically pleasing results.\n" + "If False then L*a*b* components will be scaled using the reciprocal of the " + "scaling factor proposed in the paper. 
This method seems to produce more " + "consistently aesthetically pleasing results") + + section = "color.match_hist" + self.add_section(title=section, + info="Options for matching the histograms between the source and " + "destination faces") + self.add_item( + section=section, title="threshold", datatype=float, default=99.0, rounding=1, + min_max=(90.0, 100.0), + info="Adjust the threshold for histogram matching. Can reduce extreme colors leaking " + "in by filtering out colors at the extreme ends of the histogram spectrum") + + # <<<<<< SCALING OPTIONS >>>>>> # + section = "scaling.sharpen" + self.add_section(title=section, + info="Options for sharpening the face after placement") + self.add_item( + section=section, title="method", datatype=str, + choices=["box", "gaussian", "unsharp_mask"], default="unsharp_mask", + info="The type of sharpening to use: " + "\n\t box: Fastest, but weakest method. Uses a box filter to assess edges." + "\n\t gaussian: Slower, but better than box. Uses a gaussian filter to assess " + "edges." + "\n\t unsharp-mask: Slowest, but most tweakable. Uses the unsharp-mask method " + "to assess edges.") + self.add_item( + section=section, title="amount", datatype=int, default=150, rounding=1, + min_max=(100, 500), + info="Percentage that controls the magnitude of each overshoot " + "(how much darker and how much lighter the edge borders become)." + "\nThis can also be thought of as how much contrast is added at the edges. It " + "does not affect the width of the edge rims.") + self.add_item( + section=section, title="radius", datatype=float, default=0.3, rounding=1, + min_max=(0.1, 5.0), + info="Affects the size of the edges to be enhanced or how wide the edge rims become, " + "so a smaller radius enhances smaller-scale detail." + "\nRadius is set as a percentage of the final frame width and rounded to the " + "nearest pixel. E.g for a 1280 width frame, a 0.6 percenatage will give a radius " + "of 8px." 
+ "\nHigher radius values can cause halos at the edges, a detectable faint light " + "rim around objects. Fine detail needs a smaller radius. " + "\nRadius and amount interact; reducing one allows more of the other.") + self.add_item( + section=section, title="threshold", datatype=float, default=5.0, rounding=1, + min_max=(1.0, 10.0), + info="[unsharp_mask only] Controls the minimal brightness change that will be " + "sharpened or how far apart adjacent tonal values have to be before the filter " + "does anything." + "\nThis lack of action is important to prevent smooth areas from becoming " + "speckled. The threshold setting can be used to sharpen more pronounced edges, " + "while leaving subtler edges untouched. " + "\nLow values should sharpen more because fewer areas are excluded. " + "\nHigher threshold values exclude areas of lower contrast.") + + # <<<<<< OUTPUT OPTIONS >>>>>> # + section = "writer.gif" + self.add_section(title=section, + info="Options for outputting converted frames to an animated gif.") + self.add_item( + section=section, title="fps", datatype=int, min_max=(1, 60), + rounding=1, default=25, + info="Frames per Second.") + self.add_item( + section=section, title="loop", datatype=int, min_max=(0, 100), + rounding=1, default=0, + info="The number of iterations. Set to 0 to loop indefinitely.") + self.add_item( + section=section, title="palettesize", datatype=str, default="256", + choices=["2", "4", "8", "16", "32", "64", "128", "256"], + info="The number of colors to quantize the image to. 
Is rounded to the nearest power " + "of two.") + self.add_item( + section=section, title="subrectangles", datatype=bool, default=False, + info="If True, will try and optimize the GIF by storing only the rectangular parts of " + "each frame that change with respect to the previous.") + + section = "writer.opencv" + self.add_section(title=section, + info="Options for outputting converted frames to a series of images " + "using OpenCV\n" + "OpenCV can be faster than other image writers, but lacks some of " + " configuration options and formats.") + self.add_item( + section=section, title="format", datatype=str, default="png", + choices=["bmp", "jpg", "jp2", "png", "ppm"], + info="Image format to use:" + "\n\t bmp: Windows bitmap" + "\n\t jpg: JPEG format" + "\n\t jp2: JPEG 2000 format" + "\n\t png: Portable Network Graphics" + "\n\t ppm: Portable Pixmap Format") + self.add_item( + section=section, title="draw_transparent", datatype=bool, default=False, + info="Place the swapped face on a transparent layer rather than the original frame.\n" + "NB: This is only compatible with images saved in png format. If an " + "incompatible format is selected then the image will be saved as a png.") + self.add_item( + section=section, title="jpg_quality", datatype=int, min_max=(1, 95), + rounding=1, default=75, + info="[jpg only] Set the jpg quality. 1 is worst 95 is best. 
Higher quality leads to " + "larger file sizes.") + self.add_item( + section=section, title="png_compress_level", datatype=int, min_max=(0, 9), + rounding=1, default=3, + info="[png only] ZLIB compression level, 1 gives best speed, 9 gives best " + "compression, 0 gives no compression at all.") + + section = "writer.pillow" + self.add_section(title=section, + info="Options for outputting converted frames to a series of images " + "using Pillow\n" + "Pillow is more feature rich than OpenCV but can be slower.") + self.add_item( + section=section, title="format", datatype=str, default="png", + choices=["bmp", "gif", "jpg", "jp2", "png", "ppm", "tif"], + info="Image format to use:" + "\n\t bmp: Windows bitmap" + "\n\t gif: Graphics Interchange Format (NB: Not animated)" + "\n\t jpg: JPEG format" + "\n\t jp2: JPEG 2000 format" + "\n\t png: Portable Network Graphics" + "\n\t ppm: Portable Pixmap Format" + "\n\t tif: Tag Image File Format") + self.add_item( + section=section, title="draw_transparent", datatype=bool, default=False, + info="Place the swapped face on a transparent layer rather than the original frame.\n" + "NB: This is only compatible with images saved in png or tif format. If an " + "incompatible format is selected then the image will be saved as a png.") + self.add_item( + section=section, title="optimize", datatype=bool, default=False, + info="[gif, jpg and png only] If enabled, indicates that the encoder should make an " + "extra pass over the image in order to select optimal encoder settings.") + self.add_item( + section=section, title="gif_interlace", datatype=bool, default=True, + info="[gif only] Set whether to save the gif as interlaced or not.") + self.add_item( + section=section, title="jpg_quality", datatype=int, min_max=(1, 95), + rounding=1, default=75, + info="[jpg only] Set the jpg quality. 1 is worst 95 is best. 
Higher quality leads to " + "larger file sizes.") + self.add_item( + section=section, title="png_compress_level", datatype=int, min_max=(0, 9), + rounding=1, default=3, + info="[png only] ZLIB compression level, 1 gives best speed, 9 gives best " + "compression, 0 gives no compression at all. When optimize option is set to True " + "this has no effect (it is set to 9 regardless of a value passed).") + self.add_item( + section=section, title="tif_compression", datatype=str, default="tiff_deflate", + choices=["none", "tiff_ccitt", "group3", "group4", "tiff_jpeg", "tiff_adobe_deflate", + "tiff_thunderscan", "tiff_deflate", "tiff_sgilog", "tiff_sgilog24", + "tiff_raw_16"], + info="[tif only] The desired compression method for the file.") + + section = "writer.ffmpeg" + self.add_section(title=section, + info="Options for encoding converted frames to video.") + self.add_item( + section=section, title="container", datatype=str, default="mp4", + choices=[ext.replace(".", "") for ext in _video_extensions], + info="Video container to use.") + self.add_item( + section=section, title="codec", datatype=str, + choices=["libx264", "libx265"], default="libx264", + info="Video codec to use:" + "\n\t libx264: H.264. A widely supported and commonly used codec." + "\n\t libx265: H.265 / HEVC video encoder application library.") + self.add_item( + section=section, title="crf", datatype=int, min_max=(0, 51), rounding=1, default=23, + info="Constant Rate Factor: 0 is lossless and 51 is worst quality possible. A lower " + "value generally leads to higher quality, and a subjectively sane range is " + "17–28. 
Consider 17 or 18 to be visually lossless or nearly so; it should look " + "the same or nearly the same as the input but it isn't technically lossless.\n" + "The range is exponential, so increasing the CRF value +6 results in roughly " + "half the bitrate / file size, while -6 leads to roughly twice the bitrate.\n" + "Choose the highest CRF value that still provides an acceptable quality. If the " + "output looks good, then try a higher value. If it looks bad, choose a lower " + "value.") + self.add_item( + section=section, title="preset", datatype=str, default="medium", + choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", + "slower", "veryslow"], + info="A preset is a collection of options that will provide a certain encoding speed " + "to compression ratio.\nA slower preset will provide better compression " + "(compression is quality per filesize).\nUse the slowest preset that you have " + "patience for") + self.add_item( + section=section, title="tune", datatype=str, default="none", + choices=["none", "film", "animation", "grain", "stillimage", "fastdecode", + "zerolatency"], + info="Change settings based upon the specifics of your input:" + "\n\t none: Don't perform any additional tuning." + "\n\t film: [H.264 only] Use for high quality movie content; lowers deblocking." + "\n\t animation: [H.264 only] Good for cartoons; uses higher deblocking and more " + "reference frames." + "\n\t grain: Preserves the grain structure in old, grainy film material." + "\n\t stillimage: [H.264 only] Good for slideshow-like content." + "\n\t fastdecode: Allows faster decoding by disabling certain filters." + "\n\t zerolatency: Good for fast encoding and low-latency streaming.") + self.add_item( + section=section, title="profile", datatype=str, default="auto", + choices=["auto", "baseline", "main", "high", "high10", "high422", "high444"], + info="[H.264 Only] Limit the output to a specific H.264 profile. 
Don't change this " + "unless your target device only supports a certain profile.") + self.add_item( + section=section, title="level", datatype=str, default="auto", + choices=["auto", "1", "1b", "1.1", "1.2", "1.3", "2", "2.1", "2.2", "3", "3.1", "3.2", + "4", "4.1", "4.2", "5", "5.1", "5.2", "6", "6.1", "6.2"], + info="[H.264 Only] Set the encoder level, Don't change this unless your target device " + "only supports a certain level.") diff --git a/plugins/convert/color/__init__.py b/plugins/convert/color/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugins/convert/color/_base.py b/plugins/convert/color/_base.py new file mode 100644 index 0000000..286523e --- /dev/null +++ b/plugins/convert/color/_base.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +""" Parent class for color Adjustments for faceswap.py converter """ + +import logging +import numpy as np + +from plugins.convert._config import Config + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +def get_config(plugin_name): + """ Return the config for the requested model """ + return Config(plugin_name).config_dict + + +class Adjustment(): + """ Parent class for adjustments """ + def __init__(self): + logger.debug("Initializing %s", self.__class__.__name__) + self.config = get_config(".".join(self.__module__.split(".")[-2:])) + logger.debug("config: %s", self.config) + logger.debug("Initialized %s", self.__class__.__name__) + + def process(self, old_face, new_face, raw_mask): + """ Override for specific color adjustment process """ + raise NotImplementedError + + def run(self, old_face, new_face, raw_mask): + """ Perform selected adjustment on face """ + logger.trace("Performing color adjustment") + # Remove Mask for processing + reinsert_mask = False + if new_face.shape[2] == 4: + reinsert_mask = True + final_mask = new_face[:, :, -1] + new_face = new_face[:, :, :3] + new_face = self.process(old_face, new_face, raw_mask) + new_face = np.clip(new_face, 0.0, 1.0) + 
if reinsert_mask and new_face.shape[2] != 4: + # Reinsert Mask + new_face = np.concatenate((new_face, np.expand_dims(final_mask, axis=-1)), -1) + logger.trace("Performed color adjustment") + return new_face diff --git a/plugins/convert/color/avg_color.py b/plugins/convert/color/avg_color.py new file mode 100644 index 0000000..4483a31 --- /dev/null +++ b/plugins/convert/color/avg_color.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +""" Average colour adjustment color matching adjustment plugin for faceswap.py converter """ + +import numpy as np +from ._base import Adjustment + + +class Color(Adjustment): + """ Adjust the mean of the color channels to be the same for the swap and old frame """ + + @staticmethod + def process(old_face, new_face, raw_mask): + for _ in [0, 1]: + diff = old_face - new_face + avg_diff = np.sum(diff * raw_mask, axis=(0, 1)) + adjustment = avg_diff / np.sum(raw_mask, axis=(0, 1)) + new_face += adjustment + return new_face diff --git a/plugins/convert/color/color_transfer.py b/plugins/convert/color/color_transfer.py new file mode 100644 index 0000000..17ae9d2 --- /dev/null +++ b/plugins/convert/color/color_transfer.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +""" Color Transfer adjustment color matching adjustment plugin for faceswap.py converter + source: https://github.com/jrosebr1/color_transfer + The MIT License (MIT) + + Copyright (c) 2014 Adrian Rosebrock, http://www.pyimagesearch.com + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions 
of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. """ + +import cv2 +import numpy as np +from ._base import Adjustment + + +class Color(Adjustment): + """ + Transfers the color distribution from the source to the target + image using the mean and standard deviations of the L*a*b* + color space. + + This implementation is (loosely) based on to the "Color Transfer + between Images" paper by Reinhard et al., 2001. + """ + + def process(self, old_face, new_face, raw_mask): + """ + Parameters: + ------- + source: NumPy array + OpenCV image in BGR color space (the source image) + target: NumPy array + OpenCV image in BGR color space (the target image) + clip: Should components of L*a*b* image be scaled by np.clip before + converting back to BGR color space? + If False then components will be min-max scaled appropriately. + Clipping will keep target image brightness truer to the input. + Scaling will adjust image brightness to avoid washed out portions + in the resulting color transfer that can be caused by clipping. + preserve_paper: Should color transfer strictly follow methodology + layed out in original paper? The method does not always produce + aesthetically pleasing results. + If False then L*a*b* components will scaled using the reciprocal of + the scaling factor proposed in the paper. 
This method seems to produce + more consistently aesthetically pleasing results + + Returns: + ------- + transfer: NumPy array + OpenCV image (w, h, 3) NumPy array (uint8) + """ + clip = self.config.get("clip", True) + preserve_paper = self.config.get("preserve_paper", True) + + # convert the images from the RGB to L*ab* color space, being + # sure to utilizing the floating point data type (note: OpenCV + # expects floats to be 32-bit, so use that instead of 64-bit) + source = cv2.cvtColor( # pylint: disable=no-member + np.rint(old_face * raw_mask * 255.0).astype("uint8"), + cv2.COLOR_BGR2LAB).astype("float32") # pylint: disable=no-member + target = cv2.cvtColor( # pylint: disable=no-member + np.rint(new_face * raw_mask * 255.0).astype("uint8"), + cv2.COLOR_BGR2LAB).astype("float32") # pylint: disable=no-member + # compute color statistics for the source and target images + (l_mean_src, l_std_src, + a_mean_src, a_std_src, + b_mean_src, b_std_src) = self.image_stats(source) + (l_mean_tar, l_std_tar, + a_mean_tar, a_std_tar, + b_mean_tar, b_std_tar) = self.image_stats(target) + + # subtract the means from the target image + (light, col_a, col_b) = cv2.split(target) # pylint: disable=no-member + light -= l_mean_tar + col_a -= a_mean_tar + col_b -= b_mean_tar + + if preserve_paper: + # scale by the standard deviations using paper proposed factor + light = (l_std_tar / l_std_src) * light + col_a = (a_std_tar / a_std_src) * col_a + col_b = (b_std_tar / b_std_src) * col_b + else: + # scale by the standard deviations using reciprocal of paper proposed factor + light = (l_std_src / l_std_tar) * light + col_a = (a_std_src / a_std_tar) * col_a + col_b = (b_std_src / b_std_tar) * col_b + + # add in the source mean + light += l_mean_src + col_a += a_mean_src + col_b += b_mean_src + + # clip/scale the pixel intensities to [0, 255] if they fall + # outside this range + light = self._scale_array(light, clip=clip) + col_a = self._scale_array(col_a, clip=clip) + col_b = 
self._scale_array(col_b, clip=clip) + + # merge the channels together and convert back to the RGB color + # space, being sure to utilize the 8-bit unsigned integer data + # type + transfer = cv2.merge([light, col_a, col_b]) # pylint: disable=no-member + transfer = cv2.cvtColor( # pylint: disable=no-member + transfer.astype("uint8"), + cv2.COLOR_LAB2BGR).astype("float32") / 255.0 # pylint: disable=no-member + background = new_face * (1 - raw_mask) + merged = transfer + background + # return the color transferred image + return merged + + @staticmethod + def image_stats(image): + """ + Parameters: + ------- + image: NumPy array + OpenCV image in L*a*b* color space + + Returns: + ------- + Tuple of mean and standard deviations for the L*, a*, and b* + channels, respectively + """ + # compute the mean and standard deviation of each channel + (light, col_a, col_b) = cv2.split(image) # pylint: disable=no-member + (l_mean, l_std) = (light.mean(), light.std()) + (a_mean, a_std) = (col_a.mean(), col_a.std()) + (b_mean, b_std) = (col_b.mean(), col_b.std()) + + # return the color statistics + return (l_mean, l_std, a_mean, a_std, b_mean, b_std) + + @staticmethod + def _min_max_scale(arr, new_range=(0, 255)): + """ + Perform min-max scaling to a NumPy array + + Parameters: + ------- + arr: NumPy array to be scaled to [new_min, new_max] range + new_range: tuple of form (min, max) specifying range of + transformed array + + Returns: + ------- + NumPy array that has been scaled to be in + [new_range[0], new_range[1]] range + """ + # get array's current min and max + arr_min = arr.min() + arr_max = arr.max() + + # check if scaling needs to be done to be in new_range + if arr_min < new_range[0] or arr_max > new_range[1]: + # perform min-max scaling + scaled = (new_range[1] - new_range[0]) * (arr - arr_min) / (arr_max - + arr_min) + new_range[0] + else: + # return array if already in range + scaled = arr + + return scaled + + def _scale_array(self, arr, clip=True): + """ + Trim 
NumPy array values to be in [0, 255] range with option of + clipping or scaling. + + Parameters: + ------- + arr: array to be trimmed to [0, 255] range + clip: should array be scaled by np.clip? if False then input + array will be min-max scaled to range + [max([arr.min(), 0]), min([arr.max(), 255])] + + Returns: + ------- + NumPy array that has been scaled to be in [0, 255] range + """ + if clip: + scaled = np.clip(arr, 0, 255) + else: + scale_range = (max([arr.min(), 0]), min([arr.max(), 255])) + scaled = self._min_max_scale(arr, new_range=scale_range) + + return scaled diff --git a/plugins/convert/color/match_hist.py b/plugins/convert/color/match_hist.py new file mode 100644 index 0000000..e7c4572 --- /dev/null +++ b/plugins/convert/color/match_hist.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +""" Match histogram colour adjustment color matching adjustment plugin + for faceswap.py converter """ + +import numpy as np +from ._base import Adjustment + + +class Color(Adjustment): + """ Match the histogram of the color intensity of each channel """ + + def process(self, old_face, new_face, raw_mask): + mask_indices = np.nonzero(raw_mask.squeeze()) + new_face = [self.hist_match(old_face[:, :, c], + new_face[:, :, c], + mask_indices, + self.config["threshold"] / 100) + for c in range(3)] + new_face = np.stack(new_face, axis=-1) + return new_face + + @staticmethod + def hist_match(old_channel, new_channel, mask_indices, threshold): + """ Construct the histogram of the color intensity of a channel + for the swap and the original. 
Match the histogram of the original + by interpolation + """ + if mask_indices[0].size == 0: + return new_channel + + old_masked = old_channel[mask_indices] + new_masked = new_channel[mask_indices] + _, bin_idx, s_counts = np.unique(new_masked, return_inverse=True, return_counts=True) + t_values, t_counts = np.unique(old_masked, return_counts=True) + s_quants = np.cumsum(s_counts, dtype='float32') + t_quants = np.cumsum(t_counts, dtype='float32') + s_quants = threshold * s_quants / s_quants[-1] # cdf + t_quants /= t_quants[-1] # cdf + interp_s_values = np.interp(s_quants, t_quants, t_values) + new_channel[mask_indices] = interp_s_values[bin_idx] + return new_channel diff --git a/plugins/convert/color/seamless_clone.py b/plugins/convert/color/seamless_clone.py new file mode 100644 index 0000000..ccbc3bd --- /dev/null +++ b/plugins/convert/color/seamless_clone.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +""" Seamless clone adjustment plugin for faceswap.py converter + NB: This probably isn't the best place for this, but it is independent of + color adjustments and does not have a natural home, so here for now + and called as an extra plugin from lib/convert.py +""" + +import cv2 +import numpy as np +from ._base import Adjustment + + +class Color(Adjustment): + """ Seamless clone the swapped face into the old face with cv2 + NB: This probably isn't the best place for this, but it doesn't work well and + and does not have a natural home, so here for now. 
+ """ + + @staticmethod + def process(old_face, new_face, raw_mask): + height, width, _ = old_face.shape + height = height // 2 + width = width // 2 + + y_indices, x_indices, _ = np.nonzero(raw_mask) + y_crop = slice(np.min(y_indices), np.max(y_indices)) + x_crop = slice(np.min(x_indices), np.max(x_indices)) + y_center = int(np.rint((np.max(y_indices) + np.min(y_indices)) / 2 + height)) + x_center = int(np.rint((np.max(x_indices) + np.min(x_indices)) / 2 + width)) + + insertion = np.rint(new_face[y_crop, x_crop] * 255.0).astype("uint8") + insertion_mask = np.rint(raw_mask[y_crop, x_crop] * 255.0).astype("uint8") + insertion_mask[insertion_mask != 0] = 255 + prior = np.rint(np.pad(old_face * 255.0, + ((height, height), (width, width), (0, 0)), + 'constant')).astype("uint8") + + blended = cv2.seamlessClone(insertion, # pylint: disable=no-member + prior, + insertion_mask, + (x_center, y_center), + cv2.NORMAL_CLONE) # pylint: disable=no-member + blended = blended[height:-height, width:-width] + + return blended.astype("float32") / 255.0 diff --git a/plugins/convert/mask/__init__.py b/plugins/convert/mask/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugins/convert/mask/_base.py b/plugins/convert/mask/_base.py new file mode 100644 index 0000000..01b7842 --- /dev/null +++ b/plugins/convert/mask/_base.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +""" Parent class for mask adjustments for faceswap.py converter """ + +import logging + +import cv2 +import numpy as np + +from lib.model import masks as model_masks +from plugins.convert._config import Config + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +def get_config(plugin_name): + """ Return the config for the requested model """ + return Config(plugin_name).config_dict + + +class Adjustment(): + """ Parent class for adjustments """ + def __init__(self, mask_type, output_size, predicted_available): + logger.debug("Initializing %s: (arguments: '%s', output_size: %s, " + 
"predicted_available: %s)", + self.__class__.__name__, mask_type, output_size, predicted_available) + self.config = get_config(".".join(self.__module__.split(".")[-2:])) + logger.debug("config: %s", self.config) + self.mask_type = self.get_mask_type(mask_type, predicted_available) + self.dummy = np.zeros((output_size, output_size, 3), dtype='float32') + + self.skip = self.config.get("type", None) is None + logger.debug("Initialized %s", self.__class__.__name__) + + @staticmethod + def get_mask_type(mask_type, predicted_available): + """ Return the requested mask type. Return default mask if + predicted requested but not available """ + logger.debug("Requested mask_type: %s", mask_type) + if mask_type == "predicted" and not predicted_available: + mask_type = model_masks.get_default_mask() + logger.warning("Predicted selected, but the model was not trained with a mask. " + "Switching to '%s'", mask_type) + logger.debug("Returning mask_type: %s", mask_type) + return mask_type + + def process(self, *args, **kwargs): + """ Override for specific color adjustment process """ + raise NotImplementedError + + def run(self, *args, **kwargs): + """ Perform selected adjustment on face """ + logger.trace("Performing mask adjustment: (plugin: %s, args: %s, kwargs: %s", + self.__module__, args, kwargs) + retval = self.process(*args, **kwargs) + return retval + + +class BlurMask(): + """ Factory class to return the correct blur object for requested blur + Works for square images only. 
+ Currently supports Gaussian and Normalized Box Filters + """ + def __init__(self, blur_type, mask, kernel_ratio, passes=1): + """ image_size = height or width of original image + mask = the mask to apply the blurring to + kernel_ratio = kernel ratio as percentage of mask size + diameter = True calculates approx diameter of mask for kernel, False + passes = the number of passes to perform the blur """ + logger.trace("Initializing %s: (blur_type: '%s', mask_shape: %s, kernel_ratio: %s, " + "passes: %s)", self.__class__.__name__, blur_type, mask.shape, kernel_ratio, + passes) + self.blur_type = blur_type.lower() + self.mask = mask + self.passes = passes + kernel_size = self.get_kernel_size(kernel_ratio) + self.kernel_size = self.get_kernel_tuple(kernel_size) + logger.trace("Initialized %s", self.__class__.__name__) + + @property + def blurred(self): + """ The final blurred mask """ + func = self.func_mapping[self.blur_type] + kwargs = self.get_kwargs() + blurred = self.mask + for i in range(self.passes): + ksize = int(kwargs["ksize"][0]) + logger.trace("Pass: %s, kernel_size: %s", i + 1, (ksize, ksize)) + blurred = func(blurred, **kwargs) + ksize = int(ksize * self.multipass_factor) + kwargs["ksize"] = self.get_kernel_tuple(ksize) + logger.trace("Returning blurred mask. Shape: %s", blurred.shape) + return blurred + + @property + def multipass_factor(self): + """ Multipass Factor + For multiple passes the kernel must be scaled down. 
This value is + different for box filter and gaussian """ + factor = dict(gaussian=0.8, + normalized=0.5) + return factor[self.blur_type] + + @property + def sigma(self): + """ Sigma for Gaussian Blur + Returns zero so it is calculated from kernel size """ + return 0 + + @property + def func_mapping(self): + """ Return a dict of function name to cv2 function """ + return dict(gaussian=cv2.GaussianBlur, # pylint: disable = no-member + normalized=cv2.blur) # pylint: disable = no-member + + @property + def kwarg_requirements(self): + """ Return a dict of function name to a list of required kwargs """ + return dict(gaussian=["ksize", "sigmaX"], + normalized=["ksize"]) + + @property + def kwarg_mapping(self): + """ Return a dict of kwarg names to config item names """ + return dict(ksize=self.kernel_size, + sigmaX=self.sigma) + + def get_kernel_size(self, radius_ratio): + """ Set the kernel size to absolute """ + mask_diameter = np.sqrt(np.sum(self.mask)) + radius = max(1, round(mask_diameter * radius_ratio / 100)) + kernel_size = int((radius * 2) + 1) + logger.trace("kernel_size: %s", kernel_size) + return kernel_size + + @staticmethod + def get_kernel_tuple(kernel_size): + """ Make sure kernel_size is odd and return it as a tupe """ + kernel_size += 1 if kernel_size % 2 == 0 else 0 + retval = (kernel_size, kernel_size) + logger.trace(retval) + return retval + + def get_kwargs(self): + """ return valid kwargs for the requested blur """ + retval = {kword: self.kwarg_mapping[kword] + for kword in self.kwarg_requirements[self.blur_type]} + logger.trace("BlurMask kwargs: %s", retval) + return retval diff --git a/plugins/convert/mask/box_blend.py b/plugins/convert/mask/box_blend.py new file mode 100644 index 0000000..0693630 --- /dev/null +++ b/plugins/convert/mask/box_blend.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" Adjustments for the swap box for faceswap.py converter """ + +import numpy as np + +from ._base import Adjustment, BlurMask, logger + + +class 
Mask(Adjustment): + """ Manipulations that occur on the swap box + Actions performed here occur prior to warping the face back to the background frame + + For actions that occur identically for each frame (e.g. blend_box), constants can + be placed into self.func_constants to be compiled at launch, then referenced for + each face. """ + def __init__(self, mask_type, output_size, predicted_available=False): + super().__init__(mask_type, output_size, predicted_available) + self.mask = self.get_mask() if not self.skip else None + + def get_mask(self): + """ The box for every face will be identical, so set the mask just once + As gaussian blur technically blurs both sides of the mask, reduce the mask ratio by + half to give a more expected box """ + logger.debug("Building box mask") + mask_ratio = self.config["distance"] / 200 + facesize = self.dummy.shape[0] + erode = slice(round(facesize * mask_ratio), -round(facesize * mask_ratio)) + mask = self.dummy[:, :, -1] + mask[erode, erode] = 1.0 + + mask = BlurMask(self.config["type"], + mask, + self.config["radius"], + self.config["passes"]).blurred + logger.debug("Built box mask. Shape: %s", mask.shape) + return mask + + def process(self, new_face): + """ The blend box function. 
Adds the created mask to the alpha channel """ + if self.skip: + logger.trace("Skipping blend box") + return new_face + + logger.trace("Blending box") + mask = np.expand_dims(self.mask, axis=-1) + new_face = np.clip(np.concatenate((new_face, mask), axis=-1), 0.0, 1.0) + logger.trace("Blended box") + return new_face diff --git a/plugins/convert/mask/mask_blend.py b/plugins/convert/mask/mask_blend.py new file mode 100644 index 0000000..37aece6 --- /dev/null +++ b/plugins/convert/mask/mask_blend.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +""" Adjustments for the mask for faceswap.py converter """ + +import cv2 +import numpy as np + +from lib.model import masks as model_masks +from ._base import Adjustment, BlurMask, logger + + +class Mask(Adjustment): + """ Return the requested mask """ + def __init__(self, mask_type, output_size, predicted_available): + super().__init__(mask_type, output_size, predicted_available) + self.do_erode = self.config.get("erosion", 0) != 0 + self.do_blend = self.config.get("type", None) is not None + + def process(self, detected_face, predicted_mask=None): + """ Return mask and perform processing """ + mask = self.get_mask(detected_face, predicted_mask) + raw_mask = mask.copy() + if not self.skip and self.do_erode: + mask = self.erode(mask) + if not self.skip and self.do_blend: + mask = self.blend(mask) + raw_mask = np.expand_dims(raw_mask, axis=-1) if raw_mask.ndim != 3 else raw_mask + mask = np.expand_dims(mask, axis=-1) if mask.ndim != 3 else mask + logger.trace("mask shape: %s, raw_mask shape: %s", mask.shape, raw_mask.shape) + return mask, raw_mask + + def get_mask(self, detected_face, predicted_mask): + """ Return the mask from lib/model/masks and intersect with box """ + if self.mask_type == "none": + # Return a dummy mask if not using a mask + mask = np.ones_like(self.dummy[:, :, 1]) + elif self.mask_type == "predicted": + mask = predicted_mask + else: + landmarks = detected_face.reference_landmarks + mask = getattr(model_masks, 
self.mask_type)(landmarks, self.dummy, channels=1).mask + np.nan_to_num(mask, copy=False) + np.clip(mask, 0.0, 1.0, out=mask) + return mask + + # MASK MANIPULATIONS + def erode(self, mask): + """ Erode/dilate mask if requested """ + kernel = self.get_erosion_kernel(mask) + if self.config["erosion"] > 0: + logger.trace("Eroding mask") + mask = cv2.erode(mask, kernel, iterations=1) # pylint: disable=no-member + else: + logger.trace("Dilating mask") + mask = cv2.dilate(mask, kernel, iterations=1) # pylint: disable=no-member + return mask + + def get_erosion_kernel(self, mask): + """ Get the erosion kernel """ + erosion_ratio = self.config["erosion"] / 100 + mask_radius = np.sqrt(np.sum(mask)) / 2 + kernel_size = max(1, int(abs(erosion_ratio * mask_radius))) + erosion_kernel = cv2.getStructuringElement( # pylint: disable=no-member + cv2.MORPH_ELLIPSE, # pylint: disable=no-member + (kernel_size, kernel_size)) + logger.trace("erosion_kernel shape: %s", erosion_kernel.shape) + return erosion_kernel + + def blend(self, mask): + """ Blur mask if requested """ + logger.trace("Blending mask") + mask = BlurMask(self.config["type"], + mask, + self.config["radius"], + self.config["passes"]).blurred + return mask diff --git a/plugins/convert/masked.py b/plugins/convert/masked.py deleted file mode 100644 index efbeac3..0000000 --- a/plugins/convert/masked.py +++ /dev/null @@ -1,366 +0,0 @@ -#!/usr/bin/env python3 -""" Masked converter for faceswap.py - Based on: https://gist.github.com/anonymous/d3815aba83a8f79779451262599b0955 - found on https://www.reddit.com/r/deepfakes/ """ - -import logging -import cv2 -import numpy as np -from lib.model.masks import dfl_full - -logger = logging.getLogger(__name__) # pylint: disable=invalid-name - - -class Convert(): - """ Swap a source face with a target """ - def __init__(self, encoder, model, arguments): - logger.debug("Initializing %s: (encoder: '%s', model: %s, arguments: %s", - self.__class__.__name__, encoder, model, arguments) - 
self.encoder = encoder - self.args = arguments - self.input_size = model.input_shape[0] - self.training_size = model.state.training_size - self.training_coverage_ratio = model.training_opts["coverage_ratio"] - self.input_mask_shape = model.state.mask_shapes[0] if model.state.mask_shapes else None - self.crop = None - self.mask = None - logger.debug("Initialized %s", self.__class__.__name__) - - def patch_image(self, image, detected_face): - """ Patch the image """ - logger.trace("Patching image") - image = image.astype('float32') - image_size = (image.shape[1], image.shape[0]) - coverage = int(self.training_coverage_ratio * self.training_size) - padding = (self.training_size - coverage) // 2 - logger.trace("coverage: %s, padding: %s", coverage, padding) - - self.crop = slice(padding, self.training_size - padding) - if not self.mask: # Init the mask on first image - self.mask = Mask(self.args.mask_type, self.training_size, padding, self.crop) - - detected_face.load_aligned(image, size=self.training_size, align_eyes=False) - new_image = self.get_new_image(image, detected_face, coverage, image_size) - image_mask = self.get_image_mask(detected_face, image_size) - patched_face = self.apply_fixes(image, new_image, image_mask, - detected_face.landmarks_as_xy) - - logger.trace("Patched image") - return patched_face - - def get_new_image(self, image, detected_face, coverage, image_size): - """ Get the new face from the predictor """ - logger.trace("coverage: %s", coverage) - src_face = detected_face.aligned_face[:, :, :3] - coverage_face = src_face[self.crop, self.crop] - old_face = coverage_face.copy() - coverage_face = cv2.resize(coverage_face, # pylint: disable=no-member - (self.input_size, self.input_size), - interpolation=cv2.INTER_AREA) # pylint: disable=no-member - coverage_face = np.expand_dims(coverage_face, 0) - np.clip(coverage_face / 255.0, 0.0, 1.0, out=coverage_face) - - if self.input_mask_shape: - mask = np.zeros(self.input_mask_shape, np.float32) - mask = 
np.expand_dims(mask, 0) - feed = [coverage_face, mask] - else: - feed = [coverage_face] - logger.trace("Input shapes: %s", [item.shape for item in feed]) - new_face = self.encoder(feed)[0] - new_face = new_face.squeeze() - logger.trace("Output shape: %s", new_face.shape) - - new_face = cv2.resize(new_face, # pylint: disable=no-member - (coverage, coverage), - interpolation=cv2.INTER_CUBIC) # pylint: disable=no-member - np.clip(new_face * 255.0, 0.0, 255.0, out=new_face) - - if self.args.smooth_box: - self.smooth_box(old_face, new_face) - - src_face[self.crop, self.crop] = new_face - background = image.copy() - interpolator = detected_face.adjusted_interpolators[1] - new_image = cv2.warpAffine( # pylint: disable=no-member - src_face, - detected_face.adjusted_matrix, - image_size, - background, - flags=cv2.WARP_INVERSE_MAP | interpolator, # pylint: disable=no-member - borderMode=cv2.BORDER_TRANSPARENT) # pylint: disable=no-member - return new_image - - @staticmethod - def smooth_box(old_face, new_face): - """ Perform gaussian blur on the edges of the output rect """ - height = new_face.shape[0] - crop = slice(0, height) - erode = slice(height // 15, -height // 15) - sigma = height / 16 # 10 for the default 160 size - window = int(np.ceil(sigma * 3.0)) - window = window + 1 if window % 2 == 0 else window - mask = np.zeros_like(new_face) - mask[erode, erode] = 1.0 - mask = cv2.GaussianBlur(mask, # pylint: disable=no-member - (window, window), - sigma) - new_face[crop, crop] = (mask * new_face + (1.0 - mask) * old_face) - - def get_image_mask(self, detected_face, image_size): - """ Get the image mask """ - mask = self.mask.get_mask(detected_face, image_size) - if self.args.erosion_size != 0: - kwargs = {'src': mask, - 'kernel': self.set_erosion_kernel(mask), - 'iterations': 1} - if self.args.erosion_size > 0: - mask = cv2.erode(**kwargs) # pylint: disable=no-member - else: - mask = cv2.dilate(**kwargs) # pylint: disable=no-member - - if self.args.blur_size != 0: - 
blur_size = self.set_blur_size(mask) - for _ in [1,2,3,4]: # pylint: disable=no-member - mask = cv2.blur(mask, (blur_size, blur_size)) - - return np.clip(mask, 0.0, 1.0, out=mask) - - def set_erosion_kernel(self, mask): - """ Set the erosion kernel """ - erosion_ratio = self.args.erosion_size / 100 - mask_radius = np.sqrt(np.sum(mask)) / 2 - percent_erode = max(1, int(abs(erosion_ratio * mask_radius))) - erosion_kernel = cv2.getStructuringElement( # pylint: disable=no-member - cv2.MORPH_ELLIPSE, # pylint: disable=no-member - (percent_erode, percent_erode)) - logger.trace("erosion_kernel shape: %s", erosion_kernel.shape) - return erosion_kernel - - def set_blur_size(self, mask): - """ Set the blur size to absolute or percentage """ - blur_ratio = self.args.blur_size / 100 / 1.6 - mask_radius = np.sqrt(np.sum(mask)) / 2 - blur_size = int(max(1, blur_ratio * mask_radius)) - logger.trace("blur_size: %s", blur_size) - return blur_size - - def apply_fixes(self, original, face, mask, landmarks): - """ Apply fixes """ - #TODO copies aren't likey neccesary and will slow calc... 
used when isolating issues - new_image = face[:, :, :3].copy() - image_mask = mask[:, :, :3].copy() - frame = original[:, :, :3].copy() - - #TODO - force default for args.sharpen_image to ensure it isn't None - if self.args.sharpen_image is not None and self.args.sharpen_image.lower() != "none": - new_image = self.sharpen(new_image, self.args.sharpen_image) - - if self.args.avg_color_adjust: - new_image = self.color_adjust(new_image, frame, image_mask) - - if self.args.match_histogram: - new_image = self.color_hist_match(new_image, frame, image_mask) - - if self.args.seamless_clone: - blended = self.seamless_clone(new_image, frame, image_mask) - else: - foreground = new_image * image_mask - background = frame * (1.0 - image_mask) - blended = foreground + background - - np.clip(blended, 0.0, 255.0, out=blended) - if self.args.draw_transparent: - # Adding a 4th channel should happen after all other channel operations - # Add mask as 4th channel for saving as alpha on supported output formats - blended = dfl_full(landmarks, blended, channels=4) - - return np.rint(blended).astype('uint8') - - @staticmethod - def sharpen(new, method): - """ Sharpen using the unsharp=mask technique , subtracting a blurried image """ - np.clip(new, 0.0, 255.0, out=new) - if method == "box_filter": - kernel = np.ones((3, 3)) * (-1) - kernel[1, 1] = 9 - new = cv2.filter2D(new, -1, kernel) # pylint: disable=no-member - elif method == "gaussian_filter": - blur = cv2.GaussianBlur(new, (0, 0), 3.0) # pylint: disable=no-member - new = cv2.addWeighted(new, 1.5, blur, -.5, 0, new) # pylint: disable=no-member - - return new - - @staticmethod - def color_adjust(new, frame, img_mask): - """ Adjust the mean of the color channels to be the same for the swap and old frame """ - for _ in [0, 1]: - np.clip(new, 0.0, 255.0, out=new) - diff = frame - new - avg_diff = np.sum(diff * img_mask, axis=(0, 1)) - adjustment = avg_diff / np.sum(img_mask, axis=(0, 1)) - new = new + adjustment - - return new - - def 
color_hist_match(self, new, frame, img_mask): - """ Match the histogram of the color intensity of each channel """ - np.clip(new, 0.0, 255.0, out=new) - new = np.stack((self.hist_match(new[:, :, c], frame[:, :, c], img_mask[:, :, c]) for c in [0, 1, 2]), axis=-1) - - return new - - @staticmethod - def hist_match(new, frame, img_mask): - """ Construct the histogram of the color intensity of a channel - for the swap and the original. Match the histogram of the original - by interpolation - """ - mask_indices = np.nonzero(img_mask) - if len(mask_indices[0]) == 0: - return new - - m_new = new[mask_indices] - m_frame = frame[mask_indices] - _, bin_idx, s_counts = np.unique(m_new, return_inverse=True, return_counts=True) - t_values, t_counts = np.unique(m_frame, return_counts=True) - s_quants = np.cumsum(s_counts, dtype='float32') - t_quants = np.cumsum(t_counts, dtype='float32') - s_quants /= s_quants[-1] # cdf - t_quants /= t_quants[-1] # cdf - interp_s_values = np.interp(s_quants, t_quants, t_values) - new[mask_indices] = interp_s_values[bin_idx] - - return new - - @staticmethod - def seamless_clone(new, frame, img_mask): - """ Seamless clone the swapped image into the old frame with cv2 """ - np.clip(new, 0.0, 255.0, out=new) - height, width, _ = frame.shape - height = height // 2 - width = width // 2 - y_indices, x_indices, _ = np.nonzero(img_mask) - y_crop = slice(np.min(y_indices), np.max(y_indices)) - x_crop = slice(np.min(x_indices), np.max(x_indices)) - y_center = int(np.rint((np.max(y_indices) + np.min(y_indices)) / 2 + height)) - x_center = int(np.rint((np.max(x_indices) + np.min(x_indices)) / 2 + width)) - - insertion = np.rint(new[y_crop, x_crop]).astype('uint8') - insertion_mask = img_mask[y_crop, x_crop] - insertion_mask[insertion_mask != 0] = 255 - insertion_mask = insertion_mask.astype('uint8') - prior = np.pad(frame, ((height, height), (width, width), (0, 0)), 'constant') - prior = prior.astype('uint8') - - blended = cv2.seamlessClone(insertion, # 
pylint: disable=no-member - prior, - insertion_mask, - (x_center, y_center), - cv2.NORMAL_CLONE) # pylint: disable=no-member - blended = blended[height:-height, width:-width] - - return blended - -class Mask(): - """ Return the requested mask """ - - def __init__(self, mask_type, training_size, padding, crop): - """ Set requested mask """ - logger.debug("Initializing %s: (mask_type: '%s', training_size: %s, padding: %s)", - self.__class__.__name__, mask_type, training_size, padding) - - self.training_size = training_size - self.padding = padding - self.mask_type = mask_type - self.crop = crop - - logger.debug("Initialized %s", self.__class__.__name__) - - def get_mask(self, detected_face, image_size): - """ Return a face mask """ - kwargs = {"matrix": detected_face.adjusted_matrix, - "interpolators": detected_face.adjusted_interpolators, - "landmarks": detected_face.landmarks_as_xy, - "image_size": image_size} - logger.trace("kwargs: %s", kwargs) - mask = getattr(self, self.mask_type)(**kwargs) - mask = self.finalize_mask(mask) - logger.trace("mask shape: %s", mask.shape) - return mask - - def cnn(self, **kwargs): - """ CNN Mask """ - # Insert FCN-VGG16 segmentation mask model here - logger.info("cnn not yet implemented, using facehull instead") - return self.facehull(**kwargs) - - def rect(self, **kwargs): - """ Namespace for rect mask. 
This is the same as 'none' in the cli """ - return self.none(**kwargs) - - def none(self, **kwargs): - """ Rect Mask """ - logger.trace("Getting mask") - interpolator = kwargs["interpolators"][1] - ones = np.zeros((self.training_size, self.training_size, 3), dtype='float32') - mask = np.zeros((kwargs["image_size"][1], kwargs["image_size"][0], 3), dtype='float32') - # central_core = slice(self.padding, -self.padding) - ones[self.crop, self.crop] = 1.0 - cv2.warpAffine(ones, # pylint: disable=no-member - kwargs["matrix"], - kwargs["image_size"], - mask, - flags=cv2.WARP_INVERSE_MAP | interpolator, # pylint: disable=no-member - borderMode=cv2.BORDER_CONSTANT, # pylint: disable=no-member - borderValue=0.0) - return mask - - def dfl(self, **kwargs): - """ DFaker Mask """ - logger.trace("Getting mask") - dummy = np.zeros((kwargs["image_size"][1], kwargs["image_size"][0], 3), dtype='float32') - mask = dfl_full(kwargs["landmarks"], dummy, channels=3) - mask = self.intersect_rect(mask, **kwargs) - return mask - - def facehull(self, **kwargs): - """ Facehull Mask """ - logger.trace("Getting mask") - mask = np.zeros((kwargs["image_size"][1], kwargs["image_size"][0], 3), dtype='float32') - hull = cv2.convexHull( # pylint: disable=no-member - np.array(kwargs["landmarks"]).reshape((-1, 2))) - cv2.fillConvexPoly(mask, # pylint: disable=no-member - hull, - (1.0, 1.0, 1.0), - lineType=cv2.LINE_AA) # pylint: disable=no-member - mask = self.intersect_rect(mask, **kwargs) - return mask - - @staticmethod - def ellipse(**kwargs): - """ Ellipse Mask """ - logger.trace("Getting mask") - mask = np.zeros((kwargs["image_size"][1], kwargs["image_size"][0], 3), dtype='float32') - ell = cv2.fitEllipse( # pylint: disable=no-member - np.array(kwargs["landmarks"]).reshape((-1, 2))) - cv2.ellipse(mask, # pylint: disable=no-member - box=ell, - color=(1.0, 1.0, 1.0), - thickness=-1) - return mask - - def intersect_rect(self, hull_mask, **kwargs): - """ Intersect the given hull mask with the roi """ - 
logger.trace("Intersecting rect") - mask = self.rect(**kwargs) - mask *= hull_mask - return mask - - @staticmethod - def finalize_mask(mask): - """ Finalize the mask """ - logger.trace("Finalizing mask") - np.nan_to_num(mask, copy=False) - np.clip(mask, 0.0, 1.0, out=mask) - return mask diff --git a/plugins/convert/scaling/__init__.py b/plugins/convert/scaling/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugins/convert/scaling/_base.py b/plugins/convert/scaling/_base.py new file mode 100644 index 0000000..4167692 --- /dev/null +++ b/plugins/convert/scaling/_base.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +""" Parent class for scaling Adjustments for faceswap.py converter """ + +import logging +import numpy as np + +from plugins.convert._config import Config + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +def get_config(plugin_name): + """ Return the config for the requested model """ + return Config(plugin_name).config_dict + + +class Adjustment(): + """ Parent class for scaling adjustments """ + def __init__(self): + logger.debug("Initializing %s", self.__class__.__name__) + self.config = get_config(".".join(self.__module__.split(".")[-2:])) + logger.debug("config: %s", self.config) + logger.debug("Initialized %s", self.__class__.__name__) + + def process(self, new_face): + """ Override for specific scaling adjustment process """ + raise NotImplementedError + + def run(self, new_face): + """ Perform selected adjustment on face """ + logger.trace("Performing scaling adjustment") + # Remove Mask for processing + reinsert_mask = False + if new_face.shape[2] == 4: + reinsert_mask = True + final_mask = new_face[:, :, -1] + new_face = new_face[:, :, :3] + new_face = self.process(new_face) + new_face = np.clip(new_face, 0.0, 1.0) + if reinsert_mask and new_face.shape[2] != 4: + # Reinsert Mask + new_face = np.concatenate((new_face, np.expand_dims(final_mask, axis=-1)), -1) + logger.trace("Performed scaling 
adjustment") + return new_face diff --git a/plugins/convert/scaling/sharpen.py b/plugins/convert/scaling/sharpen.py new file mode 100644 index 0000000..1ecca3f --- /dev/null +++ b/plugins/convert/scaling/sharpen.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +""" Sharpening for enlarged face for faceswap.py converter """ +import cv2 +import numpy as np + +from ._base import Adjustment, logger + + +class Scaling(Adjustment): + """ Sharpening Adjustments for the face applied after warp to final frame """ + + def process(self, new_face): + """ Sharpen using the requested technique """ + amount = self.config["amount"] / 100.0 + kernel_center = self.get_kernel_size(new_face, self.config["radius"]) + new_face = getattr(self, self.config["method"])(new_face, kernel_center, amount) + return new_face + + @staticmethod + def get_kernel_size(new_face, radius_percent): + """ Return the kernel size and central point for the given radius + relative to frame width """ + radius = max(1, round(new_face.shape[1] * radius_percent / 100)) + kernel_size = int((radius * 2) + 1) + kernel_size = (kernel_size, kernel_size) + logger.trace(kernel_size) + return kernel_size, radius + + @staticmethod + def box(new_face, kernel_center, amount): + """ Sharpen using box filter """ + kernel_size, center = kernel_center + kernel = np.zeros(kernel_size, dtype="float32") + kernel[center, center] = 1.0 + box_filter = np.ones(kernel_size, dtype="float32") / kernel_size[0]**2 + kernel = kernel + (kernel - box_filter) * amount + new_face = cv2.filter2D(new_face, -1, kernel) # pylint: disable=no-member + return new_face + + @staticmethod + def gaussian(new_face, kernel_center, amount): + """ Sharpen using gaussian filter """ + kernel_size = kernel_center[0] + blur = cv2.GaussianBlur(new_face, kernel_size, 0) # pylint: disable=no-member + new_face = cv2.addWeighted(new_face, # pylint: disable=no-member + 1.0 + (0.5 * amount), + blur, + -(0.5 * amount), + 0) + return new_face + + def unsharp_mask(self, 
new_face, kernel_center, amount): + """ Sharpen using unsharp mask """ + kernel_size = kernel_center[0] + threshold = self.config["threshold"] / 255.0 + blur = cv2.GaussianBlur(new_face, kernel_size, 0) # pylint: disable=no-member + low_contrast_mask = (abs(new_face - blur) < threshold).astype("float32") + sharpened = (new_face * (1.0 + amount)) + (blur * -amount) + new_face = (new_face * (1.0 - low_contrast_mask)) + (sharpened * low_contrast_mask) + return new_face diff --git a/plugins/convert/writer/__init__.py b/plugins/convert/writer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugins/convert/writer/_base.py b/plugins/convert/writer/_base.py new file mode 100644 index 0000000..d771834 --- /dev/null +++ b/plugins/convert/writer/_base.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +""" Parent class for output writers for faceswap.py converter """ + +import logging +import os +import re + +from plugins.convert._config import Config + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +def get_config(plugin_name): + """ Return the config for the requested model """ + return Config(plugin_name).config_dict + + +class Output(): + """ Parent class for scaling adjustments """ + def __init__(self, output_folder): + logger.debug("Initializing %s: (output_folder: '%s')", + self.__class__.__name__, output_folder) + self.config = get_config(".".join(self.__module__.split(".")[-2:])) + logger.debug("config: %s", self.config) + self.output_folder = output_folder + self.output_dimensions = None + + # Methods for making sure frames are written out in frame order + self.re_search = re.compile(r"(\d+)(?=\.\w+$)") # Identify frame numbers + self.cache = dict() # Cache for when frames must be written in correct order + logger.debug("Initialized %s", self.__class__.__name__) + + def output_filename(self, filename): + """ Return the output filename with the correct folder and extension + NB: The plugin must have a config item 'format' that 
contains the + file extension to use this method """ + out_filename = "{}.{}".format(os.path.splitext(filename)[0], self.config["format"]) + out_filename = os.path.join(self.output_folder, out_filename) + logger.trace("in filename: '%s', out filename: '%s'", filename, out_filename) + return out_filename + + def cache_frame(self, filename, image): + """ Add the incoming frame to the cache """ + frame_no = int(re.search(self.re_search, filename).group()) + self.cache[frame_no] = image + logger.trace("Added to cache. Frame no: %s", frame_no) + + def write(self, filename, image): + """ Override for specific frame writing method """ + raise NotImplementedError + + def pre_encode(self, image): # pylint: disable=unused-argument,no-self-use + """ If the writer supports pre-encoding then override this to pre-encode + the image in lib/convert.py to speed up saving """ + return None + + def close(self): + """ Override for specific frame writing close methods """ + raise NotImplementedError diff --git a/plugins/convert/writer/ffmpeg.py b/plugins/convert/writer/ffmpeg.py new file mode 100644 index 0000000..0b071e3 --- /dev/null +++ b/plugins/convert/writer/ffmpeg.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" Video output writer for faceswap.py converter """ +import os +from collections import OrderedDict +from math import ceil + +import imageio +import imageio_ffmpeg as im_ffm +from ffmpy import FFmpeg + +from ._base import Output, logger + + +class Writer(Output): + """ Video output writer using imageio """ + def __init__(self, output_folder, total_count, source_video): + super().__init__(output_folder) + self.source_video = source_video + self.frame_order = list(range(1, total_count + 1)) + self.output_dimensions = None # Fix dims of 1st frame in case of different sized images + self.writer = None # Need to know dimensions of first frame, so set writer then + + @property + def video_file(self): + """ Return full path to video output """ + filename = 
os.path.basename(self.source_video) + filename = os.path.splitext(filename)[0] + filename = "{}_converted.{}".format(filename, self.config["container"]) + retval = os.path.join(self.output_folder, filename) + logger.debug(retval) + return retval + + @property + def video_tmp_file(self): + """ Temporary video file, prior to muxing final audio """ + path, filename = os.path.split(self.video_file) + retval = os.path.join(path, "__tmp_{}".format(filename)) + logger.debug(retval) + return retval + + @property + def valid_tune(self): + """ Return whether selected tune is valid for selected codec """ + return {"libx264": ["film", "animation", "grain", "stillimage", "fastdecode", + "zerolatency"], + "libx265": ["grain", "fastdecode", "zerolatency"]} + + @property + def video_fps(self): + """ Return the fps of source video """ + reader = imageio.get_reader(self.source_video) + retval = reader.get_meta_data()["fps"] + logger.debug(retval) + return retval + + @property + def output_params(self): + """ FFMPEG Output parameters """ + codec = self.config["codec"] + tune = self.config["tune"] + # Force all frames to the same size + output_args = ["-vf", "scale={}".format(self.output_dimensions)] + + output_args.extend(["-c:v", codec]) + output_args.extend(["-crf", str(self.config["crf"])]) + output_args.extend(["-preset", self.config["preset"]]) + + if tune is not None and tune in self.valid_tune[codec]: + output_args.extend(["-tune", tune]) + + if codec == "libx264" and self.config["profile"] != "auto": + output_args.extend(["-profile:v", self.config["profile"]]) + + if codec == "libx264" and self.config["level"] != "auto": + output_args.extend(["-level", self.config["level"]]) + + logger.debug(output_args) + return output_args + + def get_writer(self): + """ Add the requested encoding options and return the writer """ + logger.debug("writer config: %s", self.config) + return imageio.get_writer(self.video_tmp_file, + fps=self.video_fps, + ffmpeg_log_level="error", + 
quality=None, + output_params=self.output_params) + + def write(self, filename, image): + """ Frames come from the pool in arbitrary order, so cache frames + for writing out in correct order """ + logger.trace("Received frame: (filename: '%s', shape: %s", filename, image.shape) + if not self.output_dimensions: + logger.info("Outputting to: '%s'", self.video_file) + self.set_dimensions(image.shape[:2]) + self.writer = self.get_writer() + self.cache_frame(filename, image) + self.save_from_cache() + + def set_dimensions(self, frame_dims): + """ Set the dimensions based on a given frame frame. This protects against different + sized images coming in and ensure all images go out at the same size for writers + that require it and mapped to a macro block size 16""" + logger.debug("input dimensions: %s", frame_dims) + self.output_dimensions = "{}:{}".format( + int(ceil(frame_dims[1] / 16) * 16), + int(ceil(frame_dims[0] / 16) * 16)) + logger.debug("Set dimensions: %s", self.output_dimensions) + + def save_from_cache(self): + """ Save all the frames that are ready to be output from cache """ + while self.frame_order: + if self.frame_order[0] not in self.cache: + logger.trace("Next frame not ready. Continuing") + break + save_no = self.frame_order.pop(0) + save_image = self.cache.pop(save_no) + logger.trace("Rendering from cache. Frame no: %s", save_no) + self.writer.append_data(save_image[:, :, ::-1]) + logger.trace("Current cache size: %s", len(self.cache)) + + def close(self): + """ Close the ffmpeg writer and mux the audio """ + self.writer.close() + self.mux_audio() + + def mux_audio(self): + """ Mux audio + ImageIO is a useful lib for frames > video as it also packages the ffmpeg binary + however muxing audio is non-trivial, so this is done afterwards with ffmpy. 
+ A future fix could be implemented to mux audio with the frames """ + logger.info("Muxing Audio...") + exe = im_ffm.get_ffmpeg_exe() + inputs = OrderedDict([(self.video_tmp_file, None), (self.source_video, None)]) + outputs = {self.video_file: "-map 0:0 -map 1:1 -c: copy"} + ffm = FFmpeg(executable=exe, + global_options="-hide_banner -nostats -v 0 -y", + inputs=inputs, + outputs=outputs) + logger.debug("Executing: %s", ffm.cmd) + ffm.run() + logger.debug("Removing temp file") + os.remove(self.video_tmp_file) diff --git a/plugins/convert/writer/gif.py b/plugins/convert/writer/gif.py new file mode 100644 index 0000000..3a822be --- /dev/null +++ b/plugins/convert/writer/gif.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +""" Animated GIF writer for faceswap.py converter """ +import os + +import cv2 +import imageio + +from ._base import Output, logger + + +class Writer(Output): + """ Video output writer using imageio """ + def __init__(self, output_folder, total_count): + super().__init__(output_folder) + self.frame_order = list(range(1, total_count + 1)) + self.output_dimensions = None # Fix dims of 1st frame in case of different sized images + self.writer = None # Need to know dimensions of first frame, so set writer then + self.gif_file = None # Set filename based on first file seen + + @property + def gif_params(self): + """ Format the gif params """ + kwargs = {key: int(val) for key, val in self.config.items()} + logger.debug(kwargs) + return kwargs + + def get_writer(self): + """ Add the requested encoding options and return the writer """ + logger.debug("writer config: %s", self.config) + return imageio.get_writer(self.gif_file, + mode="i", + **self.config) + + def write(self, filename, image): + """ Frames come from the pool in arbitrary order, so cache frames + for writing out in correct order """ + logger.trace("Received frame: (filename: '%s', shape: %s", filename, image.shape) + if not self.gif_file: + self.set_gif_filename(filename) + 
self.set_dimensions(image.shape[:2]) + self.writer = self.get_writer() + if (image.shape[1], image.shape[0]) != self.output_dimensions: + image = cv2.resize(image, self.output_dimensions) # pylint: disable=no-member + self.cache_frame(filename, image) + self.save_from_cache() + + def set_gif_filename(self, filename): + """ Set the gif output filename """ + logger.debug("sample filename: '%s'", filename) + filename = os.path.splitext(filename)[0] + idx = len(filename) + for char in list(filename[::-1]): + if not char.isdigit() and char not in ("_", "-"): + break + idx -= 1 + self.gif_file = os.path.join(self.output_folder, "{}_converted.gif".format(filename[:idx])) + logger.info("Outputting to: '%s'", self.gif_file) + + def set_dimensions(self, frame_dims): + """ Set the dimensions based on a given frame frame. This protects against different + sized images coming in and ensure all images go out at the same size for writers + that require it """ + logger.debug("input dimensions: %s", frame_dims) + self.output_dimensions = (frame_dims[1], frame_dims[0]) + logger.debug("Set dimensions: %s", self.output_dimensions) + + def save_from_cache(self): + """ Save all the frames that are ready to be output from cache """ + while self.frame_order: + if self.frame_order[0] not in self.cache: + logger.trace("Next frame not ready. Continuing") + break + save_no = self.frame_order.pop(0) + save_image = self.cache.pop(save_no) + logger.trace("Rendering from cache. 
Frame no: %s", save_no) + self.writer.append_data(save_image[:, :, ::-1]) + logger.trace("Current cache size: %s", len(self.cache)) + + def close(self): + """ Close the ffmpeg writer and mux the audio """ + self.writer.close() diff --git a/plugins/convert/writer/opencv.py b/plugins/convert/writer/opencv.py new file mode 100644 index 0000000..f681c7d --- /dev/null +++ b/plugins/convert/writer/opencv.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +""" Image output writer for faceswap.py converter + Uses cv2 for writing as in testing this was a lot faster than both Pillow and ImageIO +""" + +import cv2 +from ._base import Output, logger + + +class Writer(Output): + """ Images output writer using cv2 """ + def __init__(self, output_folder): + super().__init__(output_folder) + self.extension = ".{}".format(self.config["format"]) + self.check_transparency_format() + self.args = self.get_save_args() + + def check_transparency_format(self): + """ Make sure that the output format is correct if draw_transparent is selected """ + transparent = self.config["draw_transparent"] + if not transparent or (transparent and self.config["format"] == "png"): + return + logger.warning("Draw Transparent selected, but the requested format does not support " + "transparency. 
Changing output format to 'png'") + self.config["format"] = "png" + + def get_save_args(self): + """ Return the save parameters for the file format """ + filetype = self.config["format"] + args = list() + if filetype == "jpg" and self.config["jpg_quality"] > 0: + args = (cv2.IMWRITE_JPEG_QUALITY, # pylint: disable=no-member + self.config["jpg_quality"]) + if filetype == "png" and self.config["png_compress_level"] > -1: + args = (cv2.IMWRITE_PNG_COMPRESSION, # pylint: disable=no-member + self.config["png_compress_level"]) + logger.debug(args) + return args + + def write(self, filename, image): + logger.trace("Outputting: (filename: '%s'", filename) + filename = self.output_filename(filename) + try: + with open(filename, "wb") as outfile: + outfile.write(image) + except Exception as err: # pylint: disable=broad-except + logger.error("Failed to save image '%s'. Original Error: %s", filename, err) + + def pre_encode(self, image): + logger.trace("Pre-encoding image") + image = cv2.imencode(self.extension, image, self.args)[1] # pylint: disable=no-member + return image + + def close(self): + """ Image writer does not need a close method """ + return diff --git a/plugins/convert/writer/pillow.py b/plugins/convert/writer/pillow.py new file mode 100644 index 0000000..670e900 --- /dev/null +++ b/plugins/convert/writer/pillow.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +""" Image output writer for faceswap.py converter """ + +import os +from io import BytesIO +from PIL import Image + +from ._base import Output, logger + + +class Writer(Output): + """ Images output writer using cv2 """ + def __init__(self, output_folder): + super().__init__(output_folder) + self.check_transparency_format() + # Correct format namings for writing to byte stream + self.format_dict = dict(jpg="JPEG", jp2="JPEG 2000", tif="TIFF") + self.kwargs = self.get_save_kwargs() + + def check_transparency_format(self): + """ Make sure that the output format is correct if draw_transparent is selected """ + 
transparent = self.config["draw_transparent"] + if not transparent or (transparent and self.config["format"] in ("png", "tif")): + return + logger.warning("Draw Transparent selected, but the requested format does not support " + "transparency. Changing output format to 'png'") + self.config["format"] = "png" + + def get_save_kwargs(self): + """ Return the save parameters for the file format """ + filetype = self.config["format"] + kwargs = dict() + if filetype in ("gif", "jpg", "png"): + kwargs["optimize"] = self.config["optimize"] + if filetype == "gif": + kwargs["interlace"] = self.config["gif_interlace"] + if filetype == "png": + kwargs["compress_level"] = self.config["png_compress_level"] + if filetype == "tif": + kwargs["compression"] = self.config["tif_compression"] + logger.debug(kwargs) + return kwargs + + def write(self, filename, image): + logger.trace("Outputting: (filename: '%s'", filename) + filename = self.output_filename(filename) + try: + with open(filename, "wb") as outfile: + outfile.write(image.read()) + except Exception as err: # pylint: disable=broad-except + logger.error("Failed to save image '%s'. 
Original Error: %s", filename, err) + + def output_filename(self, filename): + """ Return the output filename with the correct folder and extension """ + out_filename = "{}.{}".format(os.path.splitext(filename)[0], self.config["format"]) + out_filename = os.path.join(self.output_folder, out_filename) + logger.trace("in filename: '%s', out filename: '%s'", filename, out_filename) + return out_filename + + def pre_encode(self, image): + logger.trace("Pre-encoding image") + fmt = self.format_dict.get(self.config["format"], None) + fmt = self.config["format"].upper() if fmt is None else fmt + encoded = BytesIO() + rgb = [2, 1, 0] + if image.shape[2] == 4: + rgb.append(3) + out_image = Image.fromarray(image[..., rgb]) + out_image.save(encoded, fmt, **self.kwargs) + encoded.seek(0) + return encoded + + def close(self): + """ Image writer does not need a close method """ + return diff --git a/plugins/plugin_loader.py b/plugins/plugin_loader.py index 3506cb8..375078a 100644 --- a/plugins/plugin_loader.py +++ b/plugins/plugin_loader.py @@ -20,11 +20,6 @@ class PluginLoader(): """ Return requested detector plugin """ return PluginLoader._import("extract.align", name) - @staticmethod - def get_converter(name): - """ Return requested converter plugin """ - return PluginLoader._import("convert", name) - @staticmethod def get_model(name): """ Return requested model plugin """ @@ -35,6 +30,11 @@ class PluginLoader(): """ Return requested trainer plugin """ return PluginLoader._import("train.trainer", name) + @staticmethod + def get_converter(category, name): + """ Return the converter sub plugin """ + return PluginLoader._import("convert.{}".format(category), name) + @staticmethod def _import(attr, name): """ Import the plugin's module """ @@ -46,26 +46,6 @@ class PluginLoader(): module = import_module(mod) return getattr(module, ttl) - @staticmethod - def get_available_models(): - """ Return a list of available models """ - modelpath = os.path.join(os.path.dirname(__file__), 
"train", "model") - models = sorted(item.name.replace(".py", "").replace("_", "-") - for item in os.scandir(modelpath) - if not item.name.startswith("_") - and item.name.endswith(".py")) - return models - - @staticmethod - def get_available_converters(): - """ Return a list of available converters """ - converter_path = os.path.join(os.path.dirname(__file__), "convert") - converters = sorted(item.name.replace(".py", "").replace("_", "-") - for item in os.scandir(converter_path) - if not item.name.startswith("_") - and item.name.endswith(".py")) - return converters - @staticmethod def get_available_extractors(extractor_type): """ Return a list of available models """ @@ -79,8 +59,32 @@ class PluginLoader(): and item.name != "manual.py") return extractors + @staticmethod + def get_available_models(): + """ Return a list of available models """ + modelpath = os.path.join(os.path.dirname(__file__), "train", "model") + models = sorted(item.name.replace(".py", "").replace("_", "-") + for item in os.scandir(modelpath) + if not item.name.startswith("_") + and item.name.endswith(".py")) + return models + @staticmethod def get_default_model(): """ Return the default model """ models = PluginLoader.get_available_models() return 'original' if 'original' in models else models[0] + + @staticmethod + def get_available_convert_plugins(convert_category, add_none=True): + """ Return a list of available models """ + convertpath = os.path.join(os.path.dirname(__file__), + "convert", + convert_category) + converters = sorted(item.name.replace(".py", "").replace("_", "-") + for item in os.scandir(convertpath) + if not item.name.startswith("_") + and item.name.endswith(".py")) + if add_none: + converters.insert(0, "none") + return converters diff --git a/plugins/train/_config.py b/plugins/train/_config.py index b54bf20..93f92f0 100644 --- a/plugins/train/_config.py +++ b/plugins/train/_config.py @@ -4,15 +4,16 @@ import logging from lib.config import FaceswapConfig +from lib.model.masks 
import get_available_masks logger = logging.getLogger(__name__) # pylint: disable=invalid-name -MASK_TYPES = ["none", "dfaker", "dfl_full", "components"] +MASK_TYPES = get_available_masks() MASK_INFO = ("The mask to be used for training:" - "\n\tnone: Doesn't use any mask." - "\n\tdfaker: A basic face hull mask using a facehull of all 68 landmarks." - "\n\tdfl_full: An improved face hull mask using a facehull of 3 facial parts" - "\n\tcomponents: An improved face hull mask using a facehull of 8 facial parts") + "\n\t none: Doesn't use any mask." + "\n\t components: An improved face hull mask using a facehull of 8 facial parts" + "\n\t dfl_full: An improved face hull mask using a facehull of 3 facial parts" + "\n\t facehull: Face cutout based on landmarks") COVERAGE_INFO = ("How much of the extracted image to train on. Generally the model is optimized\n" "to the default value. Sensible values to use are:" "\n\t62.5%% spans from eyebrow to eyebrow." @@ -62,7 +63,7 @@ class Config(FaceswapConfig): info="Dfaker Model (Adapted from https://github.com/dfaker/df)" + ADDITIONAL_INFO) self.add_item( - section=section, title="mask_type", datatype=str, default="dfaker", + section=section, title="mask_type", datatype=str, default="facehull", choices=MASK_TYPES, info=MASK_INFO) self.add_item( section=section, title="coverage", datatype=float, default=100.0, rounding=1, diff --git a/plugins/train/model/_base.py b/plugins/train/model/_base.py index a392679..e08c9fa 100644 --- a/plugins/train/model/_base.py +++ b/plugins/train/model/_base.py @@ -325,9 +325,13 @@ class ModelBase(): """ Converter for autoencoder models """ logger.debug("Getting Converter: (swap: %s)", swap) if swap: - retval = self.predictors["a"].predict + model = self.predictors["a"] else: - retval = self.predictors["b"].predict + model = self.predictors["b"] + if self.predict: + # Must compile the model to be thread safe + model._make_predict_function() # pylint: disable=protected-access + retval = model.predict 
logger.debug("Got Converter: %s", retval) return retval diff --git a/requirements.txt b/requirements.txt index 1e226ef..eff489a 100755 --- a/requirements.txt +++ b/requirements.txt @@ -4,9 +4,12 @@ pathlib numpy==1.16.2 opencv-python scikit-image +Pillow scikit-learn toposort matplotlib==2.2.2 +imageio +imageio-ffmpeg ffmpy==0.2.2 nvidia-ml-py3 h5py==2.9.0 diff --git a/scripts/convert.py b/scripts/convert.py index dbee8ec..c93ccd4 100644 --- a/scripts/convert.py +++ b/scripts/convert.py @@ -5,15 +5,18 @@ import logging import re import os import sys -from pathlib import Path +from time import sleep +from threading import Event -import cv2 +import numpy as np from tqdm import tqdm from scripts.fsmedia import Alignments, Images, PostProcess, Utils +from lib import Serializer +from lib.convert import Converter from lib.faces_detect import DetectedFace -from lib.multithreading import BackgroundGenerator -from lib.queue_manager import queue_manager +from lib.multithreading import MultiThread, PoolProcess, total_cpus +from lib.queue_manager import queue_manager, QueueEmpty from lib.utils import get_folder, get_image_paths, hash_image_file from plugins.plugin_loader import PluginLoader @@ -27,49 +30,212 @@ class Convert(): def __init__(self, arguments): logger.debug("Initializing %s: (args: %s)", self.__class__.__name__, arguments) self.args = arguments - self.output_dir = get_folder(self.args.output_dir) - self.extractor = None - self.faces_count = 0 - - self.images = Images(self.args) - self.alignments = Alignments(self.args, False, self.images.is_video) - - # Update Legacy alignments - Legacy(self.alignments, self.images.input_images, arguments.input_aligned_dir) - - self.post_process = PostProcess(arguments) - self.verify_output = False - - self.opts = OptionalActions(self.args, self.images.input_images, self.alignments) - logger.debug("Initialized %s", self.__class__.__name__) - - def process(self): - """ Original & LowMem models go with converter - - Note: GAN 
prediction outputs a mask + an image, while other - predicts only an image. """ Utils.set_verbosity(self.args.loglevel) + self.images = Images(self.args) + self.validate() + self.alignments = Alignments(self.args, False, self.images.is_video) + # Update Legacy alignments + Legacy(self.alignments, self.images.input_images, arguments.input_aligned_dir) + self.opts = OptionalActions(self.args, self.images.input_images, self.alignments) + + self.add_queues() + self.disk_io = DiskIO(self.alignments, self.images, arguments) + self.extractor = None + self.predictor = Predict(self.disk_io.load_queue, self.queue_size, arguments) + self.converter = Converter(get_folder(self.args.output_dir), + self.predictor.output_size, + self.predictor.has_predicted_mask, + self.disk_io.draw_transparent, + self.disk_io.pre_encode, + arguments) + + logger.debug("Initialized %s", self.__class__.__name__) + + @property + def queue_size(self): + """ Set q-size to double number of cpus available """ + if self.args.singleprocess: + retval = 2 + else: + retval = total_cpus() * 2 + logger.debug(retval) + return retval + + @property + def pool_processes(self): + """ return the maximum number of pooled processes to use """ + if self.args.singleprocess: + retval = 1 + else: + retval = min(total_cpus(), self.images.images_found) + logger.debug(retval) + return retval + + def validate(self): + """ Make the output folder if it doesn't exist and check that video flag is + a valid choice """ + if (self.args.writer == "ffmpeg" and + not self.images.is_video and + self.args.reference_video is None): + logger.error("Output as video selected, but using frames as input. You must provide a " + "reference video ('-ref', '--reference-video').") + exit(1) + output_dir = get_folder(self.args.output_dir) + logger.info("Output Directory: %s", output_dir) + + def add_queues(self): + """ Add the queues for convert """ + logger.debug("Adding queues. 
Queue size: %s", self.queue_size) + for qname in ("convert_in", "save", "patch"): + queue_manager.add_queue(qname, self.queue_size) + + def process(self): + """ Process the conversion """ + logger.debug("Starting Conversion") + # queue_manager.debug_monitor(2) + self.convert_images() + self.disk_io.save_thread.join() + queue_manager.terminate_queues() + + Utils.finalize(self.images.images_found, + self.predictor.faces_count, + self.predictor.verify_output) + logger.debug("Completed Conversion") + + def convert_images(self): + """ Convert the images """ + logger.debug("Converting images") + save_queue = queue_manager.get_queue("save") + patch_queue = queue_manager.get_queue("patch") + pool = PoolProcess(self.converter.process, patch_queue, save_queue, + processes=self.pool_processes) + pool.start() + while True: + self.check_thread_error() + if self.disk_io.completion_event.is_set(): + break + sleep(1) + pool.join() + + save_queue.put("EOF") + logger.debug("Converted images") + + def check_thread_error(self): + """ Check and raise thread errors """ + for thread in (self.predictor.thread, self.disk_io.load_thread, self.disk_io.save_thread): + thread.check_and_raise_error() + + def patch_iterator(self, processes): + """ Prepare the images for conversion """ + out_queue = queue_manager.get_queue("out") + completed = 0 + + while True: + try: + item = out_queue.get(True, 1) + except QueueEmpty: + self.check_thread_error() + continue + self.check_thread_error() + + if item == "EOF": + completed += 1 + logger.debug("Got EOF %s of %s", completed, processes) + if completed == processes: + break + continue + + logger.trace("Yielding: '%s'", item[0]) + yield item + logger.debug("iterator exhausted") + return "EOF" + + +class DiskIO(): + """ Background threads to: + Load images from disk and get the detected faces + Save images back to disk """ + def __init__(self, alignments, images, arguments): + logger.debug("Initializing %s: (alignments: %s, images: %s, arguments: %s)", + 
self.__class__.__name__, alignments, images, arguments) + self.alignments = alignments + self.images = images + self.args = arguments + self.completion_event = Event() + self.frame_ranges = self.get_frame_ranges() + self.writer = self.get_writer() + + # For frame skipping + self.imageidxre = re.compile(r"(\d+)(?!.*\d\.)(?=\.\w+$)") + + # Extractor for on the fly detection + self.extractor = None if not self.alignments.have_alignments_file: self.load_extractor() - model = self.load_model() - converter = self.load_converter(model) + self.load_queue = None + self.save_queue = None + self.load_thread = None + self.save_thread = None + self.init_threads() + logger.debug("Initialized %s", self.__class__.__name__) - batch = BackgroundGenerator(self.prepare_images(), 1) + @property + def draw_transparent(self): + """ Draw transparent is an image writer only parameter. + Return the value here for easy access for predictor """ + return self.writer.config.get("draw_transparent", False) - for item in batch.iterator(): - self.convert(converter, item) + @property + def pre_encode(self): + """ Return the writer's pre-encoder """ + dummy = np.zeros((20, 20, 3)).astype("uint8") + test = self.writer.pre_encode(dummy) + retval = None if test is None else self.writer.pre_encode + logger.debug("Writer pre_encode function: %s", retval) + return retval - if self.extractor: - queue_manager.terminate_queues() + @property + def total_count(self): + """ Return the total number of frames to be converted """ + if self.frame_ranges and not self.args.keep_unchanged: + retval = sum([fr[1] - fr[0] for fr in self.frame_ranges]) + else: + retval = self.images.images_found + logger.debug(retval) + return retval - Utils.finalize(self.images.images_found, - self.faces_count, - self.verify_output) + # Initalization + def get_writer(self): + """ Return the writer plugin """ + args = [self.args.output_dir] + if self.args.writer in ("ffmpeg", "gif"): + args.append(self.total_count) + if self.args.writer == 
"ffmpeg": + if self.images.is_video: + args.append(self.args.input_dir) + else: + args.append(self.args.reference_video) + logger.debug("Writer args: %s", args) + return PluginLoader.get_converter("writer", self.args.writer)(*args) + + def get_frame_ranges(self): + """ split out the frame ranges and parse out 'min' and 'max' values """ + if not self.args.frame_ranges: + logger.debug("No frame range set") + return None + + minmax = {"min": 0, # never any frames less than 0 + "max": float("inf")} + retval = [tuple(map(lambda q: minmax[q] if q in minmax.keys() else int(q), v.split("-"))) + for v in self.args.frame_ranges] + logger.debug("frame ranges: %s", retval) + return retval def load_extractor(self): """ Set on the fly extraction """ + logger.debug("Loading extractor") logger.warning("No Alignments file found. Extracting on the fly.") logger.warning("NB: This will use the inferior dlib-hog for extraction " "and dlib pose predictor for landmarks. It is recommended " @@ -80,74 +246,85 @@ class Convert(): self.extractor = Extractor(None, extract_args) self.extractor.launch_detector() self.extractor.launch_aligner() + logger.debug("Loaded extractor") - def load_model(self): - """ Load the model requested for conversion """ - logger.debug("Loading Model") - model_dir = get_folder(self.args.model_dir) - model = PluginLoader.get_model(self.args.trainer)(model_dir, self.args.gpus, predict=True) - logger.debug("Loaded Model") - return model + def init_threads(self): + """ Initialize queues and threads """ + logger.debug("Initializing DiskIO Threads") + for task in ("load", "save"): + self.add_queue(task) + self.start_thread(task) + logger.debug("Initialized DiskIO Threads") - def load_converter(self, model): - """ Load the requested converter for conversion """ - conv = self.args.converter - converter = PluginLoader.get_converter(conv)( - model.converter(self.args.swap_model), - model=model, - arguments=self.args) - return converter + def add_queue(self, task): + """ Add 
the queue to queue_manager and set queue attribute """ + logger.debug("Adding queue for task: '%s'", task) + q_name = "convert_in" if task == "load" else task + setattr(self, "{}_queue".format(task), queue_manager.get_queue(q_name)) + logger.debug("Added queue for task: '%s'", task) - def prepare_images(self): - """ Prepare the images for conversion """ - filename = "" - if self.extractor: - load_queue = queue_manager.get_queue("load") - for filename, image in tqdm(self.images.load(), - total=self.images.images_found, - file=sys.stdout): + def start_thread(self, task): + """ Start the DiskIO thread """ + logger.debug("Starting thread: '%s'", task) + args = self.completion_event if task == "save" else None + func = getattr(self, task) + io_thread = MultiThread(func, args, thread_count=1) + io_thread.start() + setattr(self, "{}_thread".format(task), io_thread) + logger.debug("Started thread: '%s'", task) - if (self.args.discard_frames and - self.opts.check_skipframe(filename) == "discard"): + # Loading tasks + def load(self, *args): # pylint: disable=unused-argument + """ Load the images with detected_faces""" + logger.debug("Load Images: Start") + extract_queue = queue_manager.get_queue("extract_in") if self.extractor else None + idx = 0 + for filename, image in self.images.load(): + idx += 1 + if self.load_queue.shutdown.is_set(): + logger.debug("Load Queue: Stop signal received. Terminating") + break + if image is None or not image.any(): + logger.warning("Unable to open image. 
Skipping: '%s'", filename) + continue + if self.check_skipframe(filename): + if self.args.keep_unchanged: + logger.trace("Saving unchanged frame: %s", filename) + out_file = os.path.join(self.args.output_dir, os.path.basename(filename)) + self.save_queue.put((out_file, image)) + else: + logger.trace("Discarding frame: '%s'", filename) continue - frame = os.path.basename(filename) - if self.extractor: - detected_faces = self.detect_faces(load_queue, filename, image) - else: - detected_faces = self.alignments_faces(frame, image) + detected_faces = self.get_detected_faces(filename, image, extract_queue) + item = dict(filename=filename, image=image, detected_faces=detected_faces) + self.load_queue.put(item) - faces_count = len(detected_faces) - if faces_count != 0: - # Post processing requires a dict with "detected_faces" key - self.post_process.do_actions( - {"detected_faces": detected_faces}) - self.faces_count += faces_count + self.load_queue.put("EOF") + logger.debug("Load Images: Complete") - if faces_count > 1: - self.verify_output = True - logger.verbose("Found more than one face in " - "an image! '%s'", frame) + def check_skipframe(self, filename): + """ Check whether frame is to be skipped """ + if not self.frame_ranges: + return None + indices = self.imageidxre.findall(filename) + if not indices: + logger.warning("Could not determine frame number. 
Frame will be converted: '%s'", + filename) + return False + idx = int(indices[0]) if indices else None + skipframe = not any(map(lambda b: b[0] <= idx <= b[1], self.frame_ranges)) + return skipframe - yield filename, image, detected_faces - - def detect_faces(self, load_queue, filename, image): - """ Extract the face from a frame (If alignments file not found) """ - inp = {"filename": filename, - "image": image} - load_queue.put(inp) - faces = next(self.extractor.detect_faces()) - - landmarks = faces["landmarks"] - detected_faces = faces["detected_faces"] - final_faces = list() - - for idx, face in enumerate(detected_faces): - detected_face = DetectedFace() - detected_face.from_dlib_rect(face) - detected_face.landmarksXY = landmarks[idx] - final_faces.append(detected_face) - return final_faces + def get_detected_faces(self, filename, image, extract_queue): + """ Return detected faces from alignments or detector """ + logger.trace("Getting faces for: '%s'", filename) + if not self.extractor: + detected_faces = self.alignments_faces(os.path.basename(filename), image) + else: + detected_faces = self.detect_faces(extract_queue, filename, image) + logger.trace("Got %s faces for: '%s'", len(detected_faces), filename) + return detected_faces def alignments_faces(self, frame, image): """ Get the face from alignments file """ @@ -171,25 +348,229 @@ class Convert(): "skipping".format(frame)) return have_alignments - def convert(self, converter, item): - """ Apply the conversion transferring faces onto frames """ - try: - filename, image, faces = item - skip = self.opts.check_skipframe(filename) + def detect_faces(self, load_queue, filename, image): + """ Extract the face from a frame (If alignments file not found) """ + inp = {"filename": filename, + "image": image} + load_queue.put(inp) + faces = next(self.extractor.detect_faces()) - if not skip: - for face in faces: - image = converter.patch_image(image, face) - filename = str(self.output_dir / Path(filename).name) + 
landmarks = faces["landmarks"] + detected_faces = faces["detected_faces"] + final_faces = list() - if self.args.draw_transparent: - filename = "{}.png".format(os.path.splitext(filename)[0]) - logger.trace("Set extension to png: `%s`", filename) + for idx, face in enumerate(detected_faces): + detected_face = DetectedFace() + detected_face.from_dlib_rect(face) + detected_face.landmarksXY = landmarks[idx] + final_faces.append(detected_face) + return final_faces - cv2.imwrite(filename, image) # pylint: disable=no-member - except Exception as err: - logger.error("Failed to convert image: '%s'. Reason: %s", filename, err) - raise + # Saving tasks + def save(self, completion_event): + """ Save the converted images """ + logger.debug("Save Images: Start") + for _ in tqdm(range(self.total_count), desc="Converting", file=sys.stdout): + if self.save_queue.shutdown.is_set(): + logger.debug("Save Queue: Stop signal received. Terminating") + break + item = self.save_queue.get() + if item == "EOF": + break + filename, image = item + self.writer.write(filename, image) + self.writer.close() + completion_event.set() + logger.debug("Save Faces: Complete") + + +class Predict(): + """ Predict faces from incoming queue """ + def __init__(self, in_queue, queue_size, arguments): + logger.debug("Initializing %s: (args: %s, queue_size: %s, in_queue: %s)", + self.__class__.__name__, arguments, queue_size, in_queue) + self.batchsize = min(queue_size, 16) + self.args = arguments + self.in_queue = in_queue + self.out_queue = queue_manager.get_queue("patch") + self.serializer = Serializer.get_serializer("json") + self.faces_count = 0 + self.verify_output = False + self.pre_process = PostProcess(arguments) + self.model = self.load_model() + self.predictor = self.model.converter(self.args.swap_model) + self.queues = dict() + + self.thread = MultiThread(self.predict_faces, thread_count=1) + self.thread.start() + logger.debug("Initialized %s: (out_queue: %s)", self.__class__.__name__, 
self.out_queue) + + @property + def coverage_ratio(self): + """ Return coverage ratio from training options """ + return self.model.training_opts["coverage_ratio"] + + @property + def input_size(self): + """ Return the model input size """ + return self.model.input_shape[0] + + @property + def output_size(self): + """ Return the model output size """ + return self.model.output_shape[0] + + @property + def input_mask(self): + """ Return the input mask """ + mask = np.zeros(self.model.state.mask_shapes[0], dtype="float32") + retval = np.expand_dims(mask, 0) + return retval + + @property + def has_predicted_mask(self): + """ Return whether this model has a predicted mask """ + return bool(self.model.state.mask_shapes) + + def load_model(self): + """ Load the model requested for conversion """ + logger.debug("Loading Model") + model_dir = get_folder(self.args.model_dir, make_folder=False) + if not model_dir: + logger.error("%s does not exist.", self.args.model_dir) + exit(1) + trainer = self.get_trainer(model_dir) + model = PluginLoader.get_model(trainer)(model_dir, self.args.gpus, predict=True) + logger.debug("Loaded Model") + return model + + def get_trainer(self, model_dir): + """ Return the trainer name if provided, or read from state file """ + if self.args.trainer: + logger.debug("Trainer name provided: '%s'", self.args.trainer) + return self.args.trainer + + statefile = [fname for fname in os.listdir(str(model_dir)) + if fname.endswith("_state.json")] + if len(statefile) != 1: + logger.error("There should be 1 state file in your model folder. %s were found. " + "Specify a trainer with the '-t', '--trainer' option.") + exit(1) + statefile = os.path.join(str(model_dir), statefile[0]) + + with open(statefile, "rb") as inp: + state = self.serializer.unmarshal(inp.read().decode("utf-8")) + trainer = state.get("name", None) + + if not trainer: + logger.error("Trainer name could not be read from state file. 
" + "Specify a trainer with the '-t', '--trainer' option.") + exit(1) + logger.debug("Trainer from state file: '%s'", trainer) + return trainer + + def predict_faces(self): + """ Get detected faces from images """ + faces_seen = 0 + batch = list() + while True: + item = self.in_queue.get() + if item != "EOF": + logger.trace("Got from queue: '%s'", item["filename"]) + faces_count = len(item["detected_faces"]) + if faces_count != 0: + self.pre_process.do_actions(item) + self.faces_count += faces_count + if faces_count > 1: + self.verify_output = True + logger.verbose("Found more than one face in an image! '%s'", + os.path.basename(item["filename"])) + + self.load_aligned(item) + + faces_seen += faces_count + batch.append(item) + + if faces_seen < self.batchsize and item != "EOF": + logger.trace("Continuing. Current batchsize: %s", faces_seen) + continue + + if batch: + detected_batch = [detected_face for item in batch + for detected_face in item["detected_faces"]] + feed_faces = self.compile_feed_faces(detected_batch) + predicted = self.predict(feed_faces) + + self.queue_out_frames(batch, predicted) + + faces_seen = 0 + batch = list() + if item == "EOF": + logger.debug("Load queue complete") + break + self.out_queue.put("EOF") + + def load_aligned(self, item): + """ Load the feed faces and reference output faces """ + logger.trace("Loading aligned faces: '%s'", item["filename"]) + for detected_face in item["detected_faces"]: + detected_face.load_feed_face(item["image"], + size=self.input_size, + coverage_ratio=self.coverage_ratio, + dtype="float32") + if self.input_size == self.output_size: + detected_face.reference = detected_face.feed + else: + detected_face.load_reference_face(item["image"], + size=self.output_size, + coverage_ratio=self.coverage_ratio, + dtype="float32") + logger.trace("Loaded aligned faces: '%s'", item["filename"]) + + @staticmethod + def compile_feed_faces(detected_faces): + """ Compile the faces for feeding into the predictor """ + 
logger.trace("Compiling feed face. Batchsize: %s", len(detected_faces)) + feed_faces = np.stack([detected_face.feed_face for detected_face in detected_faces]) + logger.trace("Compiled Feed faces. Shape: %s", feed_faces.shape) + return feed_faces + + def predict(self, feed_faces): + """ Perform inference on the feed """ + logger.trace("Predicting: Batchsize: %s", len(feed_faces)) + feed = [feed_faces] + if self.has_predicted_mask: + feed.append(np.repeat(self.input_mask, feed_faces.shape[0], axis=0)) + logger.trace("Input shape(s): %s", [item.shape for item in feed]) + + predicted = self.predictor(feed) + predicted = predicted if isinstance(predicted, list) else [predicted] + logger.trace("Output shape(s): %s", [predict.shape for predict in predicted]) + + # Compile masks into alpha channel or keep raw faces + predicted = np.concatenate(predicted, axis=-1) if len(predicted) == 2 else predicted[0] + predicted = predicted.astype("float32") + + logger.trace("Final shape: %s", predicted.shape) + return predicted + + def queue_out_frames(self, batch, swapped_faces): + """ Compile the batch back to original frames and put to out_queue """ + logger.trace("Queueing out batch. Batchsize: %s", len(batch)) + pointer = 0 + for item in batch: + num_faces = len(item["detected_faces"]) + if num_faces == 0: + item["swapped_faces"] = np.array(list()) + else: + item["swapped_faces"] = swapped_faces[pointer:pointer + num_faces] + + logger.trace("Putting to queue. ('%s', detected_faces: %s, swapped_faces: %s)", + item["filename"], len(item["detected_faces"]), + item["swapped_faces"].shape[0]) + self.out_queue.put(item) + pointer += num_faces + logger.trace("Queued out batch. 
Batchsize: %s", len(batch)) class OptionalActions(): @@ -200,8 +581,6 @@ class OptionalActions(): self.args = args self.input_images = input_images self.alignments = alignments - self.frame_ranges = self.get_frame_ranges() - self.imageidxre = re.compile(r"[^(mp4)](\d+)(?!.*\d)") self.remove_skipped_faces() logger.debug("Initialized %s", self.__class__.__name__) @@ -245,30 +624,6 @@ class OptionalActions(): "directory, are you sure this is the right folder?") return face_hashes - # SKIP FRAME RANGES # - def get_frame_ranges(self): - """ split out the frame ranges and parse out 'min' and 'max' values """ - if not self.args.frame_ranges: - return None - - minmax = {"min": 0, # never any frames less than 0 - "max": float("inf")} - rng = [tuple(map(lambda q: minmax[q] if q in minmax.keys() else int(q), - v.split("-"))) - for v in self.args.frame_ranges] - return rng - - def check_skipframe(self, filename): - """ Check whether frame is to be skipped """ - if not self.frame_ranges: - return None - idx = int(self.imageidxre.findall(filename)[0]) - skipframe = not any(map(lambda b: b[0] <= idx <= b[1], - self.frame_ranges)) - if skipframe and self.args.discard_frames: - skipframe = "discard" - return skipframe - class Legacy(): """ Update legacy alignments: diff --git a/tools/cli.py b/tools/cli.py index b82be50..0b9aa56 100644 --- a/tools/cli.py +++ b/tools/cli.py @@ -10,14 +10,14 @@ class AlignmentsArgs(FaceSwapArgs): """ Class to parse the command line arguments for Aligments tool """ def get_argument_list(self): - frames_dir = "\n\tMust Pass in a frames folder/source video file (-fr)." - faces_dir = "\n\tMust Pass in a faces folder (-fc)." - frames_or_faces_dir = ("\n\tMust Pass in either a frames folder/source video file OR a" - "\n\tfaces folder (-fr or -fc).") - frames_and_faces_dir = ("\n\tMust Pass in a frames folder/source video file AND a faces " - "\n\tfolder (-fr and -fc).") - output_opts = "\n\tUse the output option (-o) to process results." 
- align_eyes = "\n\tCan optionally use the align-eyes switch (-ae)." + frames_dir = " Must Pass in a frames folder/source video file (-fr)." + faces_dir = " Must Pass in a faces folder (-fc)." + frames_or_faces_dir = (" Must Pass in either a frames folder/source video file OR a" + "faces folder (-fr or -fc).") + frames_and_faces_dir = (" Must Pass in a frames folder/source video file AND a faces " + "folder (-fr and -fc).") + output_opts = " Use the output option (-o) to process results." + align_eyes = " Can optionally use the align-eyes switch (-ae)." argument_list = list() argument_list.append({ "opts": ("-j", "--job"), @@ -28,59 +28,57 @@ class AlignmentsArgs(FaceSwapArgs): "multi-faces", "no-faces", "reformat", "remove-faces", "remove-frames", "rename", "sort-x", "sort-y", "spatial", "update-hashes"), "required": True, - "help": "R|Choose which action you want to perform.\n" + "help": "R|Choose which action you want to perform. " "NB: All actions require an alignments file (-a) to be passed in." - "\n'draw': Draw landmarks on frames in the selected folder/video. A subfolder" - "\n\twill be created within the frames folder to hold the output." + + "\nL|'draw': Draw landmarks on frames in the selected folder/video. A " + "subfolder will be created within the frames folder to hold the output." + frames_dir + align_eyes + - "\n'extract': Re-extract faces from the source frames/video based on " - "\n\talignment data. This is a lot quicker than re-detecting faces." + + "\nL|'extract': Re-extract faces from the source frames/video based on " + "alignment data. This is a lot quicker than re-detecting faces." + frames_and_faces_dir + align_eyes + - "\n'extract-large' - Extract all faces that have not been upscaled. Useful" - "\n\tfor excluding low-res images from a training set." + + "\nL|'extract-large' - Extract all faces that have not been upscaled. Useful " + "for excluding low-res images from a training set." 
+ frames_and_faces_dir + align_eyes + - "\n'manual': Manually view and edit landmarks." + frames_dir + align_eyes + - "\n'merge': Merge multiple alignment files into one. Specify a space " - "\n\tseparated list of alignments files with the -a flag." - "\n'missing-alignments': Identify frames that do not exist in the alignments" - "\n\tfile." + output_opts + frames_dir + - "\n'missing-frames': Identify frames in the alignments file that do no " - "\n\tappear within the frames folder/video." + output_opts + frames_dir + - "\n'legacy': This updates legacy alignments to the latest format by rotating" - "\n\tthe landmarks and bounding boxes and adding face_hashes." + + "\nL|'manual': Manually view and edit landmarks." + frames_dir + align_eyes + + "\nL|'merge': Merge multiple alignment files into one. Specify a space " + "separated list of alignments files with the -a flag." + "\nL|'missing-alignments': Identify frames that do not exist in the " + "alignments file." + output_opts + frames_dir + + "\nL|'missing-frames': Identify frames in the alignments file that do not " + "appear within the frames folder/video." + output_opts + frames_dir + + "\nL|'legacy': This updates legacy alignments to the latest format by " + "rotating the landmarks and bounding boxes and adding face_hashes." + frames_and_faces_dir + - "\n'leftover-faces': Identify faces in the faces folder that do not exist in" - "\n\tthe alignments file." + output_opts + faces_dir + - "\n'multi-faces': Identify where multiple faces exist within the alignments" - "\n\tfile." + output_opts + frames_or_faces_dir + - "\n'no-faces': Identify frames that exist within the alignment file but no" - "\n\tfaces were detected." + output_opts + frames_dir + - "\n'reformat': Save a copy of alignments file in a different format. Specify" - "\n\ta format with the -fmt option." 
- "\n\tAlignments can be converted from DeepFaceLab by specifing:" - "\n\t -a dfl" - "\n\t -fc " - "\n'remove-faces': Remove deleted faces from an alignments file. The original" - "\n\talignments file will be backed up. A different file format for the" - "\n\talignments file can optionally be specified (-fmt)." + faces_dir + - "\n'remove-frames': Remove deleted frames from an alignments file. The" - "\n\toriginal alignments file will be backed up. A different file format for" - "\n\tthe alignments file can optionally be specified (-fmt)." + frames_dir + - "\n'rename' - Rename faces to correspond with their parent frame and position" - "\n\tindex in the alignments file (i.e. how they are named after running" - "\n\textract)." + faces_dir + - "\n'sort-x': Re-index the alignments from left to right. For alignments with" - "\n\tmultiple faces this will ensure that the left-most face is at index 0" - "\n\tOptionally pass in a faces folder (-fc) to also rename extracted faces." - "\n'sort-y': Re-index the alignments from top to bottom. For alignments with" - "\n\tmultiple faces this will ensure that the top-most face is at index 0" - "\n\tOptionally pass in a faces folder (-fc) to also rename extracted faces." - "\n'spatial': Perform spatial and temporal filtering to smooth alignments" - "\n\t(EXPERIMENTAL!)" - "\n'update-hashes': Recalculate the face hashes. Only use this if you have " - "\n\taltered the extracted faces (e.g. colour adjust). The files MUST be " - "\n\tnamed '_face index' (i.e. how they are named after running" - "\n\textract)." + faces_dir}) + "\nL|'leftover-faces': Identify faces in the faces folder that do not exist " + "in the alignments file." + output_opts + faces_dir + + "\nL|'multi-faces': Identify where multiple faces exist within the alignments " + "file." + output_opts + frames_or_faces_dir + + "\nL|'no-faces': Identify frames that exist within the alignment file but no " + "faces were detected." 
+ output_opts + frames_dir + "\nL|'reformat': Save a copy of alignments file in a different format. " + "Specify a format with the -fmt option. Alignments can be converted from " + "DeepFaceLab by specifying: '-a dfl -fc '" + "\nL|'remove-faces': Remove deleted faces from an alignments file. The " + "original alignments file will be backed up. A different file format for the " + "alignments file can optionally be specified (-fmt)." + faces_dir + "\nL|'remove-frames': Remove deleted frames from an alignments file. The " + "original alignments file will be backed up. A different file format for " + "the alignments file can optionally be specified (-fmt)." + frames_dir + "\nL|'rename': Rename faces to correspond with their parent frame and " + "position index in the alignments file (i.e. how they are named after running " + "extract)." + faces_dir + "\nL|'sort-x': Re-index the alignments from left to right. For alignments " + "with multiple faces this will ensure that the left-most face is at index 0. " + "Optionally pass in a faces folder (-fc) to also rename extracted faces." + "\nL|'sort-y': Re-index the alignments from top to bottom. For alignments " + "with multiple faces this will ensure that the top-most face is at index 0. " + "Optionally pass in a faces folder (-fc) to also rename extracted faces." + "\nL|'spatial': Perform spatial and temporal filtering to smooth alignments " + "(EXPERIMENTAL!)" + "\nL|'update-hashes': Recalculate the face hashes. Only use this if you have " + "altered the extracted faces (e.g. colour adjust). The files MUST be " + "named '_face index' (i.e. how they are named after running " + "extract)."
+ faces_dir}) argument_list.append({"opts": ("-a", "--alignments_file"), "action": FilesFullPaths, "dest": "alignments_file", @@ -110,13 +108,12 @@ class AlignmentsArgs(FaceSwapArgs): "type": str, "choices": ("console", "file", "move"), "default": "console", - "help": "R|How to output discovered items ('faces' and" - "\n'frames' only):" - "\n'console': Print the list of frames to the screen. (DEFAULT)" - "\n'file': Output the list of frames to a text file (stored within the source" - "\n\tdirectory)." - "\n'move': Move the discovered items to a sub-folder within the source" - "\n\tdirectory."}) + "help": "R|How to output discovered items ('faces' and 'frames' only):" + "\nL|'console': Print the list of frames to the screen. (DEFAULT)" + "\nL|'file': Output the list of frames to a text file (stored within the " + "source directory)." + "\nL|'move': Move the discovered items to a sub-folder within the source " + "directory."}) argument_list.append({"opts": ("-sz", "--size"), "type": int, "action": Slider, @@ -170,11 +167,11 @@ class EffmpegArgs(FaceSwapArgs): "get-info", "mux-audio", "rescale", "rotate", "slice"), "default": "extract", - "help": "Choose which action you want ffmpeg " - "ffmpeg to do.\n" - "'slice' cuts a portion of the video " - "into a separate video file.\n" - "'get-fps' returns the chosen video's " + "help": "R|Choose which action you want ffmpeg " + "to do." + "\nL|'slice' cuts a portion of the video " + "into a separate video file." + "\nL|'get-fps' returns the chosen video's " "fps."}) argument_list.append({"opts": ('-i', '--input'), @@ -198,7 +195,7 @@ class EffmpegArgs(FaceSwapArgs): "meant to be a directory then a " "directory called 'out' will be " "created inside the input " - "directory.\n" + "directory. "
"Note: the chosen output file " "extension will determine the file " "encoding.", @@ -240,7 +237,7 @@ class EffmpegArgs(FaceSwapArgs): "dest": "start", "default": "00:00:00", "help": "Enter the start time from which an " - "action is to be applied.\n" + "action is to be applied. " "Default: 00:00:00, in HH:MM:SS " "format. You can also enter the time " "with or without the colons, e.g. " @@ -254,7 +251,7 @@ class EffmpegArgs(FaceSwapArgs): "is to be applied. If both an end time " "and duration are set, then the end " "time will be used and the duration " - "will be ignored.\n" + "will be ignored. " "Default: 00:00:00, in HH:MM:SS."}) argument_list.append({"opts": ('-d', '--duration'), @@ -266,7 +263,7 @@ class EffmpegArgs(FaceSwapArgs): "00:00:10 for slice, then the first 10 " "seconds after and including the start " "time will be cut out into a new " - "video.\n" + "video. " "Default: 00:00:00, in HH:MM:SS " "format. You can also enter the time " "with or without the colons, e.g. " @@ -369,13 +366,14 @@ class SortArgs(FaceSwapArgs): "choices": ("folders", "rename"), "dest": 'final_process', "default": "rename", - "help": "R|\n'folders': files are sorted using " - "the -s/--sort-by\n\tmethod, then they " - "are organized into\n\tfolders using " - "the -g/--group-by grouping\n\tmethod." - "\n'rename': files are sorted using " - "the -s/--sort-by\n\tthen they are " - "renamed.\nDefault: rename"}) + "help": "R|Default: rename." + "\nL|'folders': files are sorted using " + "the -s/--sort-by method, then they " + "are organized into folders using " + "the -g/--group-by grouping method." 
+ "\nL|'rename': files are sorted using " + "the -s/--sort-by then they are " + "renamed."}) argument_list.append({"opts": ('-k', '--keep'), "action": 'store_true', diff --git a/tools/effmpeg.py b/tools/effmpeg.py index d7f81e6..3453693 100644 --- a/tools/effmpeg.py +++ b/tools/effmpeg.py @@ -15,6 +15,7 @@ import subprocess import datetime from collections import OrderedDict +import imageio_ffmpeg as im_ffm from ffmpy import FFprobe, FFmpeg, FFRuntimeError # faceswap imports @@ -145,7 +146,7 @@ class Effmpeg(): "rotate", "slice"] # Class variable that stores the target executable (ffmpeg or ffplay) - _executable = 'ffmpeg' + _executable = im_ffm.get_ffmpeg_exe() # Class variable that stores the common ffmpeg arguments based on verbosity __common_ffmpeg_args_dict = {"normal": "-hide_banner ", @@ -160,7 +161,7 @@ class Effmpeg(): def __init__(self, arguments): logger.debug("Initializing %s: (arguments: %s)", self.__class__.__name__, arguments) self.args = arguments - self.exe = "ffmpeg" + self.exe = im_ffm.get_ffmpeg_exe() self.input = DataItem() self.output = DataItem() self.ref_vid = DataItem() @@ -494,7 +495,7 @@ class Effmpeg(): return all(getattr(self, i).fps is None for i in items_to_check) @staticmethod - def __run_ffmpeg(exe="ffmpeg", inputs=None, outputs=None): + def __run_ffmpeg(exe=im_ffm.get_ffmpeg_exe(), inputs=None, outputs=None): """ Run ffmpeg """ logger.debug("Running ffmpeg: (exe: '%s', inputs: %s, outputs: %s", exe, inputs, outputs) ffm = FFmpeg(executable=exe, inputs=inputs, outputs=outputs)