Release on 09.11.24

2024-11-09 17:19:42 +03:00
commit 3c64fe0c3e
339 changed files with 456101 additions and 0 deletions
--- a/comfy_extras/nodes_sd3.py
+++ b/comfy_extras/nodes_sd3.py
@@ -0,0 +1,167 @@
+import folder_paths
+import comfy.sd
+import comfy.model_management
+import nodes
+import torch
+import re
+class TripleCLIPLoader:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "clip_name1": (folder_paths.get_filename_list("text_encoders"), ), "clip_name2": (folder_paths.get_filename_list("text_encoders"), ), "clip_name3": (folder_paths.get_filename_list("text_encoders"), )
+                             }}
+    RETURN_TYPES = ("CLIP",)
+    FUNCTION = "load_clip"
+
+    CATEGORY = "advanced/loaders"
+
+    def load_clip(self, clip_name1, clip_name2, clip_name3):
+        clip_path1 = folder_paths.get_full_path_or_raise("text_encoders", clip_name1)
+        clip_path2 = folder_paths.get_full_path_or_raise("text_encoders", clip_name2)
+        clip_path3 = folder_paths.get_full_path_or_raise("text_encoders", clip_name3)
+        clip = comfy.sd.load_clip(ckpt_paths=[clip_path1, clip_path2, clip_path3], embedding_directory=folder_paths.get_folder_paths("embeddings"))
+        return (clip,)
+
+class EmptySD3LatentImage:
+    def __init__(self):
+        self.device = comfy.model_management.intermediate_device()
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "width": ("INT", {"default": 1024, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                              "height": ("INT", {"default": 1024, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
+    RETURN_TYPES = ("LATENT",)
+    FUNCTION = "generate"
+
+    CATEGORY = "latent/sd3"
+
+    def generate(self, width, height, batch_size=1):
+        latent = torch.zeros([batch_size, 16, height // 8, width // 8], device=self.device)
+        return ({"samples":latent}, )
+
+class CLIPTextEncodeSD3:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {
+            "clip": ("CLIP", ),
+            "clip_l": ("STRING", {"multiline": True, "dynamicPrompts": True}),
+            "clip_g": ("STRING", {"multiline": True, "dynamicPrompts": True}),
+            "t5xxl": ("STRING", {"multiline": True, "dynamicPrompts": True}),
+            "empty_padding": (["none", "empty_prompt"], )
+            }}
+    RETURN_TYPES = ("CONDITIONING",)
+    FUNCTION = "encode"
+
+    CATEGORY = "advanced/conditioning"
+
+    def encode(self, clip, clip_l, clip_g, t5xxl, empty_padding):
+        no_padding = empty_padding == "none"
+
+        tokens = clip.tokenize(clip_g)
+        if len(clip_g) == 0 and no_padding:
+            tokens["g"] = []
+
+        if len(clip_l) == 0 and no_padding:
+            tokens["l"] = []
+        else:
+            tokens["l"] = clip.tokenize(clip_l)["l"]
+
+        if len(t5xxl) == 0 and no_padding:
+            tokens["t5xxl"] =  []
+        else:
+            tokens["t5xxl"] = clip.tokenize(t5xxl)["t5xxl"]
+        if len(tokens["l"]) != len(tokens["g"]):
+            empty = clip.tokenize("")
+            while len(tokens["l"]) < len(tokens["g"]):
+                tokens["l"] += empty["l"]
+            while len(tokens["l"]) > len(tokens["g"]):
+                tokens["g"] += empty["g"]
+        cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True)
+        return ([[cond, {"pooled_output": pooled}]], )
+
+
+class ControlNetApplySD3(nodes.ControlNetApplyAdvanced):
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"positive": ("CONDITIONING", ),
+                             "negative": ("CONDITIONING", ),
+                             "control_net": ("CONTROL_NET", ),
+                             "vae": ("VAE", ),
+                             "image": ("IMAGE", ),
+                             "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                             "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
+                             "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001})
+                             }}
+    CATEGORY = "conditioning/controlnet"
+    DEPRECATED = True
+
+class SkipLayerGuidanceSD3:
+    '''
+    Enhance guidance towards detailed dtructure by having another set of CFG negative with skipped layers.
+    Inspired by Perturbed Attention Guidance (https://arxiv.org/abs/2403.17377)
+    Experimental implementation by Dango233@StabilityAI.
+    '''
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"model": ("MODEL", ),
+                             "layers": ("STRING", {"default": "7, 8, 9", "multiline": False}),
+                             "scale": ("FLOAT", {"default": 3.0, "min": 0.0, "max": 10.0, "step": 0.1}),
+                             "start_percent": ("FLOAT", {"default": 0.01, "min": 0.0, "max": 1.0, "step": 0.001}),
+                             "end_percent": ("FLOAT", {"default": 0.15, "min": 0.0, "max": 1.0, "step": 0.001})
+                                }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "skip_guidance"
+
+    CATEGORY = "advanced/guidance"
+
+
+    def skip_guidance(self, model, layers, scale, start_percent, end_percent):
+        if layers == "" or layers == None:
+            return (model, )
+        # check if layer is comma separated integers
+        def skip(args, extra_args):
+            return args
+
+        model_sampling = model.get_model_object("model_sampling")
+        sigma_start = model_sampling.percent_to_sigma(start_percent)
+        sigma_end = model_sampling.percent_to_sigma(end_percent)
+
+        def post_cfg_function(args):
+            model = args["model"]
+            cond_pred = args["cond_denoised"]
+            cond = args["cond"]
+            cfg_result = args["denoised"]
+            sigma = args["sigma"]
+            x = args["input"]
+            model_options = args["model_options"].copy()
+        
+            for layer in layers:
+                model_options = comfy.model_patcher.set_model_options_patch_replace(model_options, skip, "dit", "double_block", layer)
+            model_sampling.percent_to_sigma(start_percent)
+
+            sigma_ = sigma[0].item()
+            if scale > 0 and sigma_ >= sigma_end and sigma_ <= sigma_start:
+                (slg,) = comfy.samplers.calc_cond_batch(model, [cond], x, sigma, model_options)
+                cfg_result = cfg_result + (cond_pred - slg) * scale
+            return cfg_result
+
+        layers = re.findall(r'\d+', layers)
+        layers = [int(i) for i in layers]
+        m = model.clone()
+        m.set_model_sampler_post_cfg_function(post_cfg_function)
+
+        return (m, )
+
+
+NODE_CLASS_MAPPINGS = {
+    "TripleCLIPLoader": TripleCLIPLoader,
+    "EmptySD3LatentImage": EmptySD3LatentImage,
+    "CLIPTextEncodeSD3": CLIPTextEncodeSD3,
+    "ControlNetApplySD3": ControlNetApplySD3,
+    "SkipLayerGuidanceSD3": SkipLayerGuidanceSD3,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+    # Sampling
+    "ControlNetApplySD3": "Apply Controlnet with VAE",
+}