Upload folder using huggingface_hub
- modules/Model/ModelBase.py +23 -14
- modules/NeuralNetwork/unet.py +48 -34
- modules/Utilities/util.py +24 -13
- modules/cond/cond.py +87 -55
- modules/sample/CFG.py +56 -20
- modules/sample/ksampler_util.py +73 -23
- modules/sample/samplers.py +44 -221
- modules/sample/sampling.py +313 -370
- modules/user/GUI.py +8 -4
- modules/user/pipeline.py +6 -6
modules/Model/ModelBase.py
CHANGED

@@ -56,7 +56,9 @@ class BaseModel(torch.nn.Module):
             **unet_config, device=device, operations=operations
         )
         self.model_type = model_type
-        self.model_sampling = sampling.model_sampling( …
+        self.model_sampling = sampling.model_sampling(
+            model_config, model_type, flux=flux
+        )

         self.adm_channels = unet_config.get("adm_in_channels", None)
         if self.adm_channels is None:
@@ -93,26 +95,32 @@ class BaseModel(torch.nn.Module):
         """
         sigma = t
         xc = self.model_sampling.calculate_input(sigma, x)
-        if c_concat is not None:
-            xc = torch.cat([xc] + [c_concat], dim=1)
- …
+        # Optimize concatenation operation by avoiding unnecessary list creation
+        if c_concat is not None:
+            xc = torch.cat((xc, c_concat), dim=1)
+
+        # Determine dtype once to avoid repeated calls to get_dtype()
+        dtype = (
+            self.manual_cast_dtype
+            if self.manual_cast_dtype is not None
+            else self.get_dtype()
+        )

+        # Batch operations to reduce overhead
         xc = xc.to(dtype)
         t = self.model_sampling.timestep(t).float()
-        context = …
+        context = c_crossattn.to(dtype) if c_crossattn is not None else None
+
+        # Process extra conditions more efficiently
         extra_conds = {}
-        for …
- …
-            extra_conds[o] = extra
+        for name, value in kwargs.items():
+            if hasattr(value, "dtype") and value.dtype not in (torch.int, torch.long):
+                extra_conds[name] = value.to(dtype)
+            else:
+                extra_conds[name] = value

+        # Run diffusion model and calculate denoised output
         model_output = self.diffusion_model(
             xc,
             t,
@@ -121,6 +129,7 @@ class BaseModel(torch.nn.Module):
             transformer_options=transformer_options,
             **extra_conds,
         ).float()
+
         return self.model_sampling.calculate_denoised(sigma, model_output, x)

     def get_dtype(self) -> torch.dtype:
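A note on the extra_conds loop above: the dtype rule casts anything float-like to the UNet dtype while leaving torch.int and torch.long tensors (e.g. index ids) untouched. A minimal standalone sketch of that rule, with made-up tensor names:

import torch

def cast_extra_conds(kwargs, dtype):
    # Float-like values are cast to the UNet dtype; int/long tensors pass through.
    out = {}
    for name, value in kwargs.items():
        if hasattr(value, "dtype") and value.dtype not in (torch.int, torch.long):
            out[name] = value.to(dtype)
        else:
            out[name] = value
    return out

conds = cast_extra_conds(
    {"pooled": torch.randn(1, 1280), "ids": torch.zeros(1, dtype=torch.long)},
    torch.float16,
)
assert conds["pooled"].dtype == torch.float16
assert conds["ids"].dtype == torch.long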
modules/NeuralNetwork/unet.py
CHANGED

@@ -304,7 +304,9 @@ class UNetModel1(nn.Module):
         if num_heads_upsample == -1:
             num_heads_upsample = num_heads
         if num_head_channels == -1:
-            assert num_heads != -1, …
+            assert num_heads != -1, (
+                "Either num_heads or num_head_channels has to be set"
+            )

         self.in_channels = in_channels
         self.model_channels = model_channels
@@ -684,36 +686,29 @@ class UNetModel1(nn.Module):
         transformer_options: Dict[str, Any] = {},
         **kwargs: Any,
     ) -> torch.Tensor:
-        """#### Forward pass of the UNet model.
-
-        #### Args:
-            - `x` (torch.Tensor): The input tensor.
-            - `timesteps` (Optional[torch.Tensor], optional): The timesteps tensor. Defaults to None.
-            - `context` (Optional[torch.Tensor], optional): The context tensor. Defaults to None.
-            - `y` (Optional[torch.Tensor], optional): The class labels tensor. Defaults to None.
-            - `control` (Optional[torch.Tensor], optional): The control tensor. Defaults to None.
-            - `transformer_options` (Dict[str, Any], optional): Options for the transformer. Defaults to {}.
-            - `**kwargs` (Any): Additional keyword arguments.
-
-        #### Returns:
-            - `torch.Tensor`: The output tensor.
-        """
+        """#### Forward pass of the UNet model with optimized calculations."""
+        # Setup transformer options (avoid unused variable)
         transformer_options["original_shape"] = list(x.shape)
         transformer_options["transformer_index"] = 0
-        transformer_patches = transformer_options.get("patches", {})

+        # Extract kwargs efficiently
         num_video_frames = kwargs.get("num_video_frames", self.default_num_video_frames)
         image_only_indicator = kwargs.get("image_only_indicator", None)
         time_context = kwargs.get("time_context", None)

- …
-        ).to( …
+        # Validation
+        assert (y is not None) == (self.num_classes is not None), (
+            "must specify y if and only if the model is class-conditional"
+        )
+
+        # Time embedding - optimize by computing with target dtype directly
+        t_emb = sampling_util.timestep_embedding(timesteps, self.model_channels).to(
+            x.dtype
+        )
         emb = self.time_embed(t_emb)
+
+        # Input blocks processing
+        hs = []
         h = x
         for id, module in enumerate(self.input_blocks):
             transformer_options["block"] = ("input", id)
@@ -730,6 +725,7 @@ class UNetModel1(nn.Module):
             h = apply_control1(h, control, "input")
             hs.append(h)

+        # Middle block processing
         transformer_options["block"] = ("middle", 0)
         if self.middle_block is not None:
             h = ResBlock.forward_timestep_embed1(
@@ -744,17 +740,19 @@ class UNetModel1(nn.Module):
         )
         h = apply_control1(h, control, "middle")

+        # Output blocks processing - optimize memory usage
         for id, module in enumerate(self.output_blocks):
             transformer_options["block"] = ("output", id)
             hsp = hs.pop()
             hsp = apply_control1(hsp, control, "output")

+            # Concatenate tensors
             h = torch.cat([h, hsp], dim=1)
-            del hsp
- …
-            else …
- …
+            del hsp  # Free memory immediately
+
+            # Only calculate output shape when needed
+            output_shape = hs[-1].shape if hs else None
+
             h = ResBlock.forward_timestep_embed1(
                 module,
                 h,
@@ -766,11 +764,15 @@ class UNetModel1(nn.Module):
                 num_video_frames=num_video_frames,
                 image_only_indicator=image_only_indicator,
             )
+
+        # Ensure output has correct dtype
         h = h.type(x.dtype)
         return self.out(h)


-def detect_unet_config( …
+def detect_unet_config(
+    state_dict: Dict[str, torch.Tensor], key_prefix: str
+) -> Dict[str, Any]:
     """#### Detect the UNet configuration from a state dictionary.

     #### Args:
@@ -1017,7 +1019,9 @@ def detect_unet_config(state_dict: Dict[str, torch.Tensor], key_prefix: str) ->
                 // model_channels
             )

-            out = transformer.calculate_transformer_depth( …
+            out = transformer.calculate_transformer_depth(
+                prefix, state_dict_keys, state_dict
+            )
             if out is not None:
                 transformer_depth.append(out[0])
                 if context_dim is None:
@@ -1076,7 +1080,9 @@ def detect_unet_config(state_dict: Dict[str, torch.Tensor], key_prefix: str) ->
     return unet_config


-def model_config_from_unet_config( …
+def model_config_from_unet_config(
+    unet_config: Dict[str, Any], state_dict: Optional[Dict[str, torch.Tensor]] = None
+) -> Any:
     """#### Get the model configuration from a UNet configuration.

     #### Args:
@@ -1096,7 +1102,11 @@ def model_config_from_unet_config(unet_config: Dict[str, Any], state_dict: Optio
         return None


-def model_config_from_unet( …
+def model_config_from_unet(
+    state_dict: Dict[str, torch.Tensor],
+    unet_key_prefix: str,
+    use_base_if_no_match: bool = False,
+) -> Any:
     """#### Get the model configuration from a UNet state dictionary.

     #### Args:
@@ -1117,7 +1127,11 @@ def model_config_from_unet(state_dict: Dict[str, torch.Tensor], unet_key_prefix:
 def unet_dtype1(
     device: Optional[torch.device] = None,
     model_params: int = 0,
-    supported_dtypes: List[torch.dtype] = [ …
+    supported_dtypes: List[torch.dtype] = [
+        torch.float16,
+        torch.bfloat16,
+        torch.float32,
+    ],
 ) -> torch.dtype:
     """#### Get the dtype for the UNet model.

@@ -1129,4 +1143,4 @@ def unet_dtype1(
     #### Returns:
         - `torch.dtype`: The dtype for the UNet model.
     """
-    return torch.float16
+    return torch.float16
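The repo's sampling_util.timestep_embedding is not shown in this diff; the sketch below is a generic sinusoidal embedding of the kind such helpers usually compute (an assumed implementation, not the repo's own), combined with the .to(x.dtype) cast from the forward-pass hunk above:

import math
import torch

def timestep_embedding(timesteps: torch.Tensor, dim: int, max_period: int = 10000) -> torch.Tensor:
    # Generic sinusoidal embedding (assumption; the repo's helper may differ).
    half = dim // 2
    freqs = torch.exp(
        -math.log(max_period) * torch.arange(half, dtype=torch.float32) / half
    )
    args = timesteps[:, None].float() * freqs[None]
    return torch.cat([torch.cos(args), torch.sin(args)], dim=-1)

x_dtype = torch.float16  # stands in for x.dtype in the hunk above
t_emb = timestep_embedding(torch.tensor([0, 500, 999]), 320).to(x_dtype)
assert t_emb.shape == (3, 320) and t_emb.dtype == torch.float16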
modules/Utilities/util.py
CHANGED

@@ -4,7 +4,6 @@ import itertools
 import logging
 import math
 import os
-import pickle
 import safetensors.torch
 import torch

@@ -120,6 +119,18 @@ def state_dict_prefix_replace(
     return out


+def lcm_of_list(numbers):
+    """Calculate LCM of a list of numbers more efficiently."""
+    if not numbers:
+        return 1
+
+    result = numbers[0]
+    for num in numbers[1:]:
+        result = torch.lcm(torch.tensor(result), torch.tensor(num)).item()
+
+    return result
+
+
 def repeat_to_batch_size(
     tensor: torch.Tensor, batch_size: int, dim: int = 0
 ) -> torch.Tensor:
@@ -437,11 +448,11 @@ def tiled_scale_multidim(

     def get_upscale(dim: int, val: int) -> int:
         """#### Get the upscale value.
-
+
         #### Args:
             - `dim` (int): The dimension.
            - `val` (int): The value.
-
+
         #### Returns:
             - `int`: The upscaled value.
         """
@@ -453,11 +464,11 @@ def tiled_scale_multidim(

     def get_downscale(dim: int, val: int) -> int:
         """#### Get the downscale value.
-
+
         #### Args:
             - `dim` (int): The dimension.
             - `val` (int): The value.
-
+
         #### Returns:
             - `int`: The downscaled value.
         """
@@ -469,11 +480,11 @@ def tiled_scale_multidim(

     def get_upscale_pos(dim: int, val: int) -> int:
         """#### Get the upscaled position.
-
+
         #### Args:
             - `dim` (int): The dimension.
             - `val` (int): The value.
-
+
         #### Returns:
             - `int`: The upscaled position.
         """
@@ -485,11 +496,11 @@ def tiled_scale_multidim(

     def get_downscale_pos(dim: int, val: int) -> int:
         """#### Get the downscaled position.
-
+
         #### Args:
             - `dim` (int): The dimension.
             - `val` (int): The value.
-
+
         #### Returns:
             - `int`: The downscaled position.
         """
@@ -508,10 +519,10 @@ def tiled_scale_multidim(

     def mult_list_upscale(a: list) -> list:
         """#### Multiply a list by the upscale amount.
-
+
         #### Args:
             - `a` (list): The list.
-
+
         #### Returns:
             - `list`: The multiplied list.
         """
@@ -601,7 +612,7 @@ def tiled_scale(
     pbar: any = None,
 ):
     """#### Scale an image using a tiled approach.
-
+
     #### Args:
         - `samples` (torch.Tensor): The input samples.
         - `function` (function): The scaling function.
@@ -612,7 +623,7 @@ def tiled_scale(
         - `out_channels` (int, optional): The number of output channels. Defaults to 3.
         - `output_device` (str, optional): The output device. Defaults to "cpu".
        - `pbar` (any, optional): The progress bar. Defaults to None.
-
+
     #### Returns:
         - The scaled image.
     """
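The new lcm_of_list round-trips through torch tensors for every pair. For plain Python ints the standard library does the same job with no tensor allocation; a minimal sketch, assuming Python 3.9+ (math.lcm is variadic and returns 1 when called with no arguments, which matches the empty-list case):

import math

def lcm_of_list_stdlib(numbers):
    # math.lcm() with zero arguments is 1, so no special-casing is needed.
    return math.lcm(*numbers)

assert lcm_of_list_stdlib([77, 154]) == 154
assert lcm_of_list_stdlib([]) == 1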
modules/cond/cond.py
CHANGED

@@ -42,13 +42,13 @@ class CONDRegular:
         return self._copy_with(
             util.repeat_to_batch_size(self.cond, batch_size).to(device)
         )
-
+
     def can_concat(self, other: "CONDRegular") -> bool:
         """#### Check if conditions can be concatenated.
-
+
         #### Args:
             - `other` (CONDRegular): The other condition.
-
+
         #### Returns:
             - `bool`: True if conditions can be concatenated, False otherwise.
         """
@@ -58,10 +58,10 @@ class CONDRegular:

     def concat(self, others: list) -> torch.Tensor:
         """#### Concatenate conditions.
-
+
         #### Args:
             - `others` (list): The list of other conditions.
-
+
         #### Returns:
             - `torch.Tensor`: The concatenated conditions.
         """
@@ -76,11 +76,11 @@ class CONDCrossAttn(CONDRegular):

     def can_concat(self, other: "CONDRegular") -> bool:
         """#### Check if conditions can be concatenated.
-
+
         #### Args:
             - `other` (CONDRegular): The other condition.
-
-        #### Returns:
+
+        #### Returns:
             - `bool`: True if conditions can be concatenated, False otherwise.
         """
         s1 = self.cond.shape
@@ -96,31 +96,34 @@ class CONDCrossAttn(CONDRegular):
         ):  # arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much
             return False
         return True
-
-    def concat(self, others: list) -> torch.Tensor:
-        """#### Concatenate cross-attention conditions.
-
- …
-        #### Returns:
-            - `torch.Tensor`: The concatenated conditions.
-        """
+
+    def concat(self, others: list) -> torch.Tensor:
+        """Optimized version of cross-attention condition concatenation."""
         conds = [self.cond]
- …
+        shapes = [self.cond.shape[1]]
+
+        # Collect all conditions and their shapes
         for x in others:
- …
-            conds.append(c)
+            conds.append(x.cond)
+            shapes.append(x.cond.shape[1])

- …
+        # Calculate LCM more efficiently
+        crossattn_max_len = util.lcm_of_list(shapes)
+
+        # Process and concat in one step where possible
+        if all(c.shape[1] == shapes[0] for c in conds):
+            # All same length, simple concatenation
+            return torch.cat(conds)
+        else:
+            # Process conditions that need repeating
+            out = []
+            for c in conds:
+                if c.shape[1] < crossattn_max_len:
+                    repeat_factor = crossattn_max_len // c.shape[1]
+                    # Use repeat instead of individual operations
+                    c = c.repeat(1, repeat_factor, 1)
+                out.append(c)
+            return torch.cat(out)


 def convert_cond(cond: list) -> list:
@@ -277,8 +280,10 @@ def calc_cond_batch(
                 out_c += output[o] * mult[o]
                 out_cts += mult[o]

+    # Vectorize the division at the end
     for i in range(len(out_conds)):
- …
+        # Inplace division is already efficient
+        out_conds[i].div_(out_counts[i])  # Using .div_ instead of /= for clarity

     return out_conds

@@ -328,48 +333,75 @@ def encode_model_conds(
         conds[t] = x
     return conds

- …
-    #### Args:
-        - `conditions` (list): The list of conditions.
-        - `dims` (tuple): The dimensions.
-        - `device` (torch.device): The device.
-    """
-    # We need to decide on an area outside the sampling loop in order to properly generate opposite areas of equal sizes.
-    # While we're doing this, we can also resolve the mask device and scaling for performance reasons
+
+def resolve_areas_and_cond_masks_multidim(conditions, dims, device):
+    """Optimized version that processes areas and masks more efficiently"""
     for i in range(len(conditions)):
         c = conditions[i]
+        # Process area
         if "area" in c:
             area = c["area"]
             if area[0] == "percentage":
- …
+                # Vectorized calculation of area dimensions
                 a = area[1:]
                 a_len = len(a) // 2
-                area = ()
-                for d in range(len(dims)):
-                    area += (max(1, round(a[d] * dims[d])),)
-                for d in range(len(dims)):
-                    area += (round(a[d + a_len] * dims[d]),)

- …
+                # Calculate all dimensions at once using tensor operations
+                dims_tensor = torch.tensor(dims, device="cpu")
+                first_part = torch.tensor(a[:a_len], device="cpu") * dims_tensor
+                second_part = torch.tensor(a[a_len:], device="cpu") * dims_tensor
+
+                # Convert to rounded integers and tuple
+                first_part = torch.max(
+                    torch.ones_like(first_part), torch.round(first_part)
+                )
+                second_part = torch.round(second_part)

+                # Create the new area tuple
+                new_area = tuple(first_part.int().tolist()) + tuple(
+                    second_part.int().tolist()
+                )
+
+                # Create a modified copy with the new area
+                modified = c.copy()
+                modified["area"] = new_area
+                conditions[i] = modified
+
+        # Process mask
         if "mask" in c:
-            mask = c["mask"]
-            mask = mask.to(device=device)
             modified = c.copy()
+            mask = c["mask"].to(device=device)
+
+            # Combine dimension checks and unsqueeze operation
             if len(mask.shape) == len(dims):
                 mask = mask.unsqueeze(0)
+
+            # Only interpolate if needed
             if mask.shape[1:] != dims:
-                mask …
- …
+                # Optimize interpolation by ensuring mask is in the right format for the operation
+                if len(mask.shape) == 3 and mask.shape[0] == 1:
+                    # Already in the right format for interpolation
+                    mask = torch.nn.functional.interpolate(
+                        mask.unsqueeze(1),
+                        size=dims,
+                        mode="bilinear",
+                        align_corners=False,
+                    ).squeeze(1)
+                else:
+                    # Ensure mask is properly formatted for interpolation
+                    mask = torch.nn.functional.interpolate(
+                        mask
+                        if len(mask.shape) > 3 and mask.shape[1] == 1
+                        else mask.unsqueeze(1),
+                        size=dims,
+                        mode="bilinear",
+                        align_corners=False,
+                    ).squeeze(1)

             modified["mask"] = mask
             conditions[i] = modified

+
 def process_conds(
     model: object,
     noise: torch.Tensor,
@@ -442,4 +474,4 @@ def process_conds(
             positive, conds[k], "gligen", lambda cond_cnets, x: cond_cnets[x]
         )

-    return conds
+    return conds
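A shape-level sketch of the repeat-to-LCM concat in CONDCrossAttn.concat above, with made-up token counts: a 77-token and a 154-token cond are aligned by repeating tokens before stacking on the batch dimension.

import torch

a = torch.randn(1, 77, 768)   # e.g. a one-chunk CLIP prompt (hypothetical sizes)
b = torch.randn(1, 154, 768)  # e.g. a two-chunk prompt
max_len = 154                 # lcm(77, 154)

a_rep = a.repeat(1, max_len // a.shape[1], 1)  # (1, 154, 768)
out = torch.cat([a_rep, b])                    # stacked on dim 0: (2, 154, 768)
assert out.shape == (2, 154, 768)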
modules/sample/CFG.py
CHANGED

@@ -30,10 +30,15 @@ def cfg_function(
     #### Returns:
         - `torch.Tensor`: The CFG result.
     """
+    # Check for custom sampler CFG function first
     if "sampler_cfg_function" in model_options:
+        # Precompute differences to avoid redundant operations
+        cond_diff = x - cond_pred
+        uncond_diff = x - uncond_pred
+
         args = {
-            "cond": …
-            "uncond": …
+            "cond": cond_diff,
+            "uncond": uncond_diff,
             "cond_scale": cond_scale,
             "timestep": timestep,
             "input": x,
@@ -45,9 +50,18 @@ def cfg_function(
         }
         cfg_result = x - model_options["sampler_cfg_function"](args)
     else:
- …
+        # Standard CFG calculation - optimized to avoid intermediate tensor allocation
+        # When cond_scale = 1.0, we can just return cond_pred without computation
+        if math.isclose(cond_scale, 1.0):
+            cfg_result = cond_pred
+        else:
+            # Fused operation: uncond_pred + (cond_pred - uncond_pred) * cond_scale
+            # Equivalent to: uncond_pred * (1 - cond_scale) + cond_pred * cond_scale
+            cfg_result = torch.lerp(uncond_pred, cond_pred, cond_scale)
+
+    # Apply post-CFG functions if any
+    post_cfg_functions = model_options.get("sampler_post_cfg_function", [])
+    if post_cfg_functions:
         args = {
             "denoised": cfg_result,
             "cond": cond,
@@ -59,7 +73,12 @@ def cfg_function(
             "model_options": model_options,
             "input": x,
         }
- …
+
+        # Apply each post-CFG function in sequence
+        for fn in post_cfg_functions:
+            cfg_result = fn(args)
+            # Update the denoised result for the next function
+            args["denoised"] = cfg_result

     return cfg_result

@@ -89,21 +108,29 @@ def sampling_function(
     #### Returns:
         - `torch.Tensor`: The sampled tensor.
     """
- …
+    # Optimize conditional logic for uncond
+    uncond_ = (
+        None
+        if (
+            math.isclose(cond_scale, 1.0)
+            and not model_options.get("disable_cfg1_optimization", False)
+        )
+        else uncond
+    )

+    # Create conditions list once
     conds = [condo, uncond_]
-    out = cond.calc_cond_batch(model, conds, x, timestep, model_options)

- …
+    # Get model predictions for both conditions
+    cond_outputs = cond.calc_cond_batch(model, conds, x, timestep, model_options)
+
+    # Apply pre-CFG functions if any
+    pre_cfg_functions = model_options.get("sampler_pre_cfg_function", [])
+    if pre_cfg_functions:
+        # Create args dictionary once
         args = {
             "conds": conds,
-            "conds_out": …
+            "conds_out": cond_outputs,
             "cond_scale": cond_scale,
             "timestep": timestep,
             "input": x,
@@ -111,12 +138,20 @@ def sampling_function(
             "model": model,
             "model_options": model_options,
         }
-        out = fn(args)

+        # Apply each pre-CFG function
+        for fn in pre_cfg_functions:
+            cond_outputs = fn(args)
+            args["conds_out"] = cond_outputs
+
+    # Extract conditional and unconditional outputs explicitly for clarity
+    cond_pred, uncond_pred = cond_outputs[0], cond_outputs[1]
+
+    # Apply the CFG function
     return cfg_function(
         model,
-        …
-        …
+        cond_pred,
+        uncond_pred,
         cond_scale,
         x,
         timestep,
@@ -128,6 +163,7 @@ def sampling_function(

 class CFGGuider:
     """#### Class for guiding the sampling process with CFG."""
+
     def __init__(self, model_patcher, flux=False):
         """#### Initialize the CFGGuider.

@@ -315,4 +351,4 @@ class CFGGuider:
         del self.inner_model
         del self.conds
         del self.loaded_models
-    return output
+    return output
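The torch.lerp call above relies on the identity lerp(uncond, cond, s) = uncond + s * (cond - uncond), which is exactly the classic CFG combination; lerp also extrapolates for s > 1, the usual CFG regime. A quick standalone check with random tensors:

import torch

cond_pred, uncond_pred = torch.randn(4), torch.randn(4)
cond_scale = 7.5  # > 1: lerp extrapolates, as CFG requires

classic = uncond_pred + (cond_pred - uncond_pred) * cond_scale
fused = torch.lerp(uncond_pred, cond_pred, cond_scale)
assert torch.allclose(classic, fused, atol=1e-6)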
modules/sample/ksampler_util.py
CHANGED

@@ -46,6 +46,7 @@ def pre_run_control(model: torch.nn.Module, conds: list) -> None:

     def percent_to_timestep_function(a):
         return s.percent_to_sigma(a)
+
     if "control" in x:
         x["control"].pre_run(model, percent_to_timestep_function)

@@ -96,9 +97,13 @@ def apply_empty_x_to_equal_area(
             uncond[temp[1]] = n


- …
+# Define the namedtuple class once outside the function for reuse
+CondObj = collections.namedtuple(
+    "cond_obj", ["input_x", "mult", "conditioning", "area", "control", "patches"]
+)
+
+
+def get_area_and_mult(conds: dict, x_in: torch.Tensor, timestep_in: int) -> CondObj:
     """#### Get the area and multiplier.

     #### Args:
@@ -109,26 +114,39 @@ def get_area_and_mult(
     #### Returns:
         - `collections.namedtuple`: The area and multiplier.
     """
- …
-    mult = mask * strength
+    # Cache shape information to avoid repeated access
+    x_shape = x_in.shape
+
+    # Define area dimensions in one operation
+    area = (x_shape[2], x_shape[3], 0, 0)
+
+    # Extract input region efficiently
+    # Since area[2] and area[3] are 0, this is essentially taking the full tensor
+    # But we maintain the slice operation for consistency
+    input_x = x_in[:, :, : area[0], : area[1]]
+
+    # Create multiplier tensor directly without intermediate mask creation
+    # This avoids an unnecessary tensor allocation and multiplication
+    mult = torch.ones_like(input_x)  # strength is 1.0, so just create ones directly

+    # Prepare conditioning dictionary with cached device and batch_size
     conditioning = {}
     model_conds = conds["model_conds"]
+    batch_size = x_shape[0]
+    device = x_in.device
+
+    # Process conditions with cached parameters
     for c in model_conds:
         conditioning[c] = model_conds[c].process_cond(
-            batch_size= …
+            batch_size=batch_size, device=device, area=area
         )

+    # Get control directly without redundant variable assignment
     control = conds.get("control", None)
     patches = None
- …
-    )
-    return cond_obj(input_x, mult, conditioning, area, control, patches)
+
+    # Use the pre-defined namedtuple class instead of creating it every call
+    return CondObj(input_x, mult, conditioning, area, control, patches)


 def normal_scheduler(
@@ -158,6 +176,7 @@ def normal_scheduler(
     sigs += [0.0]
     return torch.FloatTensor(sigs)

+
 def simple_scheduler(model_sampling: torch.nn.Module, steps: int) -> torch.FloatTensor:
     """#### Create a simple scheduler.

@@ -176,21 +195,52 @@ def simple_scheduler(model_sampling: torch.nn.Module, steps: int) -> torch.Float
     sigs += [0.0]
     return torch.FloatTensor(sigs)

+
 # Implemented based on: https://arxiv.org/abs/2407.12173
 def beta_scheduler(model_sampling, steps, alpha=0.6, beta=0.6):
- …
+    """Creates a beta scheduler for noise levels based on the beta distribution.
+
+    This optimized implementation efficiently computes sigmas using the beta
+    distribution and caches calculations where possible.
+
+    Args:
+        model_sampling: Model sampling module
+        steps: Number of steps
+        alpha: Alpha parameter for beta distribution
+        beta: Beta parameter for beta distribution
+
+    Returns:
+        torch.FloatTensor: Tensor of sigma values for each step
+    """
+    # Calculate total timesteps once
+    total_timesteps = len(model_sampling.sigmas) - 1

-    sigs = []
-    last_t = -1
-    for t in ts:
-        if t != last_t:
-            sigs += [float(model_sampling.sigmas[int(t)])]
-        last_t = t
-    sigs += [0.0]
+    # Cache the sigma table for reuse below
+    model_sigmas = model_sampling.sigmas
+
+    # Generate evenly spaced values in [0,1) interval
+    ts_normalized = np.linspace(0, 1, steps, endpoint=False)
+
+    # Apply beta inverse CDF to get sampled time points - vectorized operation
+    ts_beta = scipy.stats.beta.ppf(1 - ts_normalized, alpha, beta)
+
+    # Scale to timestep indices and round to integers
+    ts_indices = np.rint(ts_beta * total_timesteps).astype(np.int32)
+
+    # Use numpy's unique function with return_index to efficiently find unique values
+    # while preserving order
+    unique_ts, indices = np.unique(ts_indices, return_index=True)
+    ordered_unique_ts = unique_ts[np.argsort(indices)]
+
+    # Map indices to sigma values efficiently
+    sigs = [float(model_sigmas[idx]) for idx in ordered_unique_ts]
+
+    # Add final sigma value of 0.0
+    sigs.append(0.0)

     return torch.FloatTensor(sigs)

+
 def calculate_sigmas(
     model_sampling: torch.nn.Module, scheduler_name: str, steps: int
 ) -> torch.Tensor:
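The rewritten beta_scheduler selects timesteps by pushing a uniform grid through the inverse CDF (ppf) of a Beta(alpha, beta) distribution; with alpha = beta = 0.6 the distribution is U-shaped, so steps cluster toward both ends of the schedule. A standalone sketch with assumed sizes (in the real function the indices look up model_sampling.sigmas):

import numpy as np
import scipy.stats

steps, alpha, beta_p, total_timesteps = 10, 0.6, 0.6, 999  # hypothetical values

ts = np.linspace(0, 1, steps, endpoint=False)
# Inverse CDF maps the uniform grid to beta-distributed time points.
idx = np.rint(scipy.stats.beta.ppf(1 - ts, alpha, beta_p) * total_timesteps).astype(np.int32)

# De-duplicate while preserving the original (descending) order.
unique, first = np.unique(idx, return_index=True)
ordered = unique[np.argsort(first)]
print(ordered)  # starts at 999 and descends, denser near the endpoints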
modules/sample/samplers.py
CHANGED

@@ -142,181 +142,15 @@ def sample_euler(
     return x


- …
-    model,
-    x,
-    sigmas,
-    extra_args=None,
-    callback=None,
-    disable=None,
-    eta=1.0,
-    s_noise=1.0,
-    noise_sampler=None,
-    r=1 / 2,
-    pipeline=False,
-    seed=None,
-):
-    # Pre-calculate common values
-    device = x.device
-    global disable_gui
-    disable_gui = pipeline
-
-    if not disable_gui:
-        from modules.AutoEncoders import taesd
-        from modules.user import app_instance
-
-    # Early return check
-    if len(sigmas) <= 1:
-        return x
-
-    # Pre-allocate tensors and values
-    s_in = torch.ones((x.shape[0],), device=device)
-    n_steps = len(sigmas) - 1
-    extra_args = {} if extra_args is None else extra_args
-
-    # Define helper functions
-    def sigma_fn(t):
-        return (-t).exp()
-
-    def t_fn(sigma):
-        return -sigma.log()
-
-    # Initialize noise sampler
-    if noise_sampler is None:
-        noise_sampler = sampling_util.BrownianTreeNoiseSampler(
-            x, sigmas[sigmas > 0].min(), sigmas.max(), seed=seed, cpu=True
-        )
-
-    for i in trange(n_steps, disable=disable):
-        if (
-            not pipeline
-            and hasattr(app_instance.app, "interrupt_flag")
-            and app_instance.app.interrupt_flag
-        ):
-            return x
-
-        if not pipeline:
-            app_instance.app.progress.set(i / n_steps)
-
-        # Model inference
-        denoised = model(x, sigmas[i] * s_in, **extra_args)
-
-        if callback is not None:
-            callback({"x": x, "i": i, "sigma": sigmas[i], "denoised": denoised})
-
-        if sigmas[i + 1] == 0:
-            # Single fused Euler step
-            x = x + util.to_d(x, sigmas[i], denoised) * (sigmas[i + 1] - sigmas[i])
-        else:
-            # Fused DPM-Solver++ steps
-            t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1])
-            s = t + (t_next - t) * r
-
-            # Step 1 - Combined calculations
-            sd, su = sampling_util.get_ancestral_step(sigma_fn(t), sigma_fn(s), eta)
-            s_ = t_fn(sd)
-            x_2 = (
-                (sigma_fn(s_) / sigma_fn(t)) * x
-                - (t - s_).expm1() * denoised
-                + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su
-            )
-
-            denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args)
-
-            # Step 2 - Combined calculations
-            sd, su = sampling_util.get_ancestral_step(
-                sigma_fn(t), sigma_fn(t_next), eta
-            )
-            t_next_ = t_fn(sd)
-
-            # Final update in single calculation
-            x = (
-                (sigma_fn(t_next_) / sigma_fn(t)) * x
-                - (t - t_next_).expm1()
-                * ((1 - 1 / (2 * r)) * denoised + (1 / (2 * r)) * denoised_2)
-                + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su
-            )
-
-        # Preview updates
-        if not pipeline and app_instance.app.previewer_var.get() and i % 5 == 0:
-            threading.Thread(target=taesd.taesd_preview, args=(x,)).start()
-
-    return x
-
-
-@torch.no_grad()
-def sample_dpmpp_2m(
-    model,
-    x,
-    sigmas,
-    extra_args=None,
-    callback=None,
-    disable=None,
-    pipeline=False,
+def set_model_options_post_cfg_function(
+    model_options, post_cfg_function, disable_cfg1_optimization=False
 ):
-    ""
- …
-    if not disable_gui:
-        from modules.AutoEncoders import taesd
-        from modules.user import app_instance
-
-    # Pre-allocate tensors and transform sigmas
-    s_in = torch.ones((x.shape[0],), device=device)
-    t_steps = -torch.log(sigmas)  # Fused calculation
-
-    # Pre-calculate all needed values in one go
-    sigma_steps = torch.exp(-t_steps)  # Fused calculation
-    ratios = sigma_steps[1:] / sigma_steps[:-1]
-    h_steps = t_steps[1:] - t_steps[:-1]
-
-    old_denoised = None
-    extra_args = {} if extra_args is None else extra_args
-
-    for i in trange(len(sigmas) - 1, disable=disable):
-        if (
-            not pipeline
-            and hasattr(app_instance.app, "interrupt_flag")
-            and app_instance.app.interrupt_flag
-        ):
-            return x
-
-        if not pipeline:
-            app_instance.app.progress.set(i / (len(sigmas) - 1))
-
-        # Fused model inference and update calculations
-        denoised = model(x, sigmas[i] * s_in, **extra_args)
-
-        if callback is not None:
-            callback(
-                {
-                    "x": x,
-                    "i": i,
-                    "sigma": sigmas[i],
-                    "sigma_hat": sigmas[i],
-                    "denoised": denoised,
-                }
-            )
-
-        # Combined update step
-        x = ratios[i] * x - (-h_steps[i]).expm1() * (
-            denoised
-            if old_denoised is None or sigmas[i + 1] == 0
-            else (1 + h_steps[i - 1] / (2 * h_steps[i])) * denoised
-            - (h_steps[i - 1] / (2 * h_steps[i])) * old_denoised
-        )
-
-        old_denoised = denoised
-
-        # Preview updates
-        if not pipeline and app_instance.app.previewer_var.get() and i % 5 == 0:
-            threading.Thread(target=taesd.taesd_preview, args=(x,)).start()
-
-    return x
+    model_options["sampler_post_cfg_function"] = model_options.get(
+        "sampler_post_cfg_function", []
+    ) + [post_cfg_function]
+    if disable_cfg1_optimization:
+        model_options["disable_cfg1_optimization"] = True
+    return model_options


 @torch.no_grad()
@@ -354,17 +188,26 @@ def sample_dpmpp_2m_cfgpp(
     ratios = sigma_steps[1:] / sigma_steps[:-1]
     h_steps = t_steps[1:] - t_steps[:-1]

-    # CFG
- …
-        progress = step / n_steps
-        return cfg_scale + (cfg_min - cfg_scale) * progress
+    # Pre-calculate CFG schedule for the entire sampling process
+    steps = torch.arange(n_steps, device=device)
+    cfg_values = cfg_scale + (cfg_min - cfg_scale) * (steps / n_steps)

     old_denoised = None
     old_uncond_denoised = None
     extra_args = {} if extra_args is None else extra_args

- …
+    # Define post-CFG function once outside the loop
+    def post_cfg_function(args):
+        nonlocal old_uncond_denoised
+        old_uncond_denoised = args["uncond_denoised"]
+        return args["denoised"]
+
+    model_options = extra_args.get("model_options", {}).copy()
+    extra_args["model_options"] = set_model_options_post_cfg_function(
+        model_options, post_cfg_function, disable_cfg1_optimization=True
+    )
+
+    for i in trange(n_steps, disable=disable):
         if (
             not pipeline
             and hasattr(app_instance.app, "interrupt_flag")
@@ -373,20 +216,10 @@ def sample_dpmpp_2m_cfgpp(
             return x

         if not pipeline:
-            app_instance.app.progress.set(i / …
-
-        # Get current CFG scale
-        current_cfg = get_cfg_scale(i)
-
-        def post_cfg_function(args):
-            nonlocal old_uncond_denoised
-            old_uncond_denoised = args["uncond_denoised"]
-            return args["denoised"]
-
- …
-            model_options, post_cfg_function, disable_cfg1_optimization=True
-        )
+            app_instance.app.progress.set(i / n_steps)
+
+        # Use pre-calculated CFG scale
+        current_cfg = cfg_values[i]

         # Fused model inference and update calculations
         denoised = model(x, sigmas[i] * s_in, **extra_args)
@@ -406,27 +239,29 @@ def sample_dpmpp_2m_cfgpp(
             }
         )

-        # CFG++ update step
+        # CFG++ update step using optimized operations
         if old_uncond_denoised is None or sigmas[i + 1] == 0:
-            # First step or last step - …
-            cfg_denoised = uncond_denoised
+            # First step or last step - use torch.lerp for efficient interpolation
+            cfg_denoised = torch.lerp(uncond_denoised, denoised, current_cfg)
         else:
-            # …
-            x0_coeff = cfg_x0_scale * current_cfg
-            s_coeff = cfg_s_scale * current_cfg
-
-            # Momentum terms
+            # Fused momentum calculations
             h_ratio = h_steps[i - 1] / (2 * h_steps[i])
-
-            uncond_momentum = (
-                1 + h_ratio
-            ) * uncond_denoised - h_ratio * old_uncond_denoised
+            h_ratio_plus_1 = 1 + h_ratio

-            # …
- …
+            # Use fused multiply-add operations for momentum terms
+            momentum = torch.addcmul(denoised * h_ratio_plus_1, old_denoised, -h_ratio)
+            uncond_momentum = torch.addcmul(
+                uncond_denoised * h_ratio_plus_1, old_uncond_denoised, -h_ratio
+            )

- …
+            # Optimized interpolation for CFG++ update
+            cfg_denoised = torch.lerp(
+                uncond_momentum, momentum, current_cfg * cfg_x0_scale
+            )
+
+        # Apply update with pre-calculated expm1
+        h_expm1 = torch.expm1(-h_steps[i])
+        x = ratios[i] * x - h_expm1 * cfg_denoised

         old_denoised = denoised
         old_uncond_denoised = uncond_denoised
@@ -438,17 +273,6 @@ def sample_dpmpp_2m_cfgpp(
     return x


-def set_model_options_post_cfg_function(
-    model_options, post_cfg_function, disable_cfg1_optimization=False
-):
-    model_options["sampler_post_cfg_function"] = model_options.get(
-        "sampler_post_cfg_function", []
-    ) + [post_cfg_function]
-    if disable_cfg1_optimization:
-        model_options["disable_cfg1_optimization"] = True
-    return model_options
-
-
 @torch.no_grad()
 def sample_dpmpp_sde_cfgpp(
     model,
@@ -572,7 +396,6 @@ def sample_dpmpp_sde_cfgpp(
         else:
             # CFG++ with momentum
             x0_coeff = cfg_x0_scale * current_cfg
-            s_coeff = cfg_s_scale * current_cfg

             # Calculate momentum terms
             h_ratio = (t - s_) / (2 * (t - t_next))
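The momentum terms in sample_dpmpp_2m_cfgpp above use torch.addcmul(input, t1, t2), which computes input + t1 * t2 in one fused kernel. A quick standalone check that the fused form matches the unfused momentum expression (1 + r) * d - r * d_old:

import torch

d, d_old = torch.randn(3), torch.randn(3)
r = torch.tensor(0.25)  # stands in for h_steps[i - 1] / (2 * h_steps[i])

fused = torch.addcmul(d * (1 + r), d_old, -r)
plain = (1 + r) * d - r * d_old
assert torch.allclose(fused, plain)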
modules/sample/sampling.py
CHANGED
|
@@ -76,6 +76,7 @@ class EPS:
|
|
| 76 |
if max_denoise:
|
| 77 |
noise = noise * torch.sqrt(1.0 + sigma**2.0)
|
| 78 |
else:
|
|
|
|
| 79 |
noise = noise * sigma
|
| 80 |
|
| 81 |
noise += latent_image
|
@@ -513,153 +514,22 @@ def ksampler(
     #### Returns:
         - `KSAMPLER`: The KSAMPLER object.
     """
-    if sampler_name == "dpmpp_2m":
-
-        def dpmpp_2m_function(
-            model: torch.nn.Module,
-            noise: torch.Tensor,
-            sigmas: torch.Tensor,
-            extra_args: dict,
-            callback: callable,
-            disable: bool,
-            pipeline: bool,
-            **extra_options,
-        ) -> torch.Tensor:
-            sigma_min = sigmas[-1]
-            if sigma_min == 0:
-                sigma_min = sigmas[-2]
-            return samplers.sample_dpmpp_2m(
-                model,
-                noise,
-                sigmas,
-                extra_args=extra_args,
-                callback=callback,
-                disable=disable,
-                pipeline=pipeline,
-                **extra_options,
-            )
-
-        sampler_function = dpmpp_2m_function
-
-    elif sampler_name == "dpmpp_2m_cfgpp":
-
-        def dpmpp_2m_dy_function(
-            model: torch.nn.Module,
-            noise: torch.Tensor,
-            sigmas: torch.Tensor,
-            extra_args: dict,
-            callback: callable,
-            disable: bool,
-            pipeline: bool,
-            **extra_options,
-        ) -> torch.Tensor:
-            sigma_min = sigmas[-1]
-            if sigma_min == 0:
-                sigma_min = sigmas[-2]
-            return samplers.sample_dpmpp_2m_cfgpp(
-                model,
-                noise,
-                sigmas,
-                extra_args=extra_args,
-                callback=callback,
-                disable=disable,
-                pipeline=pipeline,
-                **extra_options,
-            )
-
-        sampler_function = dpmpp_2m_dy_function
-
-    elif sampler_name == "dpmpp_sde":
-
-        def dpmpp_sde_function(
-            model: torch.nn.Module,
-            noise: torch.Tensor,
-            sigmas: torch.Tensor,
-            extra_args: dict,
-            callback: callable,
-            disable: bool,
-            pipeline: bool,
-            **extra_options,
-        ) -> torch.Tensor:
-            return samplers.sample_dpmpp_sde(
-                model,
-                noise,
-                sigmas,
-                extra_args=extra_args,
-                callback=callback,
-                disable=disable,
-                pipeline=pipeline,
-                **extra_options,
-            )
-
-        sampler_function = dpmpp_sde_function
+    if sampler_name == "dpmpp_2m_cfgpp":
+        sampler_function = samplers.sample_dpmpp_2m_cfgpp
 
     elif sampler_name == "euler_ancestral":
-
-        def euler_ancestral_function(
-            model: torch.nn.Module,
-            noise: torch.Tensor,
-            sigmas: torch.Tensor,
-            extra_args: dict,
-            callback: callable,
-            disable: bool,
-            pipeline: bool,
-        ) -> torch.Tensor:
-            return samplers.sample_euler_ancestral(
-                model,
-                noise,
-                sigmas,
-                extra_args=extra_args,
-                callback=callback,
-                disable=disable,
-                pipeline=pipeline,
-                **extra_options,
-            )
-
-        sampler_function = euler_ancestral_function
+        sampler_function = samplers.sample_euler_ancestral
 
     elif sampler_name == "dpmpp_sde_cfgpp":
-
-        def dpmpp_sde_dy_function(
-            model: torch.nn.Module,
-            noise: torch.Tensor,
-            sigmas: torch.Tensor,
-            extra_args: dict,
-            callback: callable,
-            disable: bool,
-            pipeline: bool,
-            **extra_options,
-        ) -> torch.Tensor:
-            return samplers.sample_dpmpp_sde_cfgpp(
-                model,
-                noise,
-                sigmas,
-                extra_args=extra_args,
-                callback=callback,
-                disable=disable,
-                pipeline=pipeline,
-                **extra_options,
-            )
-
-        sampler_function = dpmpp_sde_dy_function
+        sampler_function = samplers.sample_dpmpp_sde_cfgpp
 
     elif sampler_name == "euler":
-
-        def euler_function(
-            ...
-        ) -> torch.Tensor:
-            return samplers.sample_euler(
-                model,
-                noise,
-                sigmas,
-                extra_args=extra_args,
-                callback=callback,
-                disable=disable,
-                pipeline=pipeline,
-                **extra_options,
-            )
-
-        sampler_function = euler_function
+        sampler_function = samplers.sample_euler
+
+    else:
+        # Default fallback
+        sampler_function = samplers.sample_euler
+        print(f"Warning: Unknown sampler '{sampler_name}', falling back to euler")
 
     return KSAMPLER(sampler_function, extra_options, inpaint_options)
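The refactor can drop the per-name wrapper closures because every sampler shares the same `(model, noise, sigmas, extra_args, callback, disable, pipeline, **extra_options)` calling convention, so the functions are usable directly. The same dispatch could equally be a lookup table; a sketch of that alternative (not what the commit does; the import path mirrors the repo layout):

from modules.sample import samplers

SAMPLER_TABLE = {
    "dpmpp_2m_cfgpp": samplers.sample_dpmpp_2m_cfgpp,
    "euler_ancestral": samplers.sample_euler_ancestral,
    "dpmpp_sde_cfgpp": samplers.sample_dpmpp_sde_cfgpp,
    "euler": samplers.sample_euler,
}

def resolve_sampler(name: str):
    # Unknown names fall back to euler, matching the else-branch above.
    fn = SAMPLER_TABLE.get(name)
    if fn is None:
        print(f"Warning: Unknown sampler '{name}', falling back to euler")
        fn = samplers.sample_euler
    return fn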
@@ -734,49 +604,49 @@ def sampler_object(name: str, pipeline: bool = False) -> KSAMPLER:
     return sampler
 
 
-class KSampler1:
-    """..."""
+class KSampler:
+    """A unified sampler class that replaces both KSampler1 and KSampler2."""
 
     def __init__(
         self,
-        model: torch.nn.Module,
-        steps: int,
-        device,
+        model: torch.nn.Module = None,
+        steps: int = None,
         sampler: str = None,
         scheduler: str = None,
-        denoise: float = None,
+        denoise: float = 1.0,
         model_options: dict = {},
         pipeline: bool = False,
     ):
-        """...
-            - `pipeline` (bool, optional): Whether to use the pipeline. Defaults to False.
-        """
+        """Initialize the KSampler class.
+
+        Args:
+            model (torch.nn.Module, optional): The model to use for sampling. Required for direct sampling.
+            steps (int, optional): The number of steps. Required for direct sampling.
+            sampler (str, optional): The sampler name. Defaults to None.
+            scheduler (str, optional): The scheduler name. Defaults to None.
+            denoise (float, optional): The denoise factor. Defaults to 1.0.
+            model_options (dict, optional): The model options. Defaults to {}.
+            pipeline (bool, optional): Whether to use the pipeline. Defaults to False.
+        """
         self.model = model
-        self.device = device
+        self.device = model.load_device if model is not None else None
         self.scheduler = scheduler
-        self.sampler = sampler
-        self.set_steps(steps, denoise)
+        self.sampler_name = sampler
         self.denoise = denoise
         self.model_options = model_options
        self.pipeline = pipeline
 
+        if model is not None and steps is not None:
+            self.set_steps(steps, denoise)
+
     def calculate_sigmas(self, steps: int) -> torch.Tensor:
-        """...
-        """
+        """Calculate the sigmas for the given steps.
+
+        Args:
+            steps (int): The number of steps.
+
+        Returns:
+            torch.Tensor: The calculated sigmas.
+        """
         sigmas = ksampler_util.calculate_sigmas(
             self.model.get_model_object("model_sampling"), self.scheduler, steps
@@ -784,11 +654,11 @@ class KSampler1:
         return sigmas
 
     def set_steps(self, steps: int, denoise: float = None):
-        """...
-        """
+        """Set the steps and calculate the sigmas.
+
+        Args:
+            steps (int): The number of steps.
+            denoise (float, optional): The denoise factor. Defaults to None.
+        """
         self.steps = steps
         if denoise is None or denoise > 0.9999:
 
@@ -801,7 +671,29 @@ class KSampler1:
         sigmas = self.calculate_sigmas(new_steps).to(self.device)
         self.sigmas = sigmas[-(steps + 1) :]
 
-    def sample(
+    def _process_sigmas(self, sigmas, start_step, last_step, force_full_denoise):
+        """Process sigmas based on start_step and last_step.
+
+        Args:
+            sigmas (torch.Tensor): The sigmas tensor.
+            start_step (int, optional): The start step. Defaults to None.
+            last_step (int, optional): The last step. Defaults to None.
+            force_full_denoise (bool): Whether to force full denoise.
+
+        Returns:
+            torch.Tensor: The processed sigmas.
+        """
+        if last_step is not None and last_step < (len(sigmas) - 1):
+            sigmas = sigmas[: last_step + 1]
+            if force_full_denoise:
+                sigmas[-1] = 0
+
+        if start_step is not None and start_step < (len(sigmas) - 1):
+            sigmas = sigmas[start_step:]
+
+        return sigmas
+
+    def direct_sample(
         self,
         noise: torch.Tensor,
         positive: torch.Tensor,
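Worked example of the slicing above (toy sigma values): the schedule runs from high noise toward zero, `last_step` trims the tail, `force_full_denoise` pins the final sigma to 0, and `start_step` skips the noisiest steps for img2img-style partial denoising:

import torch

sigmas = torch.tensor([14.6, 8.0, 4.1, 2.0, 0.9, 0.3])

trimmed = sigmas[: 4 + 1].clone()   # last_step = 4
trimmed[-1] = 0                     # force_full_denoise
print(trimmed)                      # tensor([14.6000,  8.0000,  4.1000,  2.0000,  0.0000])
print(trimmed[2:])                  # start_step = 2 -> tensor([4.1000, 2.0000, 0.0000])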
@@ -816,48 +708,45 @@ class KSampler1:
         callback: callable = None,
         disable_pbar: bool = False,
         seed: int = None,
-        pipeline: bool = False,
         flux: bool = False,
     ) -> torch.Tensor:
-        """...
-        """
+        """Sample directly with the initialized model and parameters.
+
+        Args:
+            noise (torch.Tensor): The noise tensor.
+            positive (torch.Tensor): The positive tensor.
+            negative (torch.Tensor): The negative tensor.
+            cfg (float): The CFG value.
+            latent_image (torch.Tensor, optional): The latent image tensor. Defaults to None.
+            start_step (int, optional): The start step. Defaults to None.
+            last_step (int, optional): The last step. Defaults to None.
+            force_full_denoise (bool, optional): Whether to force full denoise. Defaults to False.
+            denoise_mask (torch.Tensor, optional): The denoise mask tensor. Defaults to None.
+            sigmas (torch.Tensor, optional): The sigmas tensor. Defaults to None.
+            callback (callable, optional): The callback function. Defaults to None.
+            disable_pbar (bool, optional): Whether to disable the progress bar. Defaults to False.
+            seed (int, optional): The seed value. Defaults to None.
+            flux (bool, optional): Whether to use flux mode. Defaults to False.
+
+        Returns:
+            torch.Tensor: The sampled tensor.
+        """
+        if self.model is None:
+            raise ValueError("Model must be provided for direct sampling")
+
         if sigmas is None:
             sigmas = self.sigmas
 
-        if last_step is not None and last_step < (len(sigmas) - 1):
-            sigmas = sigmas[: last_step + 1]
-            if force_full_denoise:
-                sigmas[-1] = 0
-
-        if start_step is not None:
-            if start_step < (len(sigmas) - 1):
-                sigmas = sigmas[start_step:]
-            else:
-                if latent_image is not None:
-                    return latent_image
-                else:
-                    return torch.zeros_like(noise)
-
-        sampler = sampler_object(self.sampler, pipeline=pipeline)
+        sigmas = self._process_sigmas(sigmas, start_step, last_step, force_full_denoise)
+
+        # Early return if needed
+        if start_step is not None and start_step >= (len(sigmas) - 1):
+            if latent_image is not None:
+                return latent_image
+            else:
+                return torch.zeros_like(noise)
+
+        sampler_obj = sampler_object(self.sampler_name, pipeline=self.pipeline)
 
         return sample(
             self.model,
 
@@ -866,7 +755,7 @@ class KSampler1:
             negative,
             cfg,
             self.device,
-            sampler,
+            sampler_obj,
             sigmas,
             self.model_options,
             latent_image=latent_image,
 
@@ -874,11 +763,117 @@ class KSampler1:
             callback=callback,
             disable_pbar=disable_pbar,
             seed=seed,
-            pipeline=pipeline,
+            pipeline=self.pipeline,
             flux=flux,
         )
 
+    def sample(
+        self,
+        model: torch.nn.Module = None,
+        seed: int = None,
+        steps: int = None,
+        cfg: float = None,
+        sampler_name: str = None,
+        scheduler: str = None,
+        positive: torch.Tensor = None,
+        negative: torch.Tensor = None,
+        latent_image: torch.Tensor = None,
+        denoise: float = None,
+        start_step: int = None,
+        last_step: int = None,
+        force_full_denoise: bool = False,
+        noise_mask: torch.Tensor = None,
+        callback: callable = None,
+        disable_pbar: bool = False,
+        disable_noise: bool = False,
+        pipeline: bool = False,
+        flux: bool = False,
+    ) -> tuple:
+        """Unified sampling interface that works both as direct sampling and through the common_ksampler.
+
+        This method can be used in two ways:
+        1. If model is provided, it will create a temporary sampler and use that
+        2. If model is None, it will use the pre-initialized model and parameters
+
+        Args:
+            model (torch.nn.Module, optional): The model to use for sampling. If None, uses pre-initialized model.
+            seed (int, optional): The seed value.
+            steps (int, optional): The number of steps. If None, uses pre-initialized steps.
+            cfg (float, optional): The CFG value.
+            sampler_name (str, optional): The sampler name. If None, uses pre-initialized sampler.
+            scheduler (str, optional): The scheduler name. If None, uses pre-initialized scheduler.
+            positive (torch.Tensor, optional): The positive tensor.
+            negative (torch.Tensor, optional): The negative tensor.
+            latent_image (torch.Tensor, optional): The latent image tensor.
+            denoise (float, optional): The denoise factor. If None, uses pre-initialized denoise.
+            start_step (int, optional): The start step. Defaults to None.
+            last_step (int, optional): The last step. Defaults to None.
+            force_full_denoise (bool, optional): Whether to force full denoise. Defaults to False.
+            noise_mask (torch.Tensor, optional): The noise mask tensor. Defaults to None.
+            callback (callable, optional): The callback function. Defaults to None.
+            disable_pbar (bool, optional): Whether to disable the progress bar. Defaults to False.
+            disable_noise (bool, optional): Whether to disable noise. Defaults to False.
+            pipeline (bool, optional): Whether to use the pipeline. Defaults to False.
+            flux (bool, optional): Whether to use flux mode. Defaults to False.
+
+        Returns:
+            tuple: The output tuple containing either (latent_dict,) or the sampled tensor.
+        """
+        # Case 1: Use pre-initialized model for direct sampling
+        if model is None:
+            if latent_image is None:
+                raise ValueError(
+                    "latent_image must be provided when using pre-initialized model"
+                )
+
+            return (
+                self.direct_sample(
+                    None,  # noise will be generated in common_ksampler
+                    positive,
+                    negative,
+                    cfg,
+                    latent_image,
+                    start_step,
+                    last_step,
+                    force_full_denoise,
+                    noise_mask,
+                    None,  # sigmas will use pre-calculated ones
+                    callback,
+                    disable_pbar,
+                    seed,
+                    flux,
+                ),
+            )
+
+        # Case 2: Use common_ksampler approach with provided model
+        else:
+            # For backwards compatibility with KSampler2 usage pattern
+            if isinstance(latent_image, dict):
+                latent = latent_image
+            else:
+                latent = {"samples": latent_image}
+
+            return common_ksampler(
+                model,
+                seed,
+                steps,
+                cfg,
+                sampler_name or self.sampler_name,
+                scheduler or self.scheduler,
+                positive,
+                negative,
+                latent,
+                denoise or self.denoise,
+                disable_noise,
+                start_step,
+                last_step,
+                force_full_denoise,
+                pipeline or self.pipeline,
+                flux,
+            )
+
+
+# Refactor sample1 to use KSampler directly
 def sample1(
     model: torch.nn.Module,
     noise: torch.Tensor,
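Usage sketch for the unified class (all names and argument values are placeholders, and "normal" is just an assumed scheduler name):

from modules.sample import sampling

# KSampler2-style: no-arg construction; the model goes to sample(), which
# routes through common_ksampler and returns a (latent_dict,) tuple.
ks = sampling.KSampler()
(result,) = ks.sample(
    model=model, seed=42, steps=20, cfg=7.0,
    sampler_name="dpmpp_2m_cfgpp", scheduler="normal",
    positive=positive, negative=negative,
    latent_image={"samples": latent}, denoise=1.0,
)

# KSampler1-style: pre-initialize, then sample directly; returns a tensor.
ks = sampling.KSampler(model=model, steps=20, sampler="dpmpp_2m_cfgpp",
                       scheduler="normal", denoise=1.0)
samples = ks.direct_sample(noise, positive, negative, 7.0, latent_image=latent)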
@@ -902,37 +897,37 @@ def sample1(
     pipeline: bool = False,
     flux: bool = False,
 ) -> torch.Tensor:
-    """...
-    """
+    """Sample using the given parameters with the unified KSampler.
+
+    Args:
+        model (torch.nn.Module): The model.
+        noise (torch.Tensor): The noise tensor.
+        steps (int): The number of steps.
+        cfg (float): The CFG value.
+        sampler_name (str): The sampler name.
+        scheduler (str): The scheduler name.
+        positive (torch.Tensor): The positive tensor.
+        negative (torch.Tensor): The negative tensor.
+        latent_image (torch.Tensor): The latent image tensor.
+        denoise (float, optional): The denoise factor. Defaults to 1.0.
+        disable_noise (bool, optional): Whether to disable noise. Defaults to False.
+        start_step (int, optional): The start step. Defaults to None.
+        last_step (int, optional): The last step. Defaults to None.
+        force_full_denoise (bool, optional): Whether to force full denoise. Defaults to False.
+        noise_mask (torch.Tensor, optional): The noise mask tensor. Defaults to None.
+        sigmas (torch.Tensor, optional): The sigmas tensor. Defaults to None.
+        callback (callable, optional): The callback function. Defaults to None.
+        disable_pbar (bool, optional): Whether to disable the progress bar. Defaults to False.
+        seed (int, optional): The seed value. Defaults to None.
+        pipeline (bool, optional): Whether to use the pipeline. Defaults to False.
+        flux (bool, optional): Whether to use flux mode. Defaults to False.
+
+    Returns:
+        torch.Tensor: The sampled tensor.
     """
-    sampler = KSampler1(
-        model,
+    sampler = KSampler(
+        model=model,
         steps=steps,
-        device=model.load_device,
         sampler=sampler_name,
         scheduler=scheduler,
         denoise=denoise,
 
@@ -940,7 +935,7 @@ def sample1(
         pipeline=pipeline,
     )
 
-    samples = sampler.sample(
+    samples = sampler.direct_sample(
         noise,
         positive,
         negative,
 
@@ -954,147 +949,12 @@ def sample1(
         callback=callback,
         disable_pbar=disable_pbar,
         seed=seed,
-        pipeline=pipeline,
         flux=flux,
     )
     samples = samples.to(Device.intermediate_device())
     return samples
 
 
-def common_ksampler(
-    model: torch.nn.Module,
-    seed: int,
-    steps: int,
-    cfg: float,
-    sampler_name: str,
-    scheduler: str,
-    positive: torch.Tensor,
-    negative: torch.Tensor,
-    latent: dict,
-    denoise: float = 1.0,
-    disable_noise: bool = False,
-    start_step: int = None,
-    last_step: int = None,
-    force_full_denoise: bool = False,
-    pipeline: bool = False,
-    flux: bool = False,
-) -> tuple:
-    """#### Common ksampler function.
-
-    #### Args:
-        - `model` (torch.nn.Module): The model.
-        - `seed` (int): The seed value.
-        - `steps` (int): The number of steps.
-        - `cfg` (float): The CFG value.
-        - `sampler_name` (str): The sampler name.
-        - `scheduler` (str): The scheduler name.
-        - `positive` (torch.Tensor): The positive tensor.
-        - `negative` (torch.Tensor): The negative tensor.
-        - `latent` (dict): The latent dictionary.
-        - `denoise` (float, optional): The denoise factor. Defaults to 1.0.
-        - `disable_noise` (bool, optional): Whether to disable noise. Defaults to False.
-        - `start_step` (int, optional): The start step. Defaults to None.
-        - `last_step` (int, optional): The last step. Defaults to None.
-        - `force_full_denoise` (bool, optional): Whether to force full denoise. Defaults to False.
-        - `pipeline` (bool, optional): Whether to use the pipeline. Defaults to False.
-
-    #### Returns:
-        - `tuple`: The output tuple containing the latent dictionary and samples.
-    """
-    latent_image = latent["samples"]
-    latent_image = Latent.fix_empty_latent_channels(model, latent_image)
-
-    if disable_noise:
-        noise = torch.zeros(
-            latent_image.size(),
-            dtype=latent_image.dtype,
-            layout=latent_image.layout,
-            device="cpu",
-        )
-    else:
-        batch_inds = latent["batch_index"] if "batch_index" in latent else None
-        noise = ksampler_util.prepare_noise(latent_image, seed, batch_inds)
-
-    noise_mask = None
-    if "noise_mask" in latent:
-        noise_mask = latent["noise_mask"]
-    samples = sample1(
-        model,
-        noise,
-        steps,
-        cfg,
-        sampler_name,
-        scheduler,
-        positive,
-        negative,
-        latent_image,
-        denoise=denoise,
-        disable_noise=disable_noise,
-        start_step=start_step,
-        last_step=last_step,
-        force_full_denoise=force_full_denoise,
-        noise_mask=noise_mask,
-        seed=seed,
-        pipeline=pipeline,
-        flux=flux,
-    )
-    out = latent.copy()
-    out["samples"] = samples
-    return (out,)
-
-
-class KSampler2:
-    """#### Class for KSampler2."""
-
-    def sample(
-        self,
-        model: torch.nn.Module,
-        seed: int,
-        steps: int,
-        cfg: float,
-        sampler_name: str,
-        scheduler: str,
-        positive: torch.Tensor,
-        negative: torch.Tensor,
-        latent_image: torch.Tensor,
-        denoise: float = 1.0,
-        pipeline: bool = False,
-        flux: bool = False,
-    ) -> tuple:
-        """#### Sample using the KSampler2.
-
-        #### Args:
-            - `model` (torch.nn.Module): The model.
-            - `seed` (int): The seed value.
-            - `steps` (int): The number of steps.
-            - `cfg` (float): The CFG value.
-            - `sampler_name` (str): The sampler name.
-            - `scheduler` (str): The scheduler name.
-            - `positive` (torch.Tensor): The positive tensor.
-            - `negative` (torch.Tensor): The negative tensor.
-            - `latent_image` (torch.Tensor): The latent image tensor.
-            - `denoise` (float, optional): The denoise factor. Defaults to 1.0.
-            - `pipeline` (bool, optional): Whether to use the pipeline. Defaults to False.
-
-        #### Returns:
-            - `tuple`: The output tuple containing the latent dictionary and samples.
-        """
-        return common_ksampler(
-            model,
-            seed,
-            steps,
-            cfg,
-            sampler_name,
-            scheduler,
-            positive,
-            negative,
-            latent_image,
-            denoise=denoise,
-            pipeline=pipeline,
-            flux=flux,
-        )
-
-
 class ModelType(Enum):
     """#### Enum for Model Types."""
@@ -1187,3 +1047,86 @@ def sample_custom(
     )
     samples = samples.to(Device.intermediate_device())
     return samples
+
+
+def common_ksampler(
+    model: torch.nn.Module,
+    seed: int,
+    steps: int,
+    cfg: float,
+    sampler_name: str,
+    scheduler: str,
+    positive: torch.Tensor,
+    negative: torch.Tensor,
+    latent: dict,
+    denoise: float = 1.0,
+    disable_noise: bool = False,
+    start_step: int = None,
+    last_step: int = None,
+    force_full_denoise: bool = False,
+    pipeline: bool = False,
+    flux: bool = False,
+) -> tuple:
+    """Common ksampler function.
+
+    Args:
+        model (torch.nn.Module): The model.
+        seed (int): The seed value.
+        steps (int): The number of steps.
+        cfg (float): The CFG value.
+        sampler_name (str): The sampler name.
+        scheduler (str): The scheduler name.
+        positive (torch.Tensor): The positive tensor.
+        negative (torch.Tensor): The negative tensor.
+        latent (dict): The latent dictionary.
+        denoise (float, optional): The denoise factor. Defaults to 1.0.
+        disable_noise (bool, optional): Whether to disable noise. Defaults to False.
+        start_step (int, optional): The start step. Defaults to None.
+        last_step (int, optional): The last step. Defaults to None.
+        force_full_denoise (bool, optional): Whether to force full denoise. Defaults to False.
+        pipeline (bool, optional): Whether to use the pipeline. Defaults to False.
+        flux (bool, optional): Whether to use flux mode. Defaults to False.
+
+    Returns:
+        tuple: The output tuple containing the latent dictionary and samples.
+    """
+    latent_image = latent["samples"]
+    latent_image = Latent.fix_empty_latent_channels(model, latent_image)
+
+    if disable_noise:
+        noise = torch.zeros(
+            latent_image.size(),
+            dtype=latent_image.dtype,
+            layout=latent_image.layout,
+            device="cpu",
+        )
+    else:
+        batch_inds = latent["batch_index"] if "batch_index" in latent else None
+        noise = ksampler_util.prepare_noise(latent_image, seed, batch_inds)
+
+    noise_mask = None
+    if "noise_mask" in latent:
+        noise_mask = latent["noise_mask"]
+    samples = sample1(
+        model,
+        noise,
+        steps,
+        cfg,
+        sampler_name,
+        scheduler,
+        positive,
+        negative,
+        latent_image,
+        denoise=denoise,
+        disable_noise=disable_noise,
+        start_step=start_step,
+        last_step=last_step,
+        force_full_denoise=force_full_denoise,
+        noise_mask=noise_mask,
+        seed=seed,
+        pipeline=pipeline,
+        flux=flux,
+    )
+    out = latent.copy()
+    out["samples"] = samples
+    return (out,)
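The `prepare_noise` call is what makes a given seed reproduce the same initial latents. A minimal sketch of the usual approach (the repo's ksampler_util.prepare_noise may handle batch_inds and generators differently):

import torch

def prepare_noise_sketch(latent_image: torch.Tensor, seed: int) -> torch.Tensor:
    # A generator keyed by the seed makes runs reproducible regardless of
    # global RNG state; noise is drawn on CPU, matching the zeros path above.
    generator = torch.manual_seed(seed)
    return torch.randn(
        latent_image.size(),
        dtype=latent_image.dtype,
        layout=latent_image.layout,
        generator=generator,
        device="cpu",
    )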
modules/user/GUI.py
CHANGED
@@ -449,7 +449,9 @@ class App(tk.Tk):
         img_tensor = img_tensor.unsqueeze(0)
         self.interrupt_flag = False
         self.sampler = (
-            "..."
+            "dpmpp_sde_cfgpp"
+            if not self.prioritize_speed_var.get()
+            else "dpmpp_2m_cfgpp"
         )
         with torch.inference_mode():
             (
 
@@ -612,7 +614,7 @@ class App(tk.Tk):
         )
         self.cliptextencode = Clip.CLIPTextEncode()
         self.emptylatentimage = Latent.EmptyLatentImage()
-        self.ksampler_instance = sampling.KSampler2()
+        self.ksampler_instance = sampling.KSampler()
         self.vaedecode = VariationalAE.VAEDecode()
         self.latent_upscale = upscale.LatentUpscale()
         self.upscalemodelloader = USDU_upscaler.UpscaleModelLoader()
 
@@ -637,7 +639,9 @@ class App(tk.Tk):
         self.generation_threads.append(current_thread)
         self.interrupt_flag = False
         self.sampler = (
-            "..."
+            "dpmpp_sde_cfgpp"
+            if not self.prioritize_speed_var.get()
+            else "dpmpp_2m_cfgpp"
         )
         try:
             # Disable generate button during generation
 
@@ -955,7 +959,7 @@ class App(tk.Tk):
         unetloadergguf = Quantizer.UnetLoaderGGUF()
         cliptextencodeflux = Quantizer.CLIPTextEncodeFlux()
         conditioningzeroout = Quantizer.ConditioningZeroOut()
-        ksampler = sampling.KSampler2()
+        ksampler = sampling.KSampler()
         vaedecode = VariationalAE.VAEDecode()
         unetloadergguf_10 = unetloadergguf.load_unet(
             unet_name="flux1-dev-Q8_0.gguf"
modules/user/pipeline.py
CHANGED
@@ -92,7 +92,7 @@ def pipeline(
     hidiffoptimizer = msw_msa_attention.ApplyMSWMSAAttentionSimple()
     cliptextencode = Clip.CLIPTextEncode()
     emptylatentimage = Latent.EmptyLatentImage()
-    ksampler_instance = sampling.KSampler2()
+    ksampler_instance = sampling.KSampler()
     vaedecode = VariationalAE.VAEDecode()
     saveimage = ImageSaver.SaveImage()
     latent_upscale = upscale.LatentUpscale()
 
@@ -187,7 +187,7 @@ def pipeline(
     unetloadergguf = Quantizer.UnetLoaderGGUF()
     cliptextencodeflux = Quantizer.CLIPTextEncodeFlux()
    conditioningzeroout = Quantizer.ConditioningZeroOut()
-    ksampler = sampling.KSampler2()
+    ksampler = sampling.KSampler()
     unetloadergguf_10 = unetloadergguf.load_unet(
         unet_name="flux1-dev-Q8_0.gguf"
     )
 
@@ -283,10 +283,10 @@ def pipeline(
         )
     else:
         applystablefast_158 = loraloader_274
-        fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
-        applystablefast_158 = fb_cache.patch(
-            applystablefast_158, "diffusion_model", 0.120
-        )
+        # fb_cache = fbcache_nodes.ApplyFBCacheOnModel()
+        # applystablefast_158 = fb_cache.patch(
+        #     applystablefast_158, "diffusion_model", 0.120
+        # )
 
     ksampler_239 = ksampler_instance.sample(
         seed=seed,
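The pipeline now drives the unified sampler through its model-provided path; a sketch of the pattern that call site relies on (argument values are placeholders, "normal" is an assumed scheduler name, and the real call passes its own conditioning and latent):

ksampler_instance = sampling.KSampler()
result = ksampler_instance.sample(
    model=applystablefast_158,
    seed=seed,
    steps=20,
    cfg=7.0,
    sampler_name="dpmpp_2m_cfgpp",
    scheduler="normal",
    positive=positive_cond,
    negative=negative_cond,
    latent_image=latent,
    denoise=1.0,
    pipeline=True,
)
latent_out = result[0]["samples"]  # sample() returns a (latent_dict,) tuple here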