Aduc_sdr

Paused

App Files Files Community

euiia committed on Sep 4

Commit

61b63f7

verified ·

1 Parent(s): 539271a

Update tools/tensor_utils.py

Browse files

Files changed (1) hide show

tools/tensor_utils.py +26 -7

tools/tensor_utils.py CHANGED Viewed

@@ -2,7 +2,7 @@
 #
 # Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
 #
-# Version: 1.0.0
 #
 # This module provides utility functions for tensor manipulation, specifically for
 # image and video processing tasks. The functions here, such as wavelet reconstruction,
@@ -14,11 +14,17 @@
 import torch
 from torch import Tensor
 from torch.nn import functional as F
 def wavelet_blur(image: Tensor, radius: int) -> Tensor:
     """
     Apply wavelet blur to the input tensor.
     """
     # convolution kernel
     kernel_vals = [
         [0.0625, 0.125, 0.0625],
@@ -26,13 +32,15 @@ def wavelet_blur(image: Tensor, radius: int) -> Tensor:
         [0.0625, 0.125, 0.0625],
     ]
     kernel = torch.tensor(kernel_vals, dtype=image.dtype, device=image.device)
-    # add channel dimensions to the kernel to make it a 4D tensor
-    kernel = kernel[None, None]
-    # repeat the kernel across all input channels
-    kernel = kernel.repeat(image.shape[1], 1, 1, 1) # Match input channels
     image = F.pad(image, (radius, radius, radius, radius), mode='replicate')
-    # apply convolution
-    output = F.conv2d(image, kernel, groups=image.shape[1], dilation=radius)
     return output
 def wavelet_decomposition(image: Tensor, levels=5) -> Tuple[Tensor, Tensor]:
@@ -40,6 +48,12 @@ def wavelet_decomposition(image: Tensor, levels=5) -> Tuple[Tensor, Tensor]:
     Apply wavelet decomposition to the input tensor.
     This function returns both the high frequency and low frequency components.
     """
     high_freq = torch.zeros_like(image)
     low_freq = image
     for i in range(levels):
@@ -48,12 +62,17 @@ def wavelet_decomposition(image: Tensor, levels=5) -> Tuple[Tensor, Tensor]:
         high_freq += (low_freq - blurred)
         low_freq = blurred
     return high_freq, low_freq
 def wavelet_reconstruction(content_feat: Tensor, style_feat: Tensor) -> Tensor:
     """
     Applies wavelet decomposition to transfer the color/style (low-frequency components)
     from a style feature to the details (high-frequency components) of a content feature.
     Args:
         content_feat (Tensor): The tensor containing the structural details.

 #
 # Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
 #
+# Version: 1.0.1
 #
 # This module provides utility functions for tensor manipulation, specifically for
 # image and video processing tasks. The functions here, such as wavelet reconstruction,
 import torch
 from torch import Tensor
 from torch.nn import functional as F
+from typing import Tuple
 def wavelet_blur(image: Tensor, radius: int) -> Tensor:
     """
     Apply wavelet blur to the input tensor.
     """
+    if image.ndim != 4: # Expects (B, C, H, W)
+        raise ValueError(f"wavelet_blur expects a 4D tensor, but got shape {image.shape}")
+    b, c, h, w = image.shape
     # convolution kernel
     kernel_vals = [
         [0.0625, 0.125, 0.0625],
         [0.0625, 0.125, 0.0625],
     ]
     kernel = torch.tensor(kernel_vals, dtype=image.dtype, device=image.device)
+    kernel = kernel[None, None] # (1, 1, 3, 3)
+    # repeat the kernel across all input channels for grouped convolution
+    kernel = kernel.repeat(c, 1, 1, 1) # (C, 1, 3, 3)
     image = F.pad(image, (radius, radius, radius, radius), mode='replicate')
+    # apply convolution with groups=c to process each channel independently
+    output = F.conv2d(image, kernel, groups=c, dilation=radius)
     return output
 def wavelet_decomposition(image: Tensor, levels=5) -> Tuple[Tensor, Tensor]:
     Apply wavelet decomposition to the input tensor.
     This function returns both the high frequency and low frequency components.
     """
+    # Ensure tensor is 4D (B, C, H, W)
+    is_video_frame = image.ndim == 5 # (B, C, F, H, W)
+    if is_video_frame:
+        b, c, f, h, w = image.shape
+        image = image.permute(0, 2, 1, 3, 4).reshape(b * f, c, h, w)
     high_freq = torch.zeros_like(image)
     low_freq = image
     for i in range(levels):
         high_freq += (low_freq - blurred)
         low_freq = blurred
+    if is_video_frame:
+        high_freq = high_freq.view(b, f, c, h, w).permute(0, 2, 1, 3, 4)
+        low_freq = low_freq.view(b, f, c, h, w).permute(0, 2, 1, 3, 4)
     return high_freq, low_freq
 def wavelet_reconstruction(content_feat: Tensor, style_feat: Tensor) -> Tensor:
     """
     Applies wavelet decomposition to transfer the color/style (low-frequency components)
     from a style feature to the details (high-frequency components) of a content feature.
+    This works for both images (4D) and videos (5D).
     Args:
         content_feat (Tensor): The tensor containing the structural details.