Commit · 36e1539
Parent(s): init

Files changed:
- .gitignore +5 -0
- Dockerfile +9 -0
- assets/patient-1-4C-frame-2.png +0 -0
- assets/patient-17-4C-frame-11.png +0 -0
- assets/patient-21-4C-frame-21.png +0 -0
- assets/patient-46-4C-frame-57.png +0 -0
- assets/patient-47-4C-frame-59.png +0 -0
- assets/patient-50-4C-frame-53.png +0 -0
- configs/semantic_dps.yaml +28 -0
- eval.py +321 -0
- fid_score.py +480 -0
- main.py +743 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+.devcontainer
+.env
+temp/
+*.png
+*.pdf
Dockerfile ADDED
@@ -0,0 +1,9 @@
+FROM zeahub/all:v0.0.4
+
+RUN pip install --no-cache-dir SimpleITK tyro beautifulsoup4 tabulate optuna
+
+RUN pip install --no-cache-dir --no-deps pytorch_fid
+
+RUN pip install --no-cache-dir -U keras
+
+WORKDIR /workspace
assets/patient-1-4C-frame-2.png ADDED
assets/patient-17-4C-frame-11.png ADDED
assets/patient-21-4C-frame-21.png ADDED
assets/patient-46-4C-frame-57.png ADDED
assets/patient-47-4C-frame-59.png ADDED
assets/patient-50-4C-frame-53.png ADDED
configs/semantic_dps.yaml ADDED
@@ -0,0 +1,28 @@
+diffusion_model_path: "hf://tristan-deep/semantic-diffusion-echo-dehazing"
+segmentation_model_path: "hf://tristan-deep/semantic-segmentation-echo-dehazing"
+seed: 42
+
+params:
+  diffusion_steps: 480
+  initial_diffusion_step: 0
+  batch_size: 16
+  threshold_output_quantile: 0.17447
+  preserve_bottom_percent: 32.0
+  bottom_transition_width: 7.0
+
+  mask_params:
+    sigma: 4.2
+    threshold: 0.176
+  fixed_mask_params:
+    top_px: 20
+    bottom_px: 40
+  skeleton_params:
+    sigma_pre: 4.2
+    sigma_post: 4.2
+    threshold: 0.176
+  guidance_kwargs:
+    omega: 1
+    omega_vent: 0.3
+    omega_sept: 2.037
+    eta: 0.00780
+    smooth_l1_beta: 1.6355
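Note: the guidance_kwargs above become a per-pixel weight map in SemanticDPS.make_omega_map (main.py): background pixels are weighted by omega, the haze-prone ventricle region by omega_vent, and the septum/fixed/skeleton/dark regions by omega_sept. A minimal NumPy sketch of that weighting, using synthetic placeholder masks that are not part of this commit:

# Minimal sketch of the per-pixel guidance weighting from make_omega_map;
# the mask arrays below are synthetic placeholders.
import numpy as np

omega, omega_vent, omega_sept = 1.0, 0.3, 2.037  # values from this config

masks_vent = np.zeros((128, 128))    # ventricle (haze-prone) region
masks_strong = np.zeros((128, 128))  # septum + fixed + skeleton + dark regions
masks_vent[40:90, 40:90] = 1.0
masks_strong[20:40, :] = 1.0

background = (masks_strong < 0.1) * (masks_vent == 0)
masks_vent_filtered = masks_vent * (1.0 - masks_strong)

per_pixel_omega = (
    omega * background
    + omega_vent * masks_vent_filtered
    + omega_sept * masks_strong
)
print(per_pixel_omega.min(), per_pixel_omega.max())  # 0.3 ... 2.037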
eval.py ADDED
@@ -0,0 +1,321 @@
+import warnings
+from glob import glob
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+import tyro
+from PIL import Image
+from scipy.ndimage import binary_erosion, distance_transform_edt
+from scipy.stats import ks_2samp
+from zea.io_lib import load_image
+
+import fid_score
+
+
+def calculate_fid_score(denoised_image_dirs, ground_truth_dir):
+    if isinstance(denoised_image_dirs, (str, Path)):
+        denoised_image_dirs = [denoised_image_dirs]
+    elif not isinstance(denoised_image_dirs, list):
+        raise ValueError("Input must be a path or list of paths")
+
+    clean_images_folder = glob(str(ground_truth_dir) + "/*.png")
+
+    print(f"Looking for clean images in: {ground_truth_dir}")
+    print(f"Found {len(clean_images_folder)} clean images")
+
+    # Determine optimal batch size based on number of images
+    num_denoised = len(denoised_image_dirs)
+    num_clean = len(clean_images_folder)
+    optimal_batch_size = min(8, num_denoised, num_clean)
+    print(f"Using batch size: {optimal_batch_size}")
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message="os.fork.*JAX is multithreaded")
+
+        fid_value = fid_score.calculate_fid_with_cached_ground_truth(
+            denoised_image_dirs,
+            clean_images_folder,
+            batch_size=optimal_batch_size,
+            device="cuda" if torch.cuda.is_available() else "cpu",
+            num_workers=2 if torch.cuda.is_available() else 0,
+            dims=2048,
+        )
+    return fid_value
+
+
+def gcnr(img1, img2):
+    """Generalized Contrast-to-Noise Ratio"""
+    _, bins = np.histogram(np.concatenate((img1, img2)), bins=256)
+    f, _ = np.histogram(img1, bins=bins, density=True)
+    g, _ = np.histogram(img2, bins=bins, density=True)
+    f /= f.sum()
+    g /= g.sum()
+    return 1 - np.sum(np.minimum(f, g))
+
+
+def cnr(img1, img2):
+    """Contrast-to-Noise Ratio"""
+    return (img1.mean() - img2.mean()) / np.sqrt(img1.var() + img2.var())
+
+
+def calculate_cnr_gcnr(result_dehazed_cardiac_ultrasound, mask_path):
+    """
+    Evaluate gCNR and CNR metrics for denoised images using paired masks.
+    Saves detailed and summary statistics to Excel.
+    """
+    results = []
+
+    mask = np.array(Image.open(mask_path).convert("L"))
+
+    roi1_pixels = result_dehazed_cardiac_ultrasound[mask == 255]  # Foreground ROI
+    roi2_pixels = result_dehazed_cardiac_ultrasound[mask == 128]  # Background/Noise ROI
+
+    gcnr_val = gcnr(roi1_pixels, roi2_pixels)
+    cnr_val = cnr(roi1_pixels, roi2_pixels)
+
+    results.append([cnr_val, gcnr_val])
+
+    return results
+
+
+def calculate_ks_statistics(
+    result_hazy_cardiac_ultrasound, result_dehazed_cardiac_ultrasound, mask_path
+):
+    mask = np.array(Image.open(mask_path).convert("L"))
+
+    roi1_original = result_hazy_cardiac_ultrasound[mask == 255]  # region A
+    roi1_denoised = result_dehazed_cardiac_ultrasound[mask == 255]
+    roi2_original = result_hazy_cardiac_ultrasound[mask == 128]  # region B
+    roi2_denoised = result_dehazed_cardiac_ultrasound[mask == 128]
+
+    roi1_ks_stat, roi1_ks_p_value = (None, None)
+    roi2_ks_stat, roi2_ks_p_value = (None, None)
+
+    if roi1_original.size > 0 and roi1_denoised.size > 0:
+        roi1_ks_stat, roi1_ks_p_value = ks_2samp(roi1_original, roi1_denoised)
+
+    if roi2_original.size > 0 and roi2_denoised.size > 0:
+        roi2_ks_stat, roi2_ks_p_value = ks_2samp(roi2_original, roi2_denoised)
+
+    return roi1_ks_stat, roi1_ks_p_value, roi2_ks_stat, roi2_ks_p_value
+
+
+def calculate_dice_asd(image_path, label_path, checkpoint_path, image_size=224):
+    try:
+        from test import inference  # Our Segmentation Method
+    except ImportError:
+        raise ImportError(
+            "Segmentation method not available, skipping Dice/ASD calculation"
+        )
+
+    pred_img = inference(image_path, checkpoint_path, image_size)
+    pred = np.array(pred_img) > 127
+
+    label = Image.open(label_path).convert("L")
+    label = label.resize((image_size, image_size), Image.NEAREST)
+    label = np.array(label) > 127
+
+    # calculate Dice
+    intersection = np.logical_and(pred, label).sum()
+    dice = 2 * intersection / (pred.sum() + label.sum() + 1e-8)
+
+    # calculate ASD
+    if pred.sum() == 0 or label.sum() == 0:
+        asd = np.nan
+    else:
+        pred_dt = distance_transform_edt(~pred)
+        label_dt = distance_transform_edt(~label)
+
+        surface_pred = pred ^ binary_erosion(pred)
+        surface_label = label ^ binary_erosion(label)
+
+        d1 = pred_dt[surface_label].mean()
+        d2 = label_dt[surface_pred].mean()
+        asd = (d1 + d2) / 2
+
+    return dice, asd
+
+
+def calculate_final_score(aggregates):
+    try:
+        # (FID + CNR + gCNR):(KS^A + KS^B):(Dice + ASD) = 5:3:2
+
+        group1_score = 0  # FID + CNR + gCNR
+        if aggregates.get("fid") is not None:
+            fid_min = 60.0
+            fid_max = 150.0
+            fid_score = (fid_max - aggregates["fid"]) / (fid_max - fid_min)
+            fid_score = max(0, min(1, fid_score))
+            group1_score += fid_score * 100 * 0.33
+
+        if aggregates.get("cnr_mean") is not None:
+            cnr_min = 1.0
+            cnr_max = 1.5
+            cnr_score = (aggregates["cnr_mean"] - cnr_min) / (cnr_max - cnr_min)
+            cnr_score = max(0, min(1, cnr_score))
+            group1_score += cnr_score * 100 * 0.33
+
+        if aggregates.get("gcnr_mean") is not None:
+            gcnr_min = 0.5
+            gcnr_max = 0.8
+            gcnr_score = (aggregates["gcnr_mean"] - gcnr_min) / (gcnr_max - gcnr_min)
+            gcnr_score = max(0, min(1, gcnr_score))
+            group1_score += gcnr_score * 100 * 0.34
+
+        group2_score = 0  # KS^A + KS^B
+        if aggregates.get("ks_roi1_ksstatistic_mean") is not None:
+            ks1_min = 0.1
+            ks1_max = 0.3
+            ks1_score = (ks1_max - aggregates["ks_roi1_ksstatistic_mean"]) / (
+                ks1_max - ks1_min
+            )
+            ks1_score = max(0, min(1, ks1_score))
+            group2_score += ks1_score * 100 * 0.5
+
+        if aggregates.get("ks_roi2_ksstatistic_mean") is not None:
+            ks2_min = 0.0
+            ks2_max = 0.5
+            ks2_score = (aggregates["ks_roi2_ksstatistic_mean"] - ks2_min) / (
+                ks2_max - ks2_min
+            )
+            ks2_score = max(0, min(1, ks2_score))
+            group2_score += ks2_score * 100 * 0.5
+
+        group3_score = 0  # Dice + ASD
+        if aggregates.get("dice_mean") is not None:
+            dice_min = 0.85
+            dice_max = 0.95
+            dice_score = (aggregates["dice_mean"] - dice_min) / (dice_max - dice_min)
+            dice_score = max(0, min(1, dice_score))
+            group3_score += dice_score * 100 * 0.5
+        if aggregates.get("asd_mean") is not None:
+            asd_min = 0.7
+            asd_max = 2.0
+            asd_score = (asd_max - aggregates["asd_mean"]) / (asd_max - asd_min)
+            asd_score = max(0, min(1, asd_score))
+            group3_score += asd_score * 100 * 0.5
+
+        # Final score calculation
+        final_score = (group1_score * 5 + group2_score * 3 + group3_score * 2) / 10
+
+        return final_score
+
+    except Exception as e:
+        print(f"Error calculating final score: {str(e)}")
+        return 0
+
+
+def plot_metrics(metrics, limits, out_path):
+    plt.style.use("seaborn-v0_8-darkgrid")
+    fig, axes = plt.subplots(1, len(metrics), figsize=(7.2, 2.7), dpi=600)
+    colors = ["#0057b7", "#ffb300", "#008744", "#d62d20"]
+    # Arrow direction: ↑ for up, ↓ for down
+    metric_labels = {
+        "CNR": r"CNR $\uparrow$",
+        "gCNR": r"gCNR $\uparrow$",
+        "KS_A": r"KS$_{septum}$ $\downarrow$",
+        "KS_B": r"KS$_{ventricle}$ $\uparrow$",
+    }
+    for idx, (ax, (name, values)) in enumerate(zip(axes, metrics.items())):
+        ax.hist(
+            values,
+            bins=30,
+            color=colors[idx % len(colors)],
+            alpha=0.85,
+            edgecolor="black",
+            linewidth=0.7,
+        )
+        ax.set_xlabel(metric_labels.get(name, name), fontsize=11)
+        ax.set_ylabel("Count", fontsize=10)
+        # Draw limits
+        if name in limits:
+            for lim in limits[name]:
+                ax.axvline(lim, color="crimson", linestyle="--", lw=1.2)
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        ax.tick_params(axis="both", which="major", labelsize=9)
+    fig.tight_layout(pad=1.5)
+    fig.savefig(out_path, bbox_inches="tight", dpi=600)
+    plt.close(fig)
+
+
+def main(folder: str, roi_folder: str, reference_folder: str):
+    folder = Path(folder)
+    roi_folder = Path(roi_folder)
+    reference_folder = Path(reference_folder)
+
+    folder_files = set(f.name for f in folder.glob("*.png"))
+    roi_files = set(f.name for f in roi_folder.glob("*.png"))
+    ref_files = set(f.name for f in reference_folder.glob("*.png"))
+
+    print(f"Found {len(folder_files)} .png files in output folder: {folder}")
+    print(f"Found {len(roi_files)} .png files in ROI folder: {roi_folder}")
+    print(f"Found {len(ref_files)} .png files in reference folder: {reference_folder}")
+
+    # Find intersection of filenames
+    common_files = sorted(folder_files & roi_files & ref_files)
+    print(f"Found {len(common_files)} images present in all folders.")
+    if len(common_files) == 0:
+        print("No matching images found in all folders. Check your folder contents.")
+        print(f"Output folder files: {sorted(folder_files)}")
+        print(f"ROI folder files: {sorted(roi_files)}")
+        print(f"Reference folder files: {sorted(ref_files)}")
+    assert len(common_files) > 0, (
+        "No matching .png files in all folders. Cannot proceed."
+    )
+
+    metrics = {"CNR": [], "gCNR": [], "KS_A": [], "KS_B": []}
+    limits = {
+        "CNR": [1.0, 1.5],
+        "gCNR": [0.5, 0.8],
+        "KS_A": [0.1, 0.3],
+        "KS_B": [0.0, 0.5],
+    }
+
+    for name in common_files:
+        our_path = folder / name
+        roi_path = roi_folder / name
+        ref_path = reference_folder / name
+
+        assert our_path.exists(), f"Missing file in output folder: {our_path}"
+        assert roi_path.exists(), f"Missing file in ROI folder: {roi_path}"
+        assert ref_path.exists(), f"Missing file in reference folder: {ref_path}"
+
+        try:
+            img = np.array(load_image(str(our_path)))
+            img_ref = np.array(load_image(str(ref_path)))
+        except Exception as e:
+            print(f"Error loading image {name}: {e}")
+            continue
+
+        # CNR/gCNR
+        cnr_gcnr = calculate_cnr_gcnr(img, str(roi_path))
+        metrics["CNR"].append(cnr_gcnr[0][0])
+        metrics["gCNR"].append(cnr_gcnr[0][1])
+
+        # KS statistics
+        ks_a, _, ks_b, _ = calculate_ks_statistics(img_ref, img, str(roi_path))
+        metrics["KS_A"].append(ks_a)
+        metrics["KS_B"].append(ks_b)
+
+    # Compute statistics
+    stats = {
+        k: (np.mean(v), np.std(v), np.min(v), np.max(v)) for k, v in metrics.items()
+    }
+    print("Contrast statistics:")
+    for k, (mean, std, minv, maxv) in stats.items():
+        print(f"{k}: mean={mean:.3f}, std={std:.3f}, min={minv:.3f}, max={maxv:.3f}")
+
+    plot_metrics(metrics, limits, str(folder / "contrast_metrics.png"))
+    print(f"Saved metrics plot to {folder / 'contrast_metrics.png'}")
+
+    # Compute FID
+    fid_image_paths = [str(folder / name) for name in common_files]
+    fid_score = calculate_fid_score(fid_image_paths, str(reference_folder))
+    print(f"FID between {folder} and {reference_folder}: {fid_score:.3f}")
+
+
+if __name__ == "__main__":
+    tyro.cli(main)
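Note: eval.py exposes main() through tyro, so it is invoked roughly as python eval.py --folder OUTPUT_DIR --roi-folder ROI_DIR --reference-folder REF_DIR (flag names follow tyro's defaults). The contrast metrics can also be called directly; a minimal sketch on synthetic ROI samples (placeholder data, assuming the dependencies from the Dockerfile are installed):

# Minimal sketch: calling the contrast metrics from eval.py on synthetic
# ROI samples (placeholder data, not part of this commit).
import numpy as np

from eval import cnr, gcnr

rng = np.random.default_rng(0)
tissue = rng.normal(180.0, 10.0, size=2000)  # bright ROI, e.g. mask == 255
cavity = rng.normal(60.0, 15.0, size=2000)   # dark ROI, e.g. mask == 128
print(f"CNR  = {cnr(tissue, cavity):.3f}")   # well separated -> high CNR
print(f"gCNR = {gcnr(tissue, cavity):.3f}")  # approaches 1 for non-overlapping ROIs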
fid_score.py ADDED
@@ -0,0 +1,480 @@
+"""Calculates the Frechet Inception Distance (FID) to evaluate GANs
+
+The FID metric calculates the distance between two distributions of images.
+Typically, we have summary statistics (mean & covariance matrix) of one
+of these distributions, while the 2nd distribution is given by a GAN.
+
+When run as a stand-alone program, it compares the distribution of
+images that are stored as PNG/JPEG at a specified location with a
+distribution given by summary statistics (in pickle format).
+
+The FID is calculated by assuming that X_1 and X_2 are the activations of
+the pool_3 layer of the inception net for generated samples and real world
+samples respectively.
+
+See --help to see further details.
+
+Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
+of Tensorflow
+
+Copyright 2018 Institute of Bioinformatics, JKU Linz
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import hashlib
+import os
+import pathlib
+from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
+
+import numpy as np
+import torch
+import torchvision.transforms as TF
+from PIL import Image
+from scipy import linalg
+from torch.nn.functional import adaptive_avg_pool2d
+
+try:
+    from tqdm import tqdm
+except ImportError:
+    # If tqdm is not available, provide a mock version of it
+    def tqdm(x):
+        return x
+
+
+from pytorch_fid.inception import InceptionV3
+
+parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
+parser.add_argument("--batch-size", type=int, default=50, help="Batch size to use")
+parser.add_argument(
+    "--num-workers",
+    type=int,
+    help=(
+        "Number of processes to use for data loading. Defaults to `min(8, num_cpus)`"
+    ),
+)
+parser.add_argument(
+    "--device", type=str, default=None, help="Device to use. Like cuda, cuda:0 or cpu"
+)
+parser.add_argument(
+    "--dims",
+    type=int,
+    default=2048,
+    choices=list(InceptionV3.BLOCK_INDEX_BY_DIM),
+    help=(
+        "Dimensionality of Inception features to use. By default, uses pool3 features"
+    ),
+)
+parser.add_argument(
+    "--save-stats",
+    action="store_true",
+    help=(
+        "Generate an npz archive from a directory of samples. "
+        "The first path is used as input and the second as output."
+    ),
+)
+parser.add_argument(
+    "path",
+    type=str,
+    nargs=2,
+    help=("Paths to the generated images or to .npz statistic files"),
+)
+
+IMAGE_EXTENSIONS = {"bmp", "jpg", "jpeg", "pgm", "png", "ppm", "tif", "tiff", "webp"}
+
+
+class ImagePathDataset(torch.utils.data.Dataset):
+    def __init__(self, files, transforms=None):
+        self.files = files
+        self.transforms = transforms
+
+    def __len__(self):
+        return len(self.files)
+
+    def __getitem__(self, i):
+        path = self.files[i]
+        img = Image.open(path).convert("RGB")
+        if self.transforms is not None:
+            img = self.transforms(img)
+        return img
+
+
+def get_activations(
+    files, model, batch_size=50, dims=2048, device="cpu", num_workers=1
+):
+    """Calculates the activations of the pool_3 layer for all images.
+
+    Params:
+    -- files       : List of image files paths
+    -- model       : Instance of inception model
+    -- batch_size  : Batch size of images for the model to process at once.
+                     Make sure that the number of samples is a multiple of
+                     the batch size, otherwise some samples are ignored. This
+                     behavior is retained to match the original FID score
+                     implementation.
+    -- dims        : Dimensionality of features returned by Inception
+    -- device      : Device to run calculations
+    -- num_workers : Number of parallel dataloader workers
+
+    Returns:
+    -- A numpy array of dimension (num images, dims) that contains the
+       activations of the given tensor when feeding inception with the
+       query tensor.
+    """
+    model.eval()
+
+    if batch_size > len(files):
+        print(
+            (
+                "Warning: batch size is bigger than the data size. "
+                "Setting batch size to data size"
+            )
+        )
+        batch_size = len(files)
+    # print(files)
+    dataset = ImagePathDataset(files, transforms=TF.ToTensor())
+    dataloader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        drop_last=False,
+        num_workers=num_workers,
+    )
+
+    pred_arr = np.empty((len(files), dims))
+
+    start_idx = 0
+
+    for batch in tqdm(dataloader):
+        batch = batch.to(device)
+
+        with torch.no_grad():
+            pred = model(batch)[0]
+
+        # If model output is not scalar, apply global spatial average pooling.
+        # This happens if you choose a dimensionality not equal 2048.
+        if pred.size(2) != 1 or pred.size(3) != 1:
+            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))
+
+        pred = pred.squeeze(3).squeeze(2).cpu().numpy()
+
+        pred_arr[start_idx : start_idx + pred.shape[0]] = pred
+
+        start_idx = start_idx + pred.shape[0]
+
+    return pred_arr
+
+
+def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
+    """Numpy implementation of the Frechet Distance.
+    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
+    and X_2 ~ N(mu_2, C_2) is
+    d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
+
+    Stable version by Dougal J. Sutherland.
+
+    Params:
+    -- mu1   : Numpy array containing the activations of a layer of the
+               inception net (like returned by the function 'get_predictions')
+               for generated samples.
+    -- mu2   : The sample mean over activations, precalculated on a
+               representative data set.
+    -- sigma1: The covariance matrix over activations for generated samples.
+    -- sigma2: The covariance matrix over activations, precalculated on a
+               representative data set.
+
+    Returns:
+    --   : The Frechet Distance.
+    """
+
+    mu1 = np.atleast_1d(mu1)
+    mu2 = np.atleast_1d(mu2)
+
+    sigma1 = np.atleast_2d(sigma1)
+    sigma2 = np.atleast_2d(sigma2)
+
+    assert mu1.shape == mu2.shape, (
+        "Training and test mean vectors have different lengths"
+    )
+    assert sigma1.shape == sigma2.shape, (
+        "Training and test covariances have different dimensions"
+    )
+
+    diff = mu1 - mu2
+
+    # Product might be almost singular
+    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
+    if not np.isfinite(covmean).all():
+        msg = (
+            "fid calculation produces singular product; "
+            "adding %s to diagonal of cov estimates"
+        ) % eps
+        print(msg)
+        offset = np.eye(sigma1.shape[0]) * eps
+        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
+
+    # Numerical error might give slight imaginary component
+    if np.iscomplexobj(covmean):
+        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
+            m = np.max(np.abs(covmean.imag))
+            raise ValueError("Imaginary component {}".format(m))
+        covmean = covmean.real
+
+    tr_covmean = np.trace(covmean)
+
+    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
+
+
+def calculate_activation_statistics(
+    files, model, batch_size=50, dims=2048, device="cpu", num_workers=1
+):
+    """Calculation of the statistics used by the FID.
+    Params:
+    -- files       : List of image files paths
+    -- model       : Instance of inception model
+    -- batch_size  : The images numpy array is split into batches with
+                     batch size batch_size. A reasonable batch size
+                     depends on the hardware.
+    -- dims        : Dimensionality of features returned by Inception
+    -- device      : Device to run calculations
+    -- num_workers : Number of parallel dataloader workers
+
+    Returns:
+    -- mu    : The mean over samples of the activations of the pool_3 layer of
+               the inception model.
+    -- sigma : The covariance matrix of the activations of the pool_3 layer of
+               the inception model.
+    """
+    act = get_activations(files, model, batch_size, dims, device, num_workers)
+    mu = np.mean(act, axis=0)
+    sigma = np.cov(act, rowvar=False)
+    return mu, sigma
+
+
+def compute_statistics_of_path(path, model, batch_size, dims, device, num_workers=1):
+    # if path.endswith('.npz'):
+    #     with np.load(path) as f:
+    #         m, s = f['mu'][:], f['sigma'][:]
+    # else:
+    m, s = calculate_activation_statistics(
+        path, model, batch_size, dims, device, num_workers
+    )
+    # else:
+    #     path = pathlib.Path(path)
+    #     files = sorted([file for ext in IMAGE_EXTENSIONS
+    #                    for file in path.glob('*.{}'.format(ext))])
+    #     m, s = calculate_activation_statistics(files, model, batch_size,
+    #                                            dims, device, num_workers)
+
+    return m, s
+
+
+def _fid_cache_paths():
+    tmp_dir = pathlib.Path("tmp")
+    tmp_dir.mkdir(exist_ok=True)
+    stats_path = tmp_dir / "fid_stats.npz"
+    hash_path = tmp_dir / "fid_stats.hash"
+    return stats_path, hash_path
+
+
+def _load_fid_stats(stats_path):
+    arr = np.load(stats_path)
+    return arr["mu"], arr["sigma"]
+
+
+def _save_fid_stats(stats_path, mu, sigma):
+    np.savez_compressed(stats_path, mu=mu, sigma=sigma)
+
+
+def calculate_fid_given_paths(paths, batch_size, device, dims, num_workers=1):
+    """Calculates the FID of two paths, with caching for ground truth stats if the second path is a directory of images."""
+    import pathlib
+
+    if isinstance(paths[1], (str, pathlib.Path)) and pathlib.Path(paths[1]).is_dir():
+        # Get all PNGs in the directory
+        gt_images = list(pathlib.Path(paths[1]).glob("*.png"))
+        stats_path, hash_path = _fid_cache_paths()
+        if stats_path.exists():
+            print(f"Using cached FID stats from {stats_path}")
+            print("WARNING: Cache may be stale if ground truth images have changed.")
+            m1, s1 = _load_fid_stats(stats_path)
+        else:
+            print("Computing FID stats for ground truth images...")
+            block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+            model = InceptionV3([block_idx]).to(device)
+            m1, s1 = calculate_activation_statistics(
+                gt_images, model, batch_size, dims, device, num_workers
+            )
+            _save_fid_stats(stats_path, m1, s1)
+        # m2, s2 for denoised images
+        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+        model = InceptionV3([block_idx]).to(device)
+        m2, s2 = calculate_activation_statistics(
+            paths[0], model, batch_size, dims, device, num_workers
+        )
+        fid_value = calculate_frechet_distance(m1, s1, m2, s2)
+        return fid_value
+
+    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+    model = InceptionV3([block_idx]).to(device)
+    m1, s1 = compute_statistics_of_path(
+        paths[0], model, batch_size, dims, device, num_workers
+    )
+    print(paths[1])
+    m2, s2 = compute_statistics_of_path(
+        paths[1], model, batch_size, dims, device, num_workers
+    )
+    fid_value = calculate_frechet_distance(m1, s1, m2, s2)
+    return fid_value
+
+
+def save_fid_stats(paths, batch_size, device, dims, num_workers=1):
+    """Calculates the FID of two paths"""
+    if not os.path.exists(paths[0]):
+        raise RuntimeError("Invalid path: %s" % paths[0])
+
+    if os.path.exists(paths[1]):
+        raise RuntimeError("Existing output file: %s" % paths[1])
+
+    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+
+    model = InceptionV3([block_idx]).to(device)
+
+    print(f"Saving statistics for {paths[0]}")
+
+    m1, s1 = compute_statistics_of_path(
+        paths[0], model, batch_size, dims, device, num_workers
+    )
+
+    np.savez_compressed(paths[1], mu=m1, sigma=s1)
+
+
+def calculate_fid_with_cached_ground_truth(
+    denoised_image_dirs,
+    ground_truth_image_dirs,
+    batch_size=50,
+    device="cpu",
+    dims=2048,
+    num_workers=1,
+):
+    """
+    Calculates the FID between denoised images and ground truth images, using cached stats for ground truth if possible.
+    Args:
+        denoised_image_dirs: list of denoised image paths
+        ground_truth_image_dirs: list of ground truth image paths (or a directory)
+        batch_size, device, dims, num_workers: same as calculate_fid_given_paths
+    Returns:
+        FID value
+    """
+    # If ground_truth_image_dirs is a directory, get all PNGs
+    if isinstance(ground_truth_image_dirs, (str, pathlib.Path)):
+        ground_truth_image_dirs = list(
+            pathlib.Path(ground_truth_image_dirs).glob("*.png")
+        )
+
+    # Compute hash for cache
+    def compute_file_hashes(file_list):
+        hash_md5 = hashlib.md5()
+        for fname in sorted(map(str, file_list)):
+            try:
+                stat = os.stat(fname)
+                hash_md5.update(fname.encode())
+                hash_md5.update(str(stat.st_mtime).encode())
+            except Exception:
+                continue
+        return hash_md5.hexdigest()
+
+    tmp_dir = pathlib.Path("tmp")
+    tmp_dir.mkdir(exist_ok=True)
+    stats_path = tmp_dir / "fid_stats.npz"
+    hash_path = tmp_dir / "fid_stats.hash"
+    # TODO: caching shouldn't be based on ground truth image dirs
+    # since we can have multiple reconstructions of same ground truth
+    current_hash = compute_file_hashes(ground_truth_image_dirs)
+    cache_valid = False
+    if stats_path.exists() and hash_path.exists():
+        try:
+            with open(hash_path, "r") as f:
+                cached_hash = f.read().strip()
+            if cached_hash == current_hash:
+                cache_valid = True
+        except Exception:
+            pass
+    # TODO: need more sophisticated caching for sweeps
+    if cache_valid:
+        print(f"Using cached FID stats from {stats_path}")
+        arr = np.load(stats_path)
+        mu, sigma = arr["mu"], arr["sigma"]
+    else:
+        print("Computing FID stats for ground truth images...")
+        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+        model = InceptionV3([block_idx]).to(device)
+        mu, sigma = calculate_activation_statistics(
+            ground_truth_image_dirs,
+            model,
+            batch_size=batch_size,
+            dims=dims,
+            device=device,
+            num_workers=num_workers,
+        )
+        np.savez_compressed(stats_path, mu=mu, sigma=sigma)
+        with open(hash_path, "w") as f:
+            f.write(current_hash)
+    # Compute stats for denoised images
+    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+    model = InceptionV3([block_idx]).to(device)
+    mu2, sigma2 = calculate_activation_statistics(
+        denoised_image_dirs,
+        model,
+        batch_size=batch_size,
+        dims=dims,
+        device=device,
+        num_workers=num_workers,
+    )
+    fid_value = calculate_frechet_distance(mu, sigma, mu2, sigma2)
+    return fid_value
+
+
+def main():
+    args = parser.parse_args()
+
+    if args.device is None:
+        device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")
+    else:
+        device = torch.device(args.device)
+
+    if args.num_workers is None:
+        try:
+            num_cpus = len(os.sched_getaffinity(0))
+        except AttributeError:
+            # os.sched_getaffinity is not available under Windows, use
+            # os.cpu_count instead (which may not return the *available* number
+            # of CPUs).
+            num_cpus = os.cpu_count()
+
+        num_workers = min(num_cpus, 8) if num_cpus is not None else 0
+    else:
+        num_workers = args.num_workers
+
+    if args.save_stats:
+        save_fid_stats(args.path, args.batch_size, device, args.dims, num_workers)
+        return
+
+    fid_value = calculate_fid_given_paths(
+        args.path, args.batch_size, device, args.dims, num_workers
+    )
+    print("FID: ", fid_value)
+
+
+if __name__ == "__main__":
+    main()
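Note: fid_score.py keeps the pytorch_fid command-line interface (two positional paths, per the argparse definition above), while eval.py calls calculate_fid_with_cached_ground_truth, which caches the ground-truth Inception statistics under tmp/. A minimal sketch of the direct call, with placeholder folder names that are not part of this commit:

# Sketch: FID for a set of dehazed frames against a folder of clean
# reference frames (placeholder paths; requires torch + pytorch_fid).
from glob import glob

import fid_score

denoised = sorted(glob("results/dehazed/*.png"))  # placeholder output folder
fid = fid_score.calculate_fid_with_cached_ground_truth(
    denoised,
    "data/clean_frames",  # placeholder ground-truth directory; stats cached in tmp/
    batch_size=8,
    device="cpu",
    dims=2048,
)
print(f"FID: {fid:.3f}")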
main.py
ADDED
|
@@ -0,0 +1,743 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import copy
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
os.environ["KERAS_BACKEND"] = "jax"
|
| 6 |
+
|
| 7 |
+
import jax
|
| 8 |
+
import keras
|
| 9 |
+
import matplotlib.pyplot as plt
|
| 10 |
+
import numpy as np
|
| 11 |
+
import scipy
|
| 12 |
+
import tyro
|
| 13 |
+
import zea
|
| 14 |
+
from keras import ops
|
| 15 |
+
from matplotlib.patches import PathPatch
|
| 16 |
+
from matplotlib.path import Path as pltPath
|
| 17 |
+
from PIL import Image
|
| 18 |
+
from skimage import filters, measure, morphology
|
| 19 |
+
from zea import Config, init_device, log
|
| 20 |
+
from zea.internal.operators import Operator
|
| 21 |
+
from zea.models.diffusion import (
|
| 22 |
+
DPS,
|
| 23 |
+
DiffusionModel,
|
| 24 |
+
diffusion_guidance_registry,
|
| 25 |
+
)
|
| 26 |
+
from zea.tensor_ops import L2
|
| 27 |
+
from zea.utils import translate
|
| 28 |
+
from zea.visualize import plot_image_grid
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def L1(x):
|
| 32 |
+
"""L1 norm of a tensor.
|
| 33 |
+
|
| 34 |
+
Implementation of L1 norm: https://mathworld.wolfram.com/L1-Norm.html
|
| 35 |
+
"""
|
| 36 |
+
return ops.sum(ops.abs(x))
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def smooth_L1(x, beta=0.4):
|
| 40 |
+
"""Smooth L1 loss function.
|
| 41 |
+
|
| 42 |
+
Implementation of Smooth L1 loss. Large beta values make it similar to L1 loss,
|
| 43 |
+
while small beta values make it similar to L2 loss.
|
| 44 |
+
"""
|
| 45 |
+
abs_x = ops.abs(x)
|
| 46 |
+
loss = ops.where(abs_x < beta, 0.5 * x**2 / beta, abs_x - 0.5 * beta)
|
| 47 |
+
return ops.sum(loss)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def postprocess(data, normalization_range):
|
| 51 |
+
"""Postprocess data from model output to image."""
|
| 52 |
+
data = ops.clip(data, *normalization_range)
|
| 53 |
+
data = translate(data, normalization_range, (0, 255))
|
| 54 |
+
data = ops.convert_to_numpy(data)
|
| 55 |
+
data = np.squeeze(data, axis=-1)
|
| 56 |
+
return np.clip(data, 0, 255).astype("uint8")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def preprocess(data, normalization_range):
|
| 60 |
+
"""Preprocess data for model input. Converts uint8 image(s) in [0, 255] to model input range."""
|
| 61 |
+
data = ops.convert_to_tensor(data, dtype="float32")
|
| 62 |
+
data = translate(data, (0, 255), normalization_range)
|
| 63 |
+
data = ops.expand_dims(data, axis=-1)
|
| 64 |
+
return data
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def apply_bottom_preservation(
|
| 68 |
+
output_images, input_images, preserve_bottom_percent=30.0, transition_width=10.0
|
| 69 |
+
):
|
| 70 |
+
"""Apply bottom preservation with smooth windowed transition.
|
| 71 |
+
|
| 72 |
+
Args:
|
| 73 |
+
output_images: Model output images, (batch, height, width, channels)
|
| 74 |
+
input_images: Original input images, (batch, height, width, channels)
|
| 75 |
+
preserve_bottom_percent: Percentage of bottom to preserve from input (default 30%)
|
| 76 |
+
transition_width: Percentage of image height for smooth transition (default 10%)
|
| 77 |
+
|
| 78 |
+
Returns:
|
| 79 |
+
Blended images with preserved bottom portion
|
| 80 |
+
"""
|
| 81 |
+
output_shape = ops.shape(output_images)
|
| 82 |
+
|
| 83 |
+
batch_size, height, width, channels = output_shape
|
| 84 |
+
|
| 85 |
+
preserve_height = int(height * preserve_bottom_percent / 100.0)
|
| 86 |
+
transition_height = int(height * transition_width / 100.0)
|
| 87 |
+
|
| 88 |
+
transition_start = height - preserve_height - transition_height
|
| 89 |
+
preserve_start = height - preserve_height
|
| 90 |
+
|
| 91 |
+
transition_start = max(0, transition_start)
|
| 92 |
+
preserve_start = min(height, preserve_start)
|
| 93 |
+
|
| 94 |
+
if transition_start >= preserve_start:
|
| 95 |
+
transition_start = preserve_start
|
| 96 |
+
transition_height = 0
|
| 97 |
+
|
| 98 |
+
y_coords = ops.arange(height, dtype="float32")
|
| 99 |
+
y_coords = ops.reshape(y_coords, (height, 1, 1))
|
| 100 |
+
|
| 101 |
+
if transition_height > 0:
|
| 102 |
+
# Smooth transition using cosine interpolation
|
| 103 |
+
transition_region = ops.logical_and(
|
| 104 |
+
y_coords >= transition_start, y_coords < preserve_start
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
transition_progress = (y_coords - transition_start) / transition_height
|
| 108 |
+
transition_progress = ops.clip(transition_progress, 0.0, 1.0)
|
| 109 |
+
|
| 110 |
+
# Use cosine for smooth transition (0.5 * (1 - cos(π * t)))
|
| 111 |
+
cosine_weight = 0.5 * (1.0 - ops.cos(np.pi * transition_progress))
|
| 112 |
+
|
| 113 |
+
blend_weight = ops.where(
|
| 114 |
+
y_coords < transition_start,
|
| 115 |
+
0.0,
|
| 116 |
+
ops.where(
|
| 117 |
+
transition_region,
|
| 118 |
+
cosine_weight,
|
| 119 |
+
1.0,
|
| 120 |
+
),
|
| 121 |
+
)
|
| 122 |
+
else:
|
| 123 |
+
# No transition, just hard switch
|
| 124 |
+
blend_weight = ops.where(y_coords >= preserve_start, 1.0, 0.0)
|
| 125 |
+
|
| 126 |
+
blend_weight = ops.expand_dims(blend_weight, axis=0)
|
| 127 |
+
|
| 128 |
+
blended_images = (1.0 - blend_weight) * output_images + blend_weight * input_images
|
| 129 |
+
|
| 130 |
+
return blended_images
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def extract_skeleton(images, input_range, sigma_pre=4, sigma_post=4, threshold=0.3):
|
| 134 |
+
"""Extract skeletons from the input images."""
|
| 135 |
+
images_np = ops.convert_to_numpy(images)
|
| 136 |
+
images_np = np.clip(images_np, input_range[0], input_range[1])
|
| 137 |
+
images_np = translate(images_np, input_range, (0, 1))
|
| 138 |
+
images_np = np.squeeze(images_np, axis=-1)
|
| 139 |
+
|
| 140 |
+
skeleton_masks = []
|
| 141 |
+
for img in images_np:
|
| 142 |
+
img[img < threshold] = 0
|
| 143 |
+
smoothed = filters.gaussian(img, sigma=sigma_pre)
|
| 144 |
+
binary = smoothed > filters.threshold_otsu(smoothed)
|
| 145 |
+
skeleton = morphology.skeletonize(binary)
|
| 146 |
+
skeleton = morphology.dilation(skeleton, morphology.disk(2))
|
| 147 |
+
skeleton = filters.gaussian(skeleton.astype(np.float32), sigma=sigma_post)
|
| 148 |
+
skeleton_masks.append(skeleton)
|
| 149 |
+
|
| 150 |
+
skeleton_masks = np.array(skeleton_masks)
|
| 151 |
+
skeleton_masks = np.expand_dims(skeleton_masks, axis=-1)
|
| 152 |
+
|
| 153 |
+
# normalize to [0, 1]
|
| 154 |
+
min_val, max_val = np.min(skeleton_masks), np.max(skeleton_masks)
|
| 155 |
+
skeleton_masks = (skeleton_masks - min_val) / (max_val - min_val + 1e-8)
|
| 156 |
+
|
| 157 |
+
return ops.convert_to_tensor(skeleton_masks, dtype=images.dtype)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class IdentityOperator(Operator):
|
| 161 |
+
def forward(self, data):
|
| 162 |
+
return data
|
| 163 |
+
|
| 164 |
+
def __str__(self):
|
| 165 |
+
return "y = x"
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@diffusion_guidance_registry(name="semantic_dps")
|
| 169 |
+
class SemanticDPS(DPS):
|
| 170 |
+
def __init__(
|
| 171 |
+
self,
|
| 172 |
+
diffusion_model,
|
| 173 |
+
segmentation_model,
|
| 174 |
+
operator,
|
| 175 |
+
disable_jit=False,
|
| 176 |
+
**kwargs,
|
| 177 |
+
):
|
| 178 |
+
"""Initialize the diffusion guidance.
|
| 179 |
+
|
| 180 |
+
Args:
|
| 181 |
+
diffusion_model: The diffusion model to use for guidance.
|
| 182 |
+
operator: The forward (measurement) operator to use for guidance.
|
| 183 |
+
disable_jit: Whether to disable JIT compilation.
|
| 184 |
+
"""
|
| 185 |
+
self.diffusion_model = diffusion_model
|
| 186 |
+
self.segmentation_model = segmentation_model
|
| 187 |
+
self.operator = operator
|
| 188 |
+
self.disable_jit = disable_jit
|
| 189 |
+
self.setup(**kwargs)
|
| 190 |
+
|
| 191 |
+
def _get_fixed_mask(
|
| 192 |
+
self,
|
| 193 |
+
images,
|
| 194 |
+
bottom_px=40,
|
| 195 |
+
top_px=20,
|
| 196 |
+
):
|
| 197 |
+
batch_size, height, width, channels = ops.shape(images)
|
| 198 |
+
|
| 199 |
+
# Create row indices for each pixel
|
| 200 |
+
row_indices = ops.arange(height)
|
| 201 |
+
row_indices = ops.reshape(row_indices, (height, 1))
|
| 202 |
+
row_indices = ops.tile(row_indices, (1, width))
|
| 203 |
+
|
| 204 |
+
# Create top row mask
|
| 205 |
+
fixed_mask = ops.where(
|
| 206 |
+
ops.logical_or(row_indices < top_px, row_indices >= height - bottom_px),
|
| 207 |
+
1.0,
|
| 208 |
+
0.0,
|
| 209 |
+
)
|
| 210 |
+
fixed_mask = ops.expand_dims(fixed_mask, axis=0)
|
| 211 |
+
fixed_mask = ops.expand_dims(fixed_mask, axis=-1)
|
| 212 |
+
fixed_mask = ops.tile(fixed_mask, (batch_size, 1, 1, channels))
|
| 213 |
+
|
| 214 |
+
return fixed_mask
|
| 215 |
+
|
| 216 |
+
def _get_segmentation_mask(self, images, threshold, sigma):
|
| 217 |
+
input_range = self.diffusion_model.input_range
|
| 218 |
+
images = ops.clip(images, input_range[0], input_range[1])
|
| 219 |
+
images = translate(images, input_range, (-1, 1))
|
| 220 |
+
|
| 221 |
+
masks = self.segmentation_model(images)
|
| 222 |
+
mask_vent = masks[..., 0] # ROI 1 ventricle
|
| 223 |
+
mask_sept = masks[..., 1] # ROI 2 septum
|
| 224 |
+
|
| 225 |
+
def _preprocess_mask(mask):
|
| 226 |
+
mask = ops.convert_to_numpy(mask)
|
| 227 |
+
mask = np.expand_dims(mask, axis=-1)
|
| 228 |
+
mask = np.where(mask > threshold, 1.0, 0.0)
|
| 229 |
+
mask = filters.gaussian(mask, sigma=sigma)
|
| 230 |
+
mask = (mask - ops.min(mask)) / (ops.max(mask) - ops.min(mask) + 1e-8)
|
| 231 |
+
return mask
|
| 232 |
+
|
| 233 |
+
mask_vent = _preprocess_mask(mask_vent)
|
| 234 |
+
mask_sept = _preprocess_mask(mask_sept)
|
| 235 |
+
return mask_vent, mask_sept
|
| 236 |
+
|
| 237 |
+
def _get_dark_mask(self, images):
|
| 238 |
+
min_val = self.diffusion_model.input_range[0]
|
| 239 |
+
dark_mask = ops.where(ops.abs(images - min_val) < 1e-6, 1.0, 0.0)
|
| 240 |
+
return dark_mask
|
| 241 |
+
|
| 242 |
+
def make_omega_map(
|
| 243 |
+
self, images, mask_params, fixed_mask_params, skeleton_params, guidance_kwargs
|
| 244 |
+
):
|
| 245 |
+
masks = self.get_masks(images, mask_params, fixed_mask_params, skeleton_params)
|
| 246 |
+
|
| 247 |
+
masks_vent = masks["vent"]
|
| 248 |
+
masks_sept = masks["sept"]
|
| 249 |
+
masks_fixed = masks["fixed"]
|
| 250 |
+
masks_skeleton = masks["skeleton"]
|
| 251 |
+
masks_dark = masks["dark"]
|
| 252 |
+
|
| 253 |
+
masks_strong = ops.clip(
|
| 254 |
+
masks_sept + masks_fixed + masks_skeleton + masks_dark, 0, 1
|
| 255 |
+
)
|
| 256 |
+
|
| 257 |
+
# background = not masks_strong, not vent
|
| 258 |
+
background = ops.where(masks_strong < 0.1, 1.0, 0.0) * ops.where(
|
| 259 |
+
masks_vent == 0, 1.0, 0.0
|
| 260 |
+
)
|
| 261 |
+
|
| 262 |
+
masks_vent_filtered = masks_vent * (1.0 - masks_strong)
|
| 263 |
+
|
| 264 |
+
per_pixel_omega = (
|
| 265 |
+
guidance_kwargs["omega"] * background
|
| 266 |
+
+ guidance_kwargs["omega_vent"] * masks_vent_filtered
|
| 267 |
+
+ guidance_kwargs["omega_sept"] * masks_strong
|
| 268 |
+
)
|
| 269 |
+
|
| 270 |
+
haze_mask_components = (masks_vent > 0.5) * (1 - masks_strong > 0.5)
|
| 271 |
+
|
| 272 |
+
haze_mask = []
|
| 273 |
+
for i, m in enumerate(haze_mask_components):
|
| 274 |
+
if scipy.ndimage.label(m)[1] > 1:
|
| 275 |
+
# masks_strong _splits_ masks_vent in 2 or more components
|
| 276 |
+
# so we fall back to masks_vent
|
| 277 |
+
haze_mask.append(masks_vent[i])
|
| 278 |
+
# also remove guidance from this region to avoid bringing haze in
|
| 279 |
+
per_pixel_omega = per_pixel_omega.at[i].set(
|
| 280 |
+
per_pixel_omega[i] * (1 - masks_vent[i])
|
| 281 |
+
)
|
| 282 |
+
else:
|
| 283 |
+
# masks_strong 'shaves off' some of masks_vent,
|
| 284 |
+
# where there is tissue
|
| 285 |
+
haze_mask.append((masks_vent * (1 - masks_strong))[i])
|
| 286 |
+
haze_mask = ops.stack(haze_mask, axis=0)
|
| 287 |
+
|
| 288 |
+
masks["per_pixel_omega"] = per_pixel_omega
|
| 289 |
+
masks["haze"] = haze_mask
|
| 290 |
+
|
| 291 |
+
return masks
|

    def get_masks(self, images, mask_params, fixed_mask_params, skeleton_params):
        """Generate all guidance masks from the input images."""
        masks_vent, masks_sept = self._get_segmentation_mask(images, **mask_params)
        masks_fixed = self._get_fixed_mask(images, **fixed_mask_params)
        masks_skeleton = extract_skeleton(
            images, self.diffusion_model.input_range, **skeleton_params
        )
        masks_dark = self._get_dark_mask(images)
        return {
            "vent": masks_vent,
            "sept": masks_sept,
            "fixed": masks_fixed,
            "skeleton": masks_skeleton,
            "dark": masks_dark,
        }

    def compute_error(
        self,
        noisy_images,
        measurements,
        noise_rates,
        signal_rates,
        per_pixel_omega,
        haze_mask,
        eta=0.01,
        smooth_l1_beta=0.5,
        **kwargs,
    ):
        """Compute measurement error for diffusion posterior sampling.

        Args:
            noisy_images: Noisy images.
            measurements: Target measurements.
            noise_rates: Current noise rates.
            signal_rates: Current signal rates.
            per_pixel_omega: Per-pixel weights for the measurement error.
            haze_mask: Mask of the region penalized by the haze prior.
            eta: Weight of the haze prior penalty.
            smooth_l1_beta: Beta parameter of the smooth L1 haze penalty.
            **kwargs: Additional arguments for the operator.

        Returns:
            Tuple of (total_error, (pred_noises, pred_images))
        """
        pred_noises, pred_images = self.diffusion_model.denoise(
            noisy_images,
            noise_rates,
            signal_rates,
            training=False,
        )

        measurement_error = L2(
            per_pixel_omega
            * (measurements - self.operator.forward(pred_images, **kwargs))
        )

        hazy_pixels = pred_images * haze_mask

        # L1 penalty on haze pixels
        # add +1 to make -1 (=black) the 'sparse' value
        haze_prior_error = smooth_L1(hazy_pixels + 1, beta=smooth_l1_beta)

        total_error = measurement_error + eta * haze_prior_error

        return total_error, (pred_noises, pred_images)
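In words, the guidance objective above is a weighted data-consistency term plus a sparsity prior on the predicted intensities inside the haze region: total = L2(p * (y - A(x_hat))) + eta * SmoothL1(x_hat * m_haze + 1). A rough numpy sketch of the same computation, where l2 and smooth_l1 are assumed stand-ins for the L2 and smooth_L1 helpers defined earlier in main.py (they may differ in detail) and the identity operator replaces self.operator.forward:

import numpy as np

def l2(x):
    # stand-in for main.py's L2 helper (assumed: Euclidean norm of the residual)
    return np.sqrt(np.sum(x**2))

def smooth_l1(x, beta=1.0):
    # stand-in for main.py's smooth_L1 helper (assumed: summed Huber-style penalty)
    a = np.abs(x)
    return np.sum(np.where(a < beta, 0.5 * a**2 / beta, a - 0.5 * beta))

y = np.random.uniform(-1, 1, (32, 32))       # hazy measurement in [-1, 1]
x_hat = np.random.uniform(-1, 1, (32, 32))   # denoised estimate at the current step
p = np.ones_like(y)                          # per-pixel guidance weights
m_haze = np.zeros_like(y)
m_haze[8:24, 8:24] = 1.0                     # haze (ventricle) region

eta, beta = 0.01, 0.5
total = l2(p * (y - x_hat)) + eta * smooth_l1(x_hat * m_haze + 1.0, beta=beta)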


def init(config):
    """Initialize models, operator, and guidance objects for semantic-dps dehazing."""

    operator = IdentityOperator()

    diffusion_model = DiffusionModel.from_preset(
        config.diffusion_model_path,
    )
    log.success(
        f"Diffusion model loaded from {log.yellow(config.diffusion_model_path)}"
    )
    segmentation_model = load_segmentation_model(config.segmentation_model_path)

    log.success(
        f"Segmentation model loaded from {log.yellow(config.segmentation_model_path)}"
    )

    guidance_fn = SemanticDPS(
        diffusion_model=diffusion_model,
        segmentation_model=segmentation_model,
        operator=operator,
    )
    diffusion_model._init_operator_and_guidance(operator, guidance_fn)

    return diffusion_model


def load_segmentation_model(path):
    """Load segmentation model"""
    segmentation_model = keras.saving.load_model(path)
    return segmentation_model


def run(
    hazy_images: any,
    diffusion_model: DiffusionModel,
    seed,
    guidance_kwargs: dict,
    mask_params: dict,
    fixed_mask_params: dict,
    skeleton_params: dict,
    batch_size: int = 4,
    diffusion_steps: int = 100,
    initial_diffusion_step: int = 0,
    threshold_output_quantile: float = None,
    preserve_bottom_percent: float = 30.0,
    bottom_transition_width: float = 10.0,
    verbose: bool = True,
):
    """Dehaze a batch of images with semantic diffusion posterior sampling.

    Returns the postprocessed hazy images, predicted tissue images,
    predicted haze images, and the masks used for guidance.
    """
    input_range = diffusion_model.input_range

    hazy_images = preprocess(hazy_images, normalization_range=input_range)

    pred_tissue_images = []
    masks_out = []
    num_images = hazy_images.shape[0]
    num_batches = (num_images + batch_size - 1) // batch_size

    progbar = keras.utils.Progbar(num_batches, verbose=verbose)
    for batch_idx in range(num_batches):
        batch = hazy_images[batch_idx * batch_size : (batch_idx + 1) * batch_size]

        masks = diffusion_model.guidance_fn.make_omega_map(
            batch, mask_params, fixed_mask_params, skeleton_params, guidance_kwargs
        )

        batch_images = diffusion_model.posterior_sample(
            batch,
            n_samples=1,
            n_steps=diffusion_steps,
            initial_step=initial_diffusion_step,
            seed=seed,
            verbose=True,
            per_pixel_omega=masks["per_pixel_omega"],
            haze_mask=masks["haze"],
            eta=guidance_kwargs["eta"],
            smooth_l1_beta=guidance_kwargs["smooth_l1_beta"],
        )
        batch_images = ops.take(batch_images, 0, axis=1)

        pred_tissue_images.append(batch_images)
        masks_out.append(masks)
        progbar.update(batch_idx + 1)

    pred_tissue_images = ops.concatenate(pred_tissue_images, axis=0)
    masks_out = {
        key: ops.concatenate([m[key] for m in masks_out], axis=0)
        for key in masks_out[0].keys()
    }
    pred_haze_images = hazy_images - pred_tissue_images - 1

    if threshold_output_quantile is not None:
        threshold_value = ops.quantile(
            pred_tissue_images, threshold_output_quantile, axis=(1, 2), keepdims=True
        )
        pred_tissue_images = ops.where(
            pred_tissue_images < threshold_value, input_range[0], pred_tissue_images
        )

    # Apply bottom preservation with smooth transition
    if preserve_bottom_percent > 0:
        pred_tissue_images = apply_bottom_preservation(
            pred_tissue_images,
            hazy_images,
            preserve_bottom_percent=preserve_bottom_percent,
            transition_width=bottom_transition_width,
        )

    pred_tissue_images = postprocess(pred_tissue_images, input_range)
    hazy_images = postprocess(hazy_images, input_range)
    pred_haze_images = postprocess(pred_haze_images, input_range)

    return hazy_images, pred_tissue_images, pred_haze_images, masks_out
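One detail of run worth calling out: when threshold_output_quantile is set, each output image gets its own intensity floor, so its darkest pixels are pushed all the way to the background value instead of being left as faint residue. Roughly, in numpy (illustrative quantile, not the configured value):

import numpy as np

pred = np.random.uniform(-1, 1, (2, 32, 32, 1))              # batch of dehazed outputs in [-1, 1]
thresh = np.quantile(pred, 0.2, axis=(1, 2), keepdims=True)  # per-image intensity floor
pred = np.where(pred < thresh, -1.0, pred)                   # clamp the darkest pixels to background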


def add_shape_from_mask(ax, mask, **kwargs):
    """Add a shape to an axis from a mask array.

    Args:
        ax (plt.Axes): matplotlib axis
        mask (ndarray): numpy array whose non-zero
            region defines the shape of interest.
    Kwargs:
        edgecolor (str): color of the shape's edge
        facecolor (str): color of the shape's face
        linewidth (int): width of the shape's edge

    Returns:
        list: the PathPatch objects added to the axis
    """
    # Pad mask to ensure edge contours are found
    padded_mask = np.pad(mask, pad_width=1, mode="constant", constant_values=0)
    contours = measure.find_contours(padded_mask, 0.5)
    patches = []
    for contour in contours:
        # Remove padding offset
        contour -= 1
        path = pltPath(contour[:, ::-1])
        patch = PathPatch(path, **kwargs)
        patches.append(ax.add_patch(patch))
    return patches
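A minimal usage sketch for add_shape_from_mask (assuming this main.py module is importable from the repo root); it outlines the non-zero region of any binary mask on top of an image:

import numpy as np
import matplotlib.pyplot as plt
from main import add_shape_from_mask  # this module

image = np.random.rand(128, 128)
mask = np.zeros((128, 128))
mask[40:90, 30:80] = 1.0

fig, ax = plt.subplots()
ax.imshow(image, cmap="gray")
add_shape_from_mask(ax, mask, facecolor="red", alpha=0.3)
fig.savefig("mask_overlay.png")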


def plot_batch_with_named_masks(
    images, masks_dict, mask_colors=None, titles=None, **kwargs
):
    """
    Plot batch of images in rows, each column overlays a different mask from the dict.
    Mask labels are shown as column titles. If mask name is 'per_pixel_omega', show it
    directly with inferno colormap (no overlay).

    Args:
        images: np.ndarray, shape (batch, height, width, channels)
        masks_dict: dict of {name: mask}, each mask shape (batch, height, width, channels)
        mask_colors: dict of {name: color} or None (default colors used)
        titles: list of column titles or None (mask names used)
    """
    mask_names = list(masks_dict.keys())
    batch_size = images.shape[0]
    default_colors = ["red", "green", "#33aaff", "yellow", "magenta", "cyan"]
    mask_colors = mask_colors or {
        name: default_colors[i % len(default_colors)]
        for i, name in enumerate(mask_names)
    }

    # Prepare images for each column
    columns = []
    cmaps = []
    for name in mask_names:
        if name == "per_pixel_omega":
            mask_np = np.array(masks_dict[name])
            columns.append(np.squeeze(mask_np))
            cmaps.append(["inferno"] * batch_size)
        else:
            columns.append(np.squeeze(images))
            cmaps.append(["gray"] * batch_size)

    # Stack columns: shape (num_columns, batch, ...)
    all_images = np.stack(columns, axis=0)
    # Rearrange to (batch, num_columns, ...)
    all_images = (
        np.transpose(all_images, (1, 0, 2, 3, 4))
        if all_images.ndim == 5
        else np.transpose(all_images, (1, 0, 2, 3))
    )
    # Flatten to (batch * num_columns, ...)
    all_images = all_images.reshape(batch_size * len(mask_names), *images.shape[1:])

    # Flatten cmaps for plot_image_grid in the same order as images
    flat_cmaps = []
    for row in range(batch_size):
        for col in range(len(mask_names)):
            flat_cmaps.append(cmaps[col][row])

    fig, _ = plot_image_grid(
        all_images,
        ncols=len(mask_names),
        remove_axis=False,
        cmap=flat_cmaps,
        figsize=(8, 3.3),
        **kwargs,
    )

    # Overlay masks for non-per_pixel_omega columns
    for col_idx, name in enumerate(mask_names):
        if name == "per_pixel_omega":
            continue
        mask_np = np.array(masks_dict[name])
        axes = fig.axes[col_idx : batch_size * len(mask_names) : len(mask_names)]
        for ax, mask_img in zip(axes, mask_np):
            add_shape_from_mask(
                ax, mask_img.squeeze(), color=mask_colors[name], alpha=0.3
            )

    # Add column titles
    row_idx = 0
    if titles is None:
        titles = mask_names
    for col_idx, name in enumerate(titles):
        ax_idx = row_idx * len(mask_names) + col_idx
        fig.axes[ax_idx].set_title(name, fontsize=9, color="white")
        fig.axes[ax_idx].set_facecolor("black")

    # Add colorbar for per_pixel_omega if present
    if "per_pixel_omega" in mask_names:
        col_idx = mask_names.index("per_pixel_omega")
        axes = fig.axes[col_idx : batch_size * len(mask_names) : len(mask_names)]

        # Get vertical bounds of the subplot column
        top_ax = axes[0]
        bottom_ax = axes[-1]
        top_pos = top_ax.get_position()
        bottom_pos = bottom_ax.get_position()

        full_y0 = bottom_pos.y0
        full_y1 = top_pos.y1
        full_height = full_y1 - full_y0

        # Manually shrink to 80% of full height and center vertically
        scale = 0.8
        height = full_height * scale
        y0 = full_y0 + (full_height - height) / 2

        x0 = top_pos.x1 + 0.015  # Horizontal position to the right
        width = 0.015  # Thin bar

        # Add colorbar axis
        cax = fig.add_axes([x0, y0, width, height])

        im = axes[0].get_images()[0] if axes[0].get_images() else None
        cbar = fig.colorbar(im, cax=cax)
        cbar.set_label(r"Guidance weighting $\mathbf{p}$")
        cbar.ax.yaxis.set_major_locator(plt.MaxNLocator(nbins=6))
        cbar.ax.yaxis.set_tick_params(labelsize=7)
        cbar.ax.yaxis.label.set_size(8)

    return fig


def plot_dehazed_results(
    hazy_images,
    pred_tissue_images,
    pred_haze_images,
    diffusion_model,
    titles=("Hazy", "Dehazed", "Haze"),
):
    """Create a visualization grid of hazy, dehazed, and estimated haze images."""

    # Stack the three image sets into one grid: one row per entry in `titles`
    input_shape = diffusion_model.input_shape
    stack_images = ops.stack(
        [
            hazy_images,
            pred_tissue_images,
            pred_haze_images,
        ]
    )
    stack_images = ops.reshape(stack_images, (-1, input_shape[0], input_shape[1]))

    fig, _ = plot_image_grid(
        stack_images,
        ncols=len(hazy_images),
        remove_axis=False,
        vmin=0,
        vmax=255,
    )
    # Label the first axis of each row with the corresponding title
    for i, ax in enumerate(fig.axes):
        if i % len(hazy_images) == 0:
            label = titles[(i // len(hazy_images)) % len(titles)]
            ax.set_ylabel(label, fontsize=12)

    return fig


def main(
    input_folder: str = "./assets",
    output_folder: str = "./temp",
    num_imgs_plot: int = 4,
    device: str = "auto:1",
    config: str = "configs/semantic_dps.yaml",
):
    """Dehaze all PNG images in input_folder and save results and figures."""
    num_img = num_imgs_plot

    zea.visualize.set_mpl_style()
    init_device(device)

    config = Config.from_yaml(config)
    seed = jax.random.PRNGKey(config.seed)

    paths = list(Path(input_folder).glob("*.png"))

    output_folder = Path(output_folder)

    images = []
    for path in paths:
        image = zea.io_lib.load_image(path)
        images.append(image)
    images = ops.stack(images, axis=0)

    diffusion_model = init(config)

    hazy_images, pred_tissue_images, pred_haze_images, masks = run(
        images,
        diffusion_model=diffusion_model,
        seed=seed,
        **config.params,
    )

    output_folder.mkdir(parents=True, exist_ok=True)

    for image, path in zip(pred_tissue_images, paths):
        image = ops.convert_to_numpy(image)
        file_name = path.name
        Image.fromarray(image).save(output_folder / file_name)

    fig = plot_dehazed_results(
        hazy_images[:num_img],
        pred_tissue_images[:num_img],
        pred_haze_images[:num_img],
        diffusion_model,
        titles=[
            r"Hazy $\mathbf{y}$",
            r"Dehazed $\mathbf{\hat{x}}$",
            r"Haze $\mathbf{\hat{h}}$",
        ],
    )
    path = Path("dehazed_results.png")
    save_kwargs = {"bbox_inches": "tight", "dpi": 300}
    fig.savefig(path, **save_kwargs)
    fig.savefig(path.with_suffix(".pdf"), **save_kwargs)
    log.success(f"Dehazed results saved to {log.yellow(path)}")

    masks_viz = copy.deepcopy(masks)
    masks_viz.pop("haze")

    masks_viz = {k: v[:num_img] for k, v in masks_viz.items()}

    fig = plot_batch_with_named_masks(
        images[:num_img],
        masks_viz,
        titles=[
            r"Ventricle $v(\mathbf{y})$",
            r"Septum $s(\mathbf{y})$",
            r"Fixed",
            r"Skeleton $t(\mathbf{y})$",
            r"Dark $b(\mathbf{y})$",
            r"Guidance $d(\mathbf{y})$",
        ],
    )
    path = Path("segmentation_steps.png")
    fig.savefig(path, **save_kwargs)
    fig.savefig(path.with_suffix(".pdf"), **save_kwargs)
    log.success(f"Segmentation steps saved to {log.yellow(path)}")

    plt.close("all")


if __name__ == "__main__":
    tyro.cli(main)