Commit · 36e1539
Parent(s): init

Files changed:
- .gitignore +5 -0
- Dockerfile +9 -0
- assets/patient-1-4C-frame-2.png +0 -0
- assets/patient-17-4C-frame-11.png +0 -0
- assets/patient-21-4C-frame-21.png +0 -0
- assets/patient-46-4C-frame-57.png +0 -0
- assets/patient-47-4C-frame-59.png +0 -0
- assets/patient-50-4C-frame-53.png +0 -0
- configs/semantic_dps.yaml +28 -0
- eval.py +321 -0
- fid_score.py +480 -0
- main.py +743 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+.devcontainer
+.env
+temp/
+*.png
+*.pdf
Dockerfile ADDED
@@ -0,0 +1,9 @@
+FROM zeahub/all:v0.0.4
+
+RUN pip install --no-cache-dir SimpleITK tyro beautifulsoup4 tabulate optuna
+
+RUN pip install --no-cache-dir --no-deps pytorch_fid
+
+RUN pip install --no-cache-dir -U keras
+
+WORKDIR /workspace
assets/patient-1-4C-frame-2.png ADDED
assets/patient-17-4C-frame-11.png ADDED
assets/patient-21-4C-frame-21.png ADDED
assets/patient-46-4C-frame-57.png ADDED
assets/patient-47-4C-frame-59.png ADDED
assets/patient-50-4C-frame-53.png ADDED
configs/semantic_dps.yaml ADDED
@@ -0,0 +1,28 @@
+diffusion_model_path: "hf://tristan-deep/semantic-diffusion-echo-dehazing"
+segmentation_model_path: "hf://tristan-deep/semantic-segmentation-echo-dehazing"
+seed: 42
+
+params:
+  diffusion_steps: 480
+  initial_diffusion_step: 0
+  batch_size: 16
+  threshold_output_quantile: 0.17447
+  preserve_bottom_percent: 32.0
+  bottom_transition_width: 7.0
+
+  mask_params:
+    sigma: 4.2
+    threshold: 0.176
+  fixed_mask_params:
+    top_px: 20
+    bottom_px: 40
+  skeleton_params:
+    sigma_pre: 4.2
+    sigma_post: 4.2
+    threshold: 0.176
+  guidance_kwargs:
+    omega: 1
+    omega_vent: 0.3
+    omega_sept: 2.037
+    eta: 0.00780
+    smooth_l1_beta: 1.6355
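Note: the guidance_kwargs above become a per-pixel weight map in SemanticDPS.make_omega_map (main.py): background pixels are weighted by omega, the haze-prone ventricle region by omega_vent, and the septum/fixed/skeleton/dark regions by omega_sept. A minimal NumPy sketch of that weighting, using synthetic placeholder masks that are not part of this commit:

# Minimal sketch of the per-pixel guidance weighting from make_omega_map;
# the mask arrays below are synthetic placeholders.
import numpy as np

omega, omega_vent, omega_sept = 1.0, 0.3, 2.037  # values from this config

masks_vent = np.zeros((128, 128))    # ventricle (haze-prone) region
masks_strong = np.zeros((128, 128))  # septum + fixed + skeleton + dark regions
masks_vent[40:90, 40:90] = 1.0
masks_strong[20:40, :] = 1.0

background = (masks_strong < 0.1) * (masks_vent == 0)
masks_vent_filtered = masks_vent * (1.0 - masks_strong)

per_pixel_omega = (
    omega * background
    + omega_vent * masks_vent_filtered
    + omega_sept * masks_strong
)
print(per_pixel_omega.min(), per_pixel_omega.max())  # 0.3 ... 2.037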
eval.py ADDED
@@ -0,0 +1,321 @@
+import warnings
+from glob import glob
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+import tyro
+from PIL import Image
+from scipy.ndimage import binary_erosion, distance_transform_edt
+from scipy.stats import ks_2samp
+from zea.io_lib import load_image
+
+import fid_score
+
+
+def calculate_fid_score(denoised_image_dirs, ground_truth_dir):
+    if isinstance(denoised_image_dirs, (str, Path)):
+        denoised_image_dirs = [denoised_image_dirs]
+    elif not isinstance(denoised_image_dirs, list):
+        raise ValueError("Input must be a path or list of paths")
+
+    clean_images_folder = glob(str(ground_truth_dir) + "/*.png")
+
+    print(f"Looking for clean images in: {ground_truth_dir}")
+    print(f"Found {len(clean_images_folder)} clean images")
+
+    # Determine optimal batch size based on number of images
+    num_denoised = len(denoised_image_dirs)
+    num_clean = len(clean_images_folder)
+    optimal_batch_size = min(8, num_denoised, num_clean)
+    print(f"Using batch size: {optimal_batch_size}")
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message="os.fork.*JAX is multithreaded")
+
+        fid_value = fid_score.calculate_fid_with_cached_ground_truth(
+            denoised_image_dirs,
+            clean_images_folder,
+            batch_size=optimal_batch_size,
+            device="cuda" if torch.cuda.is_available() else "cpu",
+            num_workers=2 if torch.cuda.is_available() else 0,
+            dims=2048,
+        )
+    return fid_value
+
+
+def gcnr(img1, img2):
+    """Generalized Contrast-to-Noise Ratio"""
+    _, bins = np.histogram(np.concatenate((img1, img2)), bins=256)
+    f, _ = np.histogram(img1, bins=bins, density=True)
+    g, _ = np.histogram(img2, bins=bins, density=True)
+    f /= f.sum()
+    g /= g.sum()
+    return 1 - np.sum(np.minimum(f, g))
+
+
+def cnr(img1, img2):
+    """Contrast-to-Noise Ratio"""
+    return (img1.mean() - img2.mean()) / np.sqrt(img1.var() + img2.var())
+
+
+def calculate_cnr_gcnr(result_dehazed_cardiac_ultrasound, mask_path):
+    """
+    Evaluate gCNR and CNR metrics for denoised images using paired masks.
+    Saves detailed and summary statistics to Excel.
+    """
+    results = []
+
+    mask = np.array(Image.open(mask_path).convert("L"))
+
+    roi1_pixels = result_dehazed_cardiac_ultrasound[mask == 255]  # Foreground ROI
+    roi2_pixels = result_dehazed_cardiac_ultrasound[mask == 128]  # Background/Noise ROI
+
+    gcnr_val = gcnr(roi1_pixels, roi2_pixels)
+    cnr_val = cnr(roi1_pixels, roi2_pixels)
+
+    results.append([cnr_val, gcnr_val])
+
+    return results
+
+
+def calculate_ks_statistics(
+    result_hazy_cardiac_ultrasound, result_dehazed_cardiac_ultrasound, mask_path
+):
+    mask = np.array(Image.open(mask_path).convert("L"))
+
+    roi1_original = result_hazy_cardiac_ultrasound[mask == 255]  # region A
+    roi1_denoised = result_dehazed_cardiac_ultrasound[mask == 255]
+    roi2_original = result_hazy_cardiac_ultrasound[mask == 128]  # region B
+    roi2_denoised = result_dehazed_cardiac_ultrasound[mask == 128]
+
+    roi1_ks_stat, roi1_ks_p_value = (None, None)
+    roi2_ks_stat, roi2_ks_p_value = (None, None)
+
+    if roi1_original.size > 0 and roi1_denoised.size > 0:
+        roi1_ks_stat, roi1_ks_p_value = ks_2samp(roi1_original, roi1_denoised)
+
+    if roi2_original.size > 0 and roi2_denoised.size > 0:
+        roi2_ks_stat, roi2_ks_p_value = ks_2samp(roi2_original, roi2_denoised)
+
+    return roi1_ks_stat, roi1_ks_p_value, roi2_ks_stat, roi2_ks_p_value
+
+
+def calculate_dice_asd(image_path, label_path, checkpoint_path, image_size=224):
+    try:
+        from test import inference  # Our Segmentation Method
+    except ImportError:
+        raise ImportError(
+            "Segmentation method not available, skipping Dice/ASD calculation"
+        )
+
+    pred_img = inference(image_path, checkpoint_path, image_size)
+    pred = np.array(pred_img) > 127
+
+    label = Image.open(label_path).convert("L")
+    label = label.resize((image_size, image_size), Image.NEAREST)
+    label = np.array(label) > 127
+
+    # calculate Dice
+    intersection = np.logical_and(pred, label).sum()
+    dice = 2 * intersection / (pred.sum() + label.sum() + 1e-8)
+
+    # calculate ASD
+    if pred.sum() == 0 or label.sum() == 0:
+        asd = np.nan
+    else:
+        pred_dt = distance_transform_edt(~pred)
+        label_dt = distance_transform_edt(~label)
+
+        surface_pred = pred ^ binary_erosion(pred)
+        surface_label = label ^ binary_erosion(label)
+
+        d1 = pred_dt[surface_label].mean()
+        d2 = label_dt[surface_pred].mean()
+        asd = (d1 + d2) / 2
+
+    return dice, asd
+
+
+def calculate_final_score(aggregates):
+    try:
+        # (FID + CNR + gCNR):(KS^A + KS^B):(Dice + ASD) = 5:3:2
+
+        group1_score = 0  # FID + CNR + gCNR
+        if aggregates.get("fid") is not None:
+            fid_min = 60.0
+            fid_max = 150.0
+            fid_score = (fid_max - aggregates["fid"]) / (fid_max - fid_min)
+            fid_score = max(0, min(1, fid_score))
+            group1_score += fid_score * 100 * 0.33
+
+        if aggregates.get("cnr_mean") is not None:
+            cnr_min = 1.0
+            cnr_max = 1.5
+            cnr_score = (aggregates["cnr_mean"] - cnr_min) / (cnr_max - cnr_min)
+            cnr_score = max(0, min(1, cnr_score))
+            group1_score += cnr_score * 100 * 0.33
+
+        if aggregates.get("gcnr_mean") is not None:
+            gcnr_min = 0.5
+            gcnr_max = 0.8
+            gcnr_score = (aggregates["gcnr_mean"] - gcnr_min) / (gcnr_max - gcnr_min)
+            gcnr_score = max(0, min(1, gcnr_score))
+            group1_score += gcnr_score * 100 * 0.34
+
+        group2_score = 0  # KS^A + KS^B
+        if aggregates.get("ks_roi1_ksstatistic_mean") is not None:
+            ks1_min = 0.1
+            ks1_max = 0.3
+            ks1_score = (ks1_max - aggregates["ks_roi1_ksstatistic_mean"]) / (
+                ks1_max - ks1_min
+            )
+            ks1_score = max(0, min(1, ks1_score))
+            group2_score += ks1_score * 100 * 0.5
+
+        if aggregates.get("ks_roi2_ksstatistic_mean") is not None:
+            ks2_min = 0.0
+            ks2_max = 0.5
+            ks2_score = (aggregates["ks_roi2_ksstatistic_mean"] - ks2_min) / (
+                ks2_max - ks2_min
+            )
+            ks2_score = max(0, min(1, ks2_score))
+            group2_score += ks2_score * 100 * 0.5
+
+        group3_score = 0  # Dice + ASD
+        if aggregates.get("dice_mean") is not None:
+            dice_min = 0.85
+            dice_max = 0.95
+            dice_score = (aggregates["dice_mean"] - dice_min) / (dice_max - dice_min)
+            dice_score = max(0, min(1, dice_score))
+            group3_score += dice_score * 100 * 0.5
+        if aggregates.get("asd_mean") is not None:
+            asd_min = 0.7
+            asd_max = 2.0
+            asd_score = (asd_max - aggregates["asd_mean"]) / (asd_max - asd_min)
+            asd_score = max(0, min(1, asd_score))
+            group3_score += asd_score * 100 * 0.5
+
+        # Final score calculation
+        final_score = (group1_score * 5 + group2_score * 3 + group3_score * 2) / 10
+
+        return final_score
+
+    except Exception as e:
+        print(f"Error calculating final score: {str(e)}")
+        return 0
+
+
+def plot_metrics(metrics, limits, out_path):
+    plt.style.use("seaborn-v0_8-darkgrid")
+    fig, axes = plt.subplots(1, len(metrics), figsize=(7.2, 2.7), dpi=600)
+    colors = ["#0057b7", "#ffb300", "#008744", "#d62d20"]
+    # Arrow direction: ↑ for up, ↓ for down
+    metric_labels = {
+        "CNR": r"CNR $\uparrow$",
+        "gCNR": r"gCNR $\uparrow$",
+        "KS_A": r"KS$_{septum}$ $\downarrow$",
+        "KS_B": r"KS$_{ventricle}$ $\uparrow$",
+    }
+    for idx, (ax, (name, values)) in enumerate(zip(axes, metrics.items())):
+        ax.hist(
+            values,
+            bins=30,
+            color=colors[idx % len(colors)],
+            alpha=0.85,
+            edgecolor="black",
+            linewidth=0.7,
+        )
+        ax.set_xlabel(metric_labels.get(name, name), fontsize=11)
+        ax.set_ylabel("Count", fontsize=10)
+        # Draw limits
+        if name in limits:
+            for lim in limits[name]:
+                ax.axvline(lim, color="crimson", linestyle="--", lw=1.2)
+        ax.spines["top"].set_visible(False)
+        ax.spines["right"].set_visible(False)
+        ax.tick_params(axis="both", which="major", labelsize=9)
+    fig.tight_layout(pad=1.5)
+    fig.savefig(out_path, bbox_inches="tight", dpi=600)
+    plt.close(fig)
+
+
+def main(folder: str, roi_folder: str, reference_folder: str):
+    folder = Path(folder)
+    roi_folder = Path(roi_folder)
+    reference_folder = Path(reference_folder)
+
+    folder_files = set(f.name for f in folder.glob("*.png"))
+    roi_files = set(f.name for f in roi_folder.glob("*.png"))
+    ref_files = set(f.name for f in reference_folder.glob("*.png"))
+
+    print(f"Found {len(folder_files)} .png files in output folder: {folder}")
+    print(f"Found {len(roi_files)} .png files in ROI folder: {roi_folder}")
+    print(f"Found {len(ref_files)} .png files in reference folder: {reference_folder}")
+
+    # Find intersection of filenames
+    common_files = sorted(folder_files & roi_files & ref_files)
+    print(f"Found {len(common_files)} images present in all folders.")
+    if len(common_files) == 0:
+        print("No matching images found in all folders. Check your folder contents.")
+        print(f"Output folder files: {sorted(folder_files)}")
+        print(f"ROI folder files: {sorted(roi_files)}")
+        print(f"Reference folder files: {sorted(ref_files)}")
+    assert len(common_files) > 0, (
+        "No matching .png files in all folders. Cannot proceed."
+    )
+
+    metrics = {"CNR": [], "gCNR": [], "KS_A": [], "KS_B": []}
+    limits = {
+        "CNR": [1.0, 1.5],
+        "gCNR": [0.5, 0.8],
+        "KS_A": [0.1, 0.3],
+        "KS_B": [0.0, 0.5],
+    }
+
+    for name in common_files:
+        our_path = folder / name
+        roi_path = roi_folder / name
+        ref_path = reference_folder / name
+
+        assert our_path.exists(), f"Missing file in output folder: {our_path}"
+        assert roi_path.exists(), f"Missing file in ROI folder: {roi_path}"
+        assert ref_path.exists(), f"Missing file in reference folder: {ref_path}"
+
+        try:
+            img = np.array(load_image(str(our_path)))
+            img_ref = np.array(load_image(str(ref_path)))
+        except Exception as e:
+            print(f"Error loading image {name}: {e}")
+            continue
+
+        # CNR/gCNR
+        cnr_gcnr = calculate_cnr_gcnr(img, str(roi_path))
+        metrics["CNR"].append(cnr_gcnr[0][0])
+        metrics["gCNR"].append(cnr_gcnr[0][1])
+
+        # KS statistics
+        ks_a, _, ks_b, _ = calculate_ks_statistics(img_ref, img, str(roi_path))
+        metrics["KS_A"].append(ks_a)
+        metrics["KS_B"].append(ks_b)
+
+    # Compute statistics
+    stats = {
+        k: (np.mean(v), np.std(v), np.min(v), np.max(v)) for k, v in metrics.items()
+    }
+    print("Contrast statistics:")
+    for k, (mean, std, minv, maxv) in stats.items():
+        print(f"{k}: mean={mean:.3f}, std={std:.3f}, min={minv:.3f}, max={maxv:.3f}")
+
+    plot_metrics(metrics, limits, str(folder / "contrast_metrics.png"))
+    print(f"Saved metrics plot to {folder / 'contrast_metrics.png'}")
+
+    # Compute FID
+    fid_image_paths = [str(folder / name) for name in common_files]
+    fid_score = calculate_fid_score(fid_image_paths, str(reference_folder))
+    print(f"FID between {folder} and {reference_folder}: {fid_score:.3f}")
+
+
+if __name__ == "__main__":
+    tyro.cli(main)
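Note: eval.py exposes main() through tyro, so it is invoked roughly as python eval.py --folder OUTPUT_DIR --roi-folder ROI_DIR --reference-folder REF_DIR (flag names follow tyro's defaults). The contrast metrics can also be called directly; a minimal sketch on synthetic ROI samples (placeholder data, assuming the dependencies from the Dockerfile are installed):

# Minimal sketch: calling the contrast metrics from eval.py on synthetic
# ROI samples (placeholder data, not part of this commit).
import numpy as np

from eval import cnr, gcnr

rng = np.random.default_rng(0)
tissue = rng.normal(180.0, 10.0, size=2000)  # bright ROI, e.g. mask == 255
cavity = rng.normal(60.0, 15.0, size=2000)   # dark ROI, e.g. mask == 128
print(f"CNR  = {cnr(tissue, cavity):.3f}")   # well separated -> high CNR
print(f"gCNR = {gcnr(tissue, cavity):.3f}")  # approaches 1 for non-overlapping ROIs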
fid_score.py ADDED
@@ -0,0 +1,480 @@
+"""Calculates the Frechet Inception Distance (FID) to evaluate GANs
+
+The FID metric calculates the distance between two distributions of images.
+Typically, we have summary statistics (mean & covariance matrix) of one
+of these distributions, while the 2nd distribution is given by a GAN.
+
+When run as a stand-alone program, it compares the distribution of
+images that are stored as PNG/JPEG at a specified location with a
+distribution given by summary statistics (in pickle format).
+
+The FID is calculated by assuming that X_1 and X_2 are the activations of
+the pool_3 layer of the inception net for generated samples and real world
+samples respectively.
+
+See --help to see further details.
+
+Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
+of Tensorflow
+
+Copyright 2018 Institute of Bioinformatics, JKU Linz
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import hashlib
+import os
+import pathlib
+from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
+
+import numpy as np
+import torch
+import torchvision.transforms as TF
+from PIL import Image
+from scipy import linalg
+from torch.nn.functional import adaptive_avg_pool2d
+
+try:
+    from tqdm import tqdm
+except ImportError:
+    # If tqdm is not available, provide a mock version of it
+    def tqdm(x):
+        return x
+
+
+from pytorch_fid.inception import InceptionV3
+
+parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
+parser.add_argument("--batch-size", type=int, default=50, help="Batch size to use")
+parser.add_argument(
+    "--num-workers",
+    type=int,
+    help=(
+        "Number of processes to use for data loading. Defaults to `min(8, num_cpus)`"
+    ),
+)
+parser.add_argument(
+    "--device", type=str, default=None, help="Device to use. Like cuda, cuda:0 or cpu"
+)
+parser.add_argument(
+    "--dims",
+    type=int,
+    default=2048,
+    choices=list(InceptionV3.BLOCK_INDEX_BY_DIM),
+    help=(
+        "Dimensionality of Inception features to use. By default, uses pool3 features"
+    ),
+)
+parser.add_argument(
+    "--save-stats",
+    action="store_true",
+    help=(
+        "Generate an npz archive from a directory of samples. "
+        "The first path is used as input and the second as output."
+    ),
+)
+parser.add_argument(
+    "path",
+    type=str,
+    nargs=2,
+    help=("Paths to the generated images or to .npz statistic files"),
+)
+
+IMAGE_EXTENSIONS = {"bmp", "jpg", "jpeg", "pgm", "png", "ppm", "tif", "tiff", "webp"}
+
+
+class ImagePathDataset(torch.utils.data.Dataset):
+    def __init__(self, files, transforms=None):
+        self.files = files
+        self.transforms = transforms
+
+    def __len__(self):
+        return len(self.files)
+
+    def __getitem__(self, i):
+        path = self.files[i]
+        img = Image.open(path).convert("RGB")
+        if self.transforms is not None:
+            img = self.transforms(img)
+        return img
+
+
+def get_activations(
+    files, model, batch_size=50, dims=2048, device="cpu", num_workers=1
+):
+    """Calculates the activations of the pool_3 layer for all images.
+
+    Params:
+    -- files       : List of image files paths
+    -- model       : Instance of inception model
+    -- batch_size  : Batch size of images for the model to process at once.
+                     Make sure that the number of samples is a multiple of
+                     the batch size, otherwise some samples are ignored. This
+                     behavior is retained to match the original FID score
+                     implementation.
+    -- dims        : Dimensionality of features returned by Inception
+    -- device      : Device to run calculations
+    -- num_workers : Number of parallel dataloader workers
+
+    Returns:
+    -- A numpy array of dimension (num images, dims) that contains the
+       activations of the given tensor when feeding inception with the
+       query tensor.
+    """
+    model.eval()
+
+    if batch_size > len(files):
+        print(
+            (
+                "Warning: batch size is bigger than the data size. "
+                "Setting batch size to data size"
+            )
+        )
+        batch_size = len(files)
+    # print(files)
+    dataset = ImagePathDataset(files, transforms=TF.ToTensor())
+    dataloader = torch.utils.data.DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        drop_last=False,
+        num_workers=num_workers,
+    )
+
+    pred_arr = np.empty((len(files), dims))
+
+    start_idx = 0
+
+    for batch in tqdm(dataloader):
+        batch = batch.to(device)
+
+        with torch.no_grad():
+            pred = model(batch)[0]
+
+        # If model output is not scalar, apply global spatial average pooling.
+        # This happens if you choose a dimensionality not equal 2048.
+        if pred.size(2) != 1 or pred.size(3) != 1:
+            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))
+
+        pred = pred.squeeze(3).squeeze(2).cpu().numpy()
+
+        pred_arr[start_idx : start_idx + pred.shape[0]] = pred
+
+        start_idx = start_idx + pred.shape[0]
+
+    return pred_arr
+
+
+def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
+    """Numpy implementation of the Frechet Distance.
+    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
+    and X_2 ~ N(mu_2, C_2) is
+    d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
+
+    Stable version by Dougal J. Sutherland.
+
+    Params:
+    -- mu1   : Numpy array containing the activations of a layer of the
+               inception net (like returned by the function 'get_predictions')
+               for generated samples.
+    -- mu2   : The sample mean over activations, precalculated on a
+               representative data set.
+    -- sigma1: The covariance matrix over activations for generated samples.
+    -- sigma2: The covariance matrix over activations, precalculated on a
+               representative data set.
+
+    Returns:
+    --   : The Frechet Distance.
+    """
+
+    mu1 = np.atleast_1d(mu1)
+    mu2 = np.atleast_1d(mu2)
+
+    sigma1 = np.atleast_2d(sigma1)
+    sigma2 = np.atleast_2d(sigma2)
+
+    assert mu1.shape == mu2.shape, (
+        "Training and test mean vectors have different lengths"
+    )
+    assert sigma1.shape == sigma2.shape, (
+        "Training and test covariances have different dimensions"
+    )
+
+    diff = mu1 - mu2
+
+    # Product might be almost singular
+    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
+    if not np.isfinite(covmean).all():
+        msg = (
+            "fid calculation produces singular product; "
+            "adding %s to diagonal of cov estimates"
+        ) % eps
+        print(msg)
+        offset = np.eye(sigma1.shape[0]) * eps
+        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
+
+    # Numerical error might give slight imaginary component
+    if np.iscomplexobj(covmean):
+        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
+            m = np.max(np.abs(covmean.imag))
+            raise ValueError("Imaginary component {}".format(m))
+        covmean = covmean.real
+
+    tr_covmean = np.trace(covmean)
+
+    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean
+
+
+def calculate_activation_statistics(
+    files, model, batch_size=50, dims=2048, device="cpu", num_workers=1
+):
+    """Calculation of the statistics used by the FID.
+    Params:
+    -- files       : List of image files paths
+    -- model       : Instance of inception model
+    -- batch_size  : The images numpy array is split into batches with
+                     batch size batch_size. A reasonable batch size
+                     depends on the hardware.
+    -- dims        : Dimensionality of features returned by Inception
+    -- device      : Device to run calculations
+    -- num_workers : Number of parallel dataloader workers
+
+    Returns:
+    -- mu    : The mean over samples of the activations of the pool_3 layer of
+               the inception model.
+    -- sigma : The covariance matrix of the activations of the pool_3 layer of
+               the inception model.
+    """
+    act = get_activations(files, model, batch_size, dims, device, num_workers)
+    mu = np.mean(act, axis=0)
+    sigma = np.cov(act, rowvar=False)
+    return mu, sigma
+
+
+def compute_statistics_of_path(path, model, batch_size, dims, device, num_workers=1):
+    # if path.endswith('.npz'):
+    #     with np.load(path) as f:
+    #         m, s = f['mu'][:], f['sigma'][:]
+    # else:
+    m, s = calculate_activation_statistics(
+        path, model, batch_size, dims, device, num_workers
+    )
+    # else:
+    #     path = pathlib.Path(path)
+    #     files = sorted([file for ext in IMAGE_EXTENSIONS
+    #                    for file in path.glob('*.{}'.format(ext))])
+    #     m, s = calculate_activation_statistics(files, model, batch_size,
+    #                                            dims, device, num_workers)
+
+    return m, s
+
+
+def _fid_cache_paths():
+    tmp_dir = pathlib.Path("tmp")
+    tmp_dir.mkdir(exist_ok=True)
+    stats_path = tmp_dir / "fid_stats.npz"
+    hash_path = tmp_dir / "fid_stats.hash"
+    return stats_path, hash_path
+
+
+def _load_fid_stats(stats_path):
+    arr = np.load(stats_path)
+    return arr["mu"], arr["sigma"]
+
+
+def _save_fid_stats(stats_path, mu, sigma):
+    np.savez_compressed(stats_path, mu=mu, sigma=sigma)
+
+
+def calculate_fid_given_paths(paths, batch_size, device, dims, num_workers=1):
+    """Calculates the FID of two paths, with caching for ground truth stats if the second path is a directory of images."""
+    import pathlib
+
+    if isinstance(paths[1], (str, pathlib.Path)) and pathlib.Path(paths[1]).is_dir():
+        # Get all PNGs in the directory
+        gt_images = list(pathlib.Path(paths[1]).glob("*.png"))
+        stats_path, hash_path = _fid_cache_paths()
+        if stats_path.exists():
+            print(f"Using cached FID stats from {stats_path}")
+            print("WARNING: Cache may be stale if ground truth images have changed.")
+            m1, s1 = _load_fid_stats(stats_path)
+        else:
+            print("Computing FID stats for ground truth images...")
+            block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+            model = InceptionV3([block_idx]).to(device)
+            m1, s1 = calculate_activation_statistics(
+                gt_images, model, batch_size, dims, device, num_workers
+            )
+            _save_fid_stats(stats_path, m1, s1)
+        # m2, s2 for denoised images
+        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+        model = InceptionV3([block_idx]).to(device)
+        m2, s2 = calculate_activation_statistics(
+            paths[0], model, batch_size, dims, device, num_workers
+        )
+        fid_value = calculate_frechet_distance(m1, s1, m2, s2)
+        return fid_value
+
+    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+    model = InceptionV3([block_idx]).to(device)
+    m1, s1 = compute_statistics_of_path(
+        paths[0], model, batch_size, dims, device, num_workers
+    )
+    print(paths[1])
+    m2, s2 = compute_statistics_of_path(
+        paths[1], model, batch_size, dims, device, num_workers
+    )
+    fid_value = calculate_frechet_distance(m1, s1, m2, s2)
+    return fid_value
+
+
+def save_fid_stats(paths, batch_size, device, dims, num_workers=1):
+    """Calculates the FID of two paths"""
+    if not os.path.exists(paths[0]):
+        raise RuntimeError("Invalid path: %s" % paths[0])
+
+    if os.path.exists(paths[1]):
+        raise RuntimeError("Existing output file: %s" % paths[1])
+
+    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+
+    model = InceptionV3([block_idx]).to(device)
+
+    print(f"Saving statistics for {paths[0]}")
+
+    m1, s1 = compute_statistics_of_path(
+        paths[0], model, batch_size, dims, device, num_workers
+    )
+
+    np.savez_compressed(paths[1], mu=m1, sigma=s1)
+
+
+def calculate_fid_with_cached_ground_truth(
+    denoised_image_dirs,
+    ground_truth_image_dirs,
+    batch_size=50,
+    device="cpu",
+    dims=2048,
+    num_workers=1,
+):
+    """
+    Calculates the FID between denoised images and ground truth images, using cached stats for ground truth if possible.
+    Args:
+        denoised_image_dirs: list of denoised image paths
+        ground_truth_image_dirs: list of ground truth image paths (or a directory)
+        batch_size, device, dims, num_workers: same as calculate_fid_given_paths
+    Returns:
+        FID value
+    """
+    # If ground_truth_image_dirs is a directory, get all PNGs
+    if isinstance(ground_truth_image_dirs, (str, pathlib.Path)):
+        ground_truth_image_dirs = list(
+            pathlib.Path(ground_truth_image_dirs).glob("*.png")
+        )
+
+    # Compute hash for cache
+    def compute_file_hashes(file_list):
+        hash_md5 = hashlib.md5()
+        for fname in sorted(map(str, file_list)):
+            try:
+                stat = os.stat(fname)
+                hash_md5.update(fname.encode())
+                hash_md5.update(str(stat.st_mtime).encode())
+            except Exception:
+                continue
+        return hash_md5.hexdigest()
+
+    tmp_dir = pathlib.Path("tmp")
+    tmp_dir.mkdir(exist_ok=True)
+    stats_path = tmp_dir / "fid_stats.npz"
+    hash_path = tmp_dir / "fid_stats.hash"
+    # TODO: caching shouldn't be based on ground truth image dirs
+    # since we can have multiple reconstructions of same ground truth
+    current_hash = compute_file_hashes(ground_truth_image_dirs)
+    cache_valid = False
+    if stats_path.exists() and hash_path.exists():
+        try:
+            with open(hash_path, "r") as f:
+                cached_hash = f.read().strip()
+            if cached_hash == current_hash:
+                cache_valid = True
+        except Exception:
+            pass
+    # TODO: need more sophisticated caching for sweeps
+    if cache_valid:
+        print(f"Using cached FID stats from {stats_path}")
+        arr = np.load(stats_path)
+        mu, sigma = arr["mu"], arr["sigma"]
+    else:
+        print("Computing FID stats for ground truth images...")
+        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+        model = InceptionV3([block_idx]).to(device)
+        mu, sigma = calculate_activation_statistics(
+            ground_truth_image_dirs,
+            model,
+            batch_size=batch_size,
+            dims=dims,
+            device=device,
+            num_workers=num_workers,
+        )
+        np.savez_compressed(stats_path, mu=mu, sigma=sigma)
+        with open(hash_path, "w") as f:
+            f.write(current_hash)
+    # Compute stats for denoised images
+    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
+    model = InceptionV3([block_idx]).to(device)
+    mu2, sigma2 = calculate_activation_statistics(
+        denoised_image_dirs,
+        model,
+        batch_size=batch_size,
+        dims=dims,
+        device=device,
+        num_workers=num_workers,
+    )
+    fid_value = calculate_frechet_distance(mu, sigma, mu2, sigma2)
+    return fid_value
+
+
+def main():
+    args = parser.parse_args()
+
+    if args.device is None:
+        device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")
+    else:
+        device = torch.device(args.device)
+
+    if args.num_workers is None:
+        try:
+            num_cpus = len(os.sched_getaffinity(0))
+        except AttributeError:
+            # os.sched_getaffinity is not available under Windows, use
+            # os.cpu_count instead (which may not return the *available* number
+            # of CPUs).
+            num_cpus = os.cpu_count()
+
+        num_workers = min(num_cpus, 8) if num_cpus is not None else 0
+    else:
+        num_workers = args.num_workers
+
+    if args.save_stats:
+        save_fid_stats(args.path, args.batch_size, device, args.dims, num_workers)
+        return
+
+    fid_value = calculate_fid_given_paths(
+        args.path, args.batch_size, device, args.dims, num_workers
+    )
+    print("FID: ", fid_value)
+
+
+if __name__ == "__main__":
+    main()
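Note: fid_score.py keeps the pytorch_fid command-line interface (two positional paths, per the argparse definition above), while eval.py calls calculate_fid_with_cached_ground_truth, which caches the ground-truth Inception statistics under tmp/. A minimal sketch of the direct call, with placeholder folder names that are not part of this commit:

# Sketch: FID for a set of dehazed frames against a folder of clean
# reference frames (placeholder paths; requires torch + pytorch_fid).
from glob import glob

import fid_score

denoised = sorted(glob("results/dehazed/*.png"))  # placeholder output folder
fid = fid_score.calculate_fid_with_cached_ground_truth(
    denoised,
    "data/clean_frames",  # placeholder ground-truth directory; stats cached in tmp/
    batch_size=8,
    device="cpu",
    dims=2048,
)
print(f"FID: {fid:.3f}")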
main.py
ADDED
|
@@ -0,0 +1,743 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import copy
|
| 2 |
+
import os
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
os.environ["KERAS_BACKEND"] = "jax"
|
| 6 |
+
|
| 7 |
+
import jax
|
| 8 |
+
import keras
|
| 9 |
+
import matplotlib.pyplot as plt
|
| 10 |
+
import numpy as np
|
| 11 |
+
import scipy
|
| 12 |
+
import tyro
|
| 13 |
+
import zea
|
| 14 |
+
from keras import ops
|
| 15 |
+
from matplotlib.patches import PathPatch
|
| 16 |
+
from matplotlib.path import Path as pltPath
|
| 17 |
+
from PIL import Image
|
| 18 |
+
from skimage import filters, measure, morphology
|
| 19 |
+
from zea import Config, init_device, log
|
| 20 |
+
from zea.internal.operators import Operator
|
| 21 |
+
from zea.models.diffusion import (
|
| 22 |
+
DPS,
|
| 23 |
+
DiffusionModel,
|
| 24 |
+
diffusion_guidance_registry,
|
| 25 |
+
)
|
| 26 |
+
from zea.tensor_ops import L2
|
| 27 |
+
from zea.utils import translate
|
| 28 |
+
from zea.visualize import plot_image_grid
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def L1(x):
|
| 32 |
+
"""L1 norm of a tensor.
|
| 33 |
+
|
| 34 |
+
Implementation of L1 norm: https://mathworld.wolfram.com/L1-Norm.html
|
| 35 |
+
"""
|
| 36 |
+
return ops.sum(ops.abs(x))
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def smooth_L1(x, beta=0.4):
|
| 40 |
+
"""Smooth L1 loss function.
|
| 41 |
+
|
| 42 |
+
Implementation of Smooth L1 loss. Large beta values make it similar to L1 loss,
|
| 43 |
+
while small beta values make it similar to L2 loss.
|
| 44 |
+
"""
|
| 45 |
+
abs_x = ops.abs(x)
|
| 46 |
+
loss = ops.where(abs_x < beta, 0.5 * x**2 / beta, abs_x - 0.5 * beta)
|
| 47 |
+
return ops.sum(loss)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def postprocess(data, normalization_range):
|
| 51 |
+
"""Postprocess data from model output to image."""
|
| 52 |
+
data = ops.clip(data, *normalization_range)
|
| 53 |
+
data = translate(data, normalization_range, (0, 255))
|
| 54 |
+
data = ops.convert_to_numpy(data)
|
| 55 |
+
data = np.squeeze(data, axis=-1)
|
| 56 |
+
return np.clip(data, 0, 255).astype("uint8")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def preprocess(data, normalization_range):
|
| 60 |
+
"""Preprocess data for model input. Converts uint8 image(s) in [0, 255] to model input range."""
|
| 61 |
+
data = ops.convert_to_tensor(data, dtype="float32")
|
| 62 |
+
data = translate(data, (0, 255), normalization_range)
|
| 63 |
+
data = ops.expand_dims(data, axis=-1)
|
| 64 |
+
return data
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def apply_bottom_preservation(
|
| 68 |
+
output_images, input_images, preserve_bottom_percent=30.0, transition_width=10.0
|
| 69 |
+
):
|
| 70 |
+
"""Apply bottom preservation with smooth windowed transition.
|
| 71 |
+
|
| 72 |
+
Args:
|
| 73 |
+
output_images: Model output images, (batch, height, width, channels)
|
| 74 |
+
input_images: Original input images, (batch, height, width, channels)
|
| 75 |
+
preserve_bottom_percent: Percentage of bottom to preserve from input (default 30%)
|
| 76 |
+
transition_width: Percentage of image height for smooth transition (default 10%)
|
| 77 |
+
|
| 78 |
+
Returns:
|
| 79 |
+
Blended images with preserved bottom portion
|
| 80 |
+
"""
|
| 81 |
+
output_shape = ops.shape(output_images)
|
| 82 |
+
|
| 83 |
+
batch_size, height, width, channels = output_shape
|
| 84 |
+
|
| 85 |
+
preserve_height = int(height * preserve_bottom_percent / 100.0)
|
| 86 |
+
transition_height = int(height * transition_width / 100.0)
|
| 87 |
+
|
| 88 |
+
transition_start = height - preserve_height - transition_height
|
| 89 |
+
preserve_start = height - preserve_height
|
| 90 |
+
|
| 91 |
+
transition_start = max(0, transition_start)
|
| 92 |
+
preserve_start = min(height, preserve_start)
|
| 93 |
+
|
| 94 |
+
if transition_start >= preserve_start:
|
| 95 |
+
transition_start = preserve_start
|
| 96 |
+
transition_height = 0
|
| 97 |
+
|
| 98 |
+
y_coords = ops.arange(height, dtype="float32")
|
| 99 |
+
y_coords = ops.reshape(y_coords, (height, 1, 1))
|
| 100 |
+
|
| 101 |
+
if transition_height > 0:
|
| 102 |
+
# Smooth transition using cosine interpolation
|
| 103 |
+
transition_region = ops.logical_and(
|
| 104 |
+
y_coords >= transition_start, y_coords < preserve_start
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
transition_progress = (y_coords - transition_start) / transition_height
|
| 108 |
+
transition_progress = ops.clip(transition_progress, 0.0, 1.0)
|
| 109 |
+
|
| 110 |
+
# Use cosine for smooth transition (0.5 * (1 - cos(π * t)))
|
| 111 |
+
cosine_weight = 0.5 * (1.0 - ops.cos(np.pi * transition_progress))
|
| 112 |
+
|
| 113 |
+
blend_weight = ops.where(
|
| 114 |
+
y_coords < transition_start,
|
| 115 |
+
0.0,
|
| 116 |
+
ops.where(
|
| 117 |
+
transition_region,
|
| 118 |
+
cosine_weight,
|
| 119 |
+
1.0,
|
| 120 |
+
),
|
| 121 |
+
)
|
| 122 |
+
else:
|
| 123 |
+
# No transition, just hard switch
|
| 124 |
+
blend_weight = ops.where(y_coords >= preserve_start, 1.0, 0.0)
|
| 125 |
+
|
| 126 |
+
blend_weight = ops.expand_dims(blend_weight, axis=0)
|
| 127 |
+
|
| 128 |
+
blended_images = (1.0 - blend_weight) * output_images + blend_weight * input_images
|
| 129 |
+
|
| 130 |
+
return blended_images
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def extract_skeleton(images, input_range, sigma_pre=4, sigma_post=4, threshold=0.3):
|
| 134 |
+
"""Extract skeletons from the input images."""
|
| 135 |
+
images_np = ops.convert_to_numpy(images)
|
| 136 |
+
images_np = np.clip(images_np, input_range[0], input_range[1])
|
| 137 |
+
images_np = translate(images_np, input_range, (0, 1))
|
| 138 |
+
images_np = np.squeeze(images_np, axis=-1)
|
| 139 |
+
|
| 140 |
+
skeleton_masks = []
|
| 141 |
+
for img in images_np:
|
| 142 |
+
img[img < threshold] = 0
|
| 143 |
+
smoothed = filters.gaussian(img, sigma=sigma_pre)
|
| 144 |
+
binary = smoothed > filters.threshold_otsu(smoothed)
|
| 145 |
+
skeleton = morphology.skeletonize(binary)
|
| 146 |
+
skeleton = morphology.dilation(skeleton, morphology.disk(2))
|
| 147 |
+
skeleton = filters.gaussian(skeleton.astype(np.float32), sigma=sigma_post)
|
| 148 |
+
skeleton_masks.append(skeleton)
|
| 149 |
+
|
| 150 |
+
skeleton_masks = np.array(skeleton_masks)
|
| 151 |
+
skeleton_masks = np.expand_dims(skeleton_masks, axis=-1)
|
| 152 |
+
|
| 153 |
+
# normalize to [0, 1]
|
| 154 |
+
min_val, max_val = np.min(skeleton_masks), np.max(skeleton_masks)
|
| 155 |
+
skeleton_masks = (skeleton_masks - min_val) / (max_val - min_val + 1e-8)
|
| 156 |
+
|
| 157 |
+
return ops.convert_to_tensor(skeleton_masks, dtype=images.dtype)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class IdentityOperator(Operator):
|
| 161 |
+
def forward(self, data):
|
| 162 |
+
return data
|
| 163 |
+
|
| 164 |
+
def __str__(self):
|
| 165 |
+
return "y = x"
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@diffusion_guidance_registry(name="semantic_dps")
|
| 169 |
+
class SemanticDPS(DPS):
|
| 170 |
+
def __init__(
|
| 171 |
+
self,
|
| 172 |
+
diffusion_model,
|
| 173 |
+
segmentation_model,
|
| 174 |
+
operator,
|
| 175 |
+
disable_jit=False,
|
| 176 |
+
**kwargs,
|
| 177 |
+
):
|
| 178 |
+
"""Initialize the diffusion guidance.
|
| 179 |
+
|
| 180 |
+
Args:
|
| 181 |
+
diffusion_model: The diffusion model to use for guidance.
|
| 182 |
+
operator: The forward (measurement) operator to use for guidance.
|
| 183 |
+
disable_jit: Whether to disable JIT compilation.
|
| 184 |
+
"""
|
| 185 |
+
self.diffusion_model = diffusion_model
|
| 186 |
+
self.segmentation_model = segmentation_model
|
| 187 |
+
self.operator = operator
|
| 188 |
+
self.disable_jit = disable_jit
|
| 189 |
+
self.setup(**kwargs)
|
| 190 |
+
|
| 191 |
+
def _get_fixed_mask(
|
| 192 |
+
self,
|
| 193 |
+
images,
|
| 194 |
+
bottom_px=40,
|
| 195 |
+
top_px=20,
|
| 196 |
+
):
|
| 197 |
+
batch_size, height, width, channels = ops.shape(images)
|
| 198 |
+
|
| 199 |
+
# Create row indices for each pixel
|
| 200 |
+
row_indices = ops.arange(height)
|
| 201 |
+
row_indices = ops.reshape(row_indices, (height, 1))
|
| 202 |
+
row_indices = ops.tile(row_indices, (1, width))
|
| 203 |
+
|
| 204 |
+
# Create top row mask
|
| 205 |
+
fixed_mask = ops.where(
|
| 206 |
+
ops.logical_or(row_indices < top_px, row_indices >= height - bottom_px),
|
| 207 |
+
1.0,
|
| 208 |
+
0.0,
|
| 209 |
+
)
|
| 210 |
+
fixed_mask = ops.expand_dims(fixed_mask, axis=0)
|
| 211 |
+
fixed_mask = ops.expand_dims(fixed_mask, axis=-1)
|
| 212 |
+
fixed_mask = ops.tile(fixed_mask, (batch_size, 1, 1, channels))
|
| 213 |
+
|
| 214 |
+
return fixed_mask
|
| 215 |
+
|
| 216 |
+
def _get_segmentation_mask(self, images, threshold, sigma):
|
| 217 |
+
input_range = self.diffusion_model.input_range
|
| 218 |
+
images = ops.clip(images, input_range[0], input_range[1])
|
| 219 |
+
images = translate(images, input_range, (-1, 1))
|
| 220 |
+
|
| 221 |
+
masks = self.segmentation_model(images)
|
| 222 |
+
mask_vent = masks[..., 0] # ROI 1 ventricle
|
| 223 |
+
mask_sept = masks[..., 1] # ROI 2 septum
|
| 224 |
+
|
| 225 |
+
def _preprocess_mask(mask):
|
| 226 |
+
mask = ops.convert_to_numpy(mask)
|
| 227 |
+
mask = np.expand_dims(mask, axis=-1)
|
| 228 |
+
mask = np.where(mask > threshold, 1.0, 0.0)
|
| 229 |
+
mask = filters.gaussian(mask, sigma=sigma)
|
| 230 |
+
mask = (mask - ops.min(mask)) / (ops.max(mask) - ops.min(mask) + 1e-8)
|
| 231 |
+
return mask
|
| 232 |
+
|
| 233 |
+
mask_vent = _preprocess_mask(mask_vent)
|
| 234 |
+
mask_sept = _preprocess_mask(mask_sept)
|
| 235 |
+
return mask_vent, mask_sept
|
| 236 |
+
|
| 237 |
+
def _get_dark_mask(self, images):
|
| 238 |
+
min_val = self.diffusion_model.input_range[0]
|
| 239 |
+
dark_mask = ops.where(ops.abs(images - min_val) < 1e-6, 1.0, 0.0)
|
| 240 |
+
return dark_mask
|
| 241 |
+
|
| 242 |
+
def make_omega_map(
|
| 243 |
+
self, images, mask_params, fixed_mask_params, skeleton_params, guidance_kwargs
|
| 244 |
+
):
|
| 245 |
+
masks = self.get_masks(images, mask_params, fixed_mask_params, skeleton_params)
|
| 246 |
+
|
| 247 |
+
masks_vent = masks["vent"]
|
| 248 |
+
masks_sept = masks["sept"]
|
| 249 |
+
masks_fixed = masks["fixed"]
|
| 250 |
+
masks_skeleton = masks["skeleton"]
|
| 251 |
+
masks_dark = masks["dark"]
|
| 252 |
+
|
| 253 |
+
masks_strong = ops.clip(
|
| 254 |
+
masks_sept + masks_fixed + masks_skeleton + masks_dark, 0, 1
|
| 255 |
+
)
|
| 256 |
+
|
| 257 |
+
# background = not masks_strong, not vent
|
| 258 |
+
background = ops.where(masks_strong < 0.1, 1.0, 0.0) * ops.where(
|
| 259 |
+
masks_vent == 0, 1.0, 0.0
|
| 260 |
+
)
|
| 261 |
+
|
| 262 |
+
masks_vent_filtered = masks_vent * (1.0 - masks_strong)
|
| 263 |
+
|
| 264 |
+
per_pixel_omega = (
|
| 265 |
+
guidance_kwargs["omega"] * background
|
| 266 |
+
+ guidance_kwargs["omega_vent"] * masks_vent_filtered
|
| 267 |
+
+ guidance_kwargs["omega_sept"] * masks_strong
|
| 268 |
+
)
|
| 269 |
+
|
| 270 |
+
haze_mask_components = (masks_vent > 0.5) * (1 - masks_strong > 0.5)
|
| 271 |
+
|
| 272 |
+
haze_mask = []
|
| 273 |
+
for i, m in enumerate(haze_mask_components):
|
| 274 |
+
if scipy.ndimage.label(m)[1] > 1:
|
| 275 |
+
# masks_strong _splits_ masks_vent in 2 or more components
|
| 276 |
+
# so we fall back to masks_vent
|
| 277 |
+
haze_mask.append(masks_vent[i])
|
| 278 |
+
# also remove guidance from this region to avoid bringing haze in
|
| 279 |
+
per_pixel_omega = per_pixel_omega.at[i].set(
|
| 280 |
+
per_pixel_omega[i] * (1 - masks_vent[i])
|
| 281 |
+
)
|
| 282 |
+
else:
|
| 283 |
+
# masks_strong 'shaves off' some of masks_vent,
|
| 284 |
+
# where there is tissue
|
| 285 |
+
haze_mask.append((masks_vent * (1 - masks_strong))[i])
|
| 286 |
+
haze_mask = ops.stack(haze_mask, axis=0)
|
| 287 |
+
|
| 288 |
+
masks["per_pixel_omega"] = per_pixel_omega
|
| 289 |
+
masks["haze"] = haze_mask
|
| 290 |
+
|
| 291 |
+
return masks
|

    def get_masks(self, images, mask_params, fixed_mask_params, skeleton_params):
        """Generate all guidance masks from the input images."""
        masks_vent, masks_sept = self._get_segmentation_mask(images, **mask_params)
        masks_fixed = self._get_fixed_mask(images, **fixed_mask_params)
        masks_skeleton = extract_skeleton(
            images, self.diffusion_model.input_range, **skeleton_params
        )
        masks_dark = self._get_dark_mask(images)
        return {
            "vent": masks_vent,
            "sept": masks_sept,
            "fixed": masks_fixed,
            "skeleton": masks_skeleton,
            "dark": masks_dark,
        }

    def compute_error(
        self,
        noisy_images,
        measurements,
        noise_rates,
        signal_rates,
        per_pixel_omega,
        haze_mask,
        eta=0.01,
        smooth_l1_beta=0.5,
        **kwargs,
    ):
        """Compute measurement error for diffusion posterior sampling.

        Args:
            noisy_images: Noisy images.
            measurements: Target measurements.
            noise_rates: Current noise rates.
            signal_rates: Current signal rates.
            per_pixel_omega: Per-pixel weights for the measurement error.
            haze_mask: Mask of the region penalized by the haze prior.
            eta: Weight of the haze prior penalty.
            smooth_l1_beta: Beta parameter of the smooth L1 haze penalty.
            **kwargs: Additional arguments for the operator.

        Returns:
            Tuple of (total_error, (pred_noises, pred_images))
        """
        pred_noises, pred_images = self.diffusion_model.denoise(
            noisy_images,
            noise_rates,
            signal_rates,
            training=False,
        )

        measurement_error = L2(
            per_pixel_omega
            * (measurements - self.operator.forward(pred_images, **kwargs))
        )

        hazy_pixels = pred_images * haze_mask

        # L1 penalty on haze pixels
        # add +1 to make -1 (=black) the 'sparse' value
        haze_prior_error = smooth_L1(hazy_pixels + 1, beta=smooth_l1_beta)

        total_error = measurement_error + eta * haze_prior_error

        return total_error, (pred_noises, pred_images)
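In words, the guidance objective above is a weighted data-consistency term plus a sparsity prior on the predicted intensities inside the haze region: total = L2(p * (y - A(x_hat))) + eta * SmoothL1(x_hat * m_haze + 1). A rough numpy sketch of the same computation, where l2 and smooth_l1 are assumed stand-ins for the L2 and smooth_L1 helpers defined earlier in main.py (they may differ in detail) and the identity operator replaces self.operator.forward:

import numpy as np

def l2(x):
    # stand-in for main.py's L2 helper (assumed: Euclidean norm of the residual)
    return np.sqrt(np.sum(x**2))

def smooth_l1(x, beta=1.0):
    # stand-in for main.py's smooth_L1 helper (assumed: summed Huber-style penalty)
    a = np.abs(x)
    return np.sum(np.where(a < beta, 0.5 * a**2 / beta, a - 0.5 * beta))

y = np.random.uniform(-1, 1, (32, 32))       # hazy measurement in [-1, 1]
x_hat = np.random.uniform(-1, 1, (32, 32))   # denoised estimate at the current step
p = np.ones_like(y)                          # per-pixel guidance weights
m_haze = np.zeros_like(y)
m_haze[8:24, 8:24] = 1.0                     # haze (ventricle) region

eta, beta = 0.01, 0.5
total = l2(p * (y - x_hat)) + eta * smooth_l1(x_hat * m_haze + 1.0, beta=beta)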


def init(config):
    """Initialize models, operator, and guidance objects for semantic-dps dehazing."""

    operator = IdentityOperator()

    diffusion_model = DiffusionModel.from_preset(
        config.diffusion_model_path,
    )
    log.success(
        f"Diffusion model loaded from {log.yellow(config.diffusion_model_path)}"
    )
    segmentation_model = load_segmentation_model(config.segmentation_model_path)

    log.success(
        f"Segmentation model loaded from {log.yellow(config.segmentation_model_path)}"
    )

    guidance_fn = SemanticDPS(
        diffusion_model=diffusion_model,
        segmentation_model=segmentation_model,
        operator=operator,
    )
    diffusion_model._init_operator_and_guidance(operator, guidance_fn)

    return diffusion_model


def load_segmentation_model(path):
    """Load segmentation model"""
    segmentation_model = keras.saving.load_model(path)
    return segmentation_model


def run(
    hazy_images: any,
    diffusion_model: DiffusionModel,
    seed,
    guidance_kwargs: dict,
    mask_params: dict,
    fixed_mask_params: dict,
    skeleton_params: dict,
    batch_size: int = 4,
    diffusion_steps: int = 100,
    initial_diffusion_step: int = 0,
    threshold_output_quantile: float = None,
    preserve_bottom_percent: float = 30.0,
    bottom_transition_width: float = 10.0,
    verbose: bool = True,
):
    """Dehaze a batch of images with semantic diffusion posterior sampling.

    Returns the postprocessed hazy images, predicted tissue images,
    predicted haze images, and the masks used for guidance.
    """
    input_range = diffusion_model.input_range

    hazy_images = preprocess(hazy_images, normalization_range=input_range)

    pred_tissue_images = []
    masks_out = []
    num_images = hazy_images.shape[0]
    num_batches = (num_images + batch_size - 1) // batch_size

    progbar = keras.utils.Progbar(num_batches, verbose=verbose)
    for batch_idx in range(num_batches):
        batch = hazy_images[batch_idx * batch_size : (batch_idx + 1) * batch_size]

        masks = diffusion_model.guidance_fn.make_omega_map(
            batch, mask_params, fixed_mask_params, skeleton_params, guidance_kwargs
        )

        batch_images = diffusion_model.posterior_sample(
            batch,
            n_samples=1,
            n_steps=diffusion_steps,
            initial_step=initial_diffusion_step,
            seed=seed,
            verbose=True,
            per_pixel_omega=masks["per_pixel_omega"],
            haze_mask=masks["haze"],
            eta=guidance_kwargs["eta"],
            smooth_l1_beta=guidance_kwargs["smooth_l1_beta"],
        )
        batch_images = ops.take(batch_images, 0, axis=1)

        pred_tissue_images.append(batch_images)
        masks_out.append(masks)
        progbar.update(batch_idx + 1)

    pred_tissue_images = ops.concatenate(pred_tissue_images, axis=0)
    masks_out = {
        key: ops.concatenate([m[key] for m in masks_out], axis=0)
        for key in masks_out[0].keys()
    }
    pred_haze_images = hazy_images - pred_tissue_images - 1

    if threshold_output_quantile is not None:
        threshold_value = ops.quantile(
            pred_tissue_images, threshold_output_quantile, axis=(1, 2), keepdims=True
        )
        pred_tissue_images = ops.where(
            pred_tissue_images < threshold_value, input_range[0], pred_tissue_images
        )

    # Apply bottom preservation with smooth transition
    if preserve_bottom_percent > 0:
        pred_tissue_images = apply_bottom_preservation(
            pred_tissue_images,
            hazy_images,
            preserve_bottom_percent=preserve_bottom_percent,
            transition_width=bottom_transition_width,
        )

    pred_tissue_images = postprocess(pred_tissue_images, input_range)
    hazy_images = postprocess(hazy_images, input_range)
    pred_haze_images = postprocess(pred_haze_images, input_range)

    return hazy_images, pred_tissue_images, pred_haze_images, masks_out
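One detail of run worth calling out: when threshold_output_quantile is set, each output image gets its own intensity floor, so its darkest pixels are pushed all the way to the background value instead of being left as faint residue. Roughly, in numpy (illustrative quantile, not the configured value):

import numpy as np

pred = np.random.uniform(-1, 1, (2, 32, 32, 1))              # batch of dehazed outputs in [-1, 1]
thresh = np.quantile(pred, 0.2, axis=(1, 2), keepdims=True)  # per-image intensity floor
pred = np.where(pred < thresh, -1.0, pred)                   # clamp the darkest pixels to background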


def add_shape_from_mask(ax, mask, **kwargs):
    """Add a shape to an axis from a mask array.

    Args:
        ax (plt.Axes): matplotlib axis
        mask (ndarray): numpy array whose non-zero
            region defines the shape of interest.
    Kwargs:
        edgecolor (str): color of the shape's edge
        facecolor (str): color of the shape's face
        linewidth (int): width of the shape's edge

    Returns:
        list: the PathPatch objects added to the axis
    """
    # Pad mask to ensure edge contours are found
    padded_mask = np.pad(mask, pad_width=1, mode="constant", constant_values=0)
    contours = measure.find_contours(padded_mask, 0.5)
    patches = []
    for contour in contours:
        # Remove padding offset
        contour -= 1
        path = pltPath(contour[:, ::-1])
        patch = PathPatch(path, **kwargs)
        patches.append(ax.add_patch(patch))
    return patches
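A minimal usage sketch for add_shape_from_mask (assuming this main.py module is importable from the repo root); it outlines the non-zero region of any binary mask on top of an image:

import numpy as np
import matplotlib.pyplot as plt
from main import add_shape_from_mask  # this module

image = np.random.rand(128, 128)
mask = np.zeros((128, 128))
mask[40:90, 30:80] = 1.0

fig, ax = plt.subplots()
ax.imshow(image, cmap="gray")
add_shape_from_mask(ax, mask, facecolor="red", alpha=0.3)
fig.savefig("mask_overlay.png")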


def plot_batch_with_named_masks(
    images, masks_dict, mask_colors=None, titles=None, **kwargs
):
    """
    Plot batch of images in rows, each column overlays a different mask from the dict.
    Mask labels are shown as column titles. If mask name is 'per_pixel_omega', show it
    directly with inferno colormap (no overlay).

    Args:
        images: np.ndarray, shape (batch, height, width, channels)
        masks_dict: dict of {name: mask}, each mask shape (batch, height, width, channels)
        mask_colors: dict of {name: color} or None (default colors used)
        titles: list of column titles or None (mask names used)
    """
    mask_names = list(masks_dict.keys())
    batch_size = images.shape[0]
    default_colors = ["red", "green", "#33aaff", "yellow", "magenta", "cyan"]
    mask_colors = mask_colors or {
        name: default_colors[i % len(default_colors)]
        for i, name in enumerate(mask_names)
    }

    # Prepare images for each column
    columns = []
    cmaps = []
    for name in mask_names:
        if name == "per_pixel_omega":
            mask_np = np.array(masks_dict[name])
            columns.append(np.squeeze(mask_np))
            cmaps.append(["inferno"] * batch_size)
        else:
            columns.append(np.squeeze(images))
            cmaps.append(["gray"] * batch_size)

    # Stack columns: shape (num_columns, batch, ...)
    all_images = np.stack(columns, axis=0)
    # Rearrange to (batch, num_columns, ...)
    all_images = (
        np.transpose(all_images, (1, 0, 2, 3, 4))
        if all_images.ndim == 5
        else np.transpose(all_images, (1, 0, 2, 3))
    )
    # Flatten to (batch * num_columns, ...)
    all_images = all_images.reshape(batch_size * len(mask_names), *images.shape[1:])

    # Flatten cmaps for plot_image_grid in the same order as images
    flat_cmaps = []
    for row in range(batch_size):
        for col in range(len(mask_names)):
            flat_cmaps.append(cmaps[col][row])

    fig, _ = plot_image_grid(
        all_images,
        ncols=len(mask_names),
        remove_axis=False,
        cmap=flat_cmaps,
        figsize=(8, 3.3),
        **kwargs,
    )

    # Overlay masks for non-per_pixel_omega columns
    for col_idx, name in enumerate(mask_names):
        if name == "per_pixel_omega":
            continue
        mask_np = np.array(masks_dict[name])
        axes = fig.axes[col_idx : batch_size * len(mask_names) : len(mask_names)]
        for ax, mask_img in zip(axes, mask_np):
            add_shape_from_mask(
                ax, mask_img.squeeze(), color=mask_colors[name], alpha=0.3
            )

    # Add column titles
    row_idx = 0
    if titles is None:
        titles = mask_names
    for col_idx, name in enumerate(titles):
        ax_idx = row_idx * len(mask_names) + col_idx
        fig.axes[ax_idx].set_title(name, fontsize=9, color="white")
        fig.axes[ax_idx].set_facecolor("black")

    # Add colorbar for per_pixel_omega if present
    if "per_pixel_omega" in mask_names:
        col_idx = mask_names.index("per_pixel_omega")
        axes = fig.axes[col_idx : batch_size * len(mask_names) : len(mask_names)]

        # Get vertical bounds of the subplot column
        top_ax = axes[0]
        bottom_ax = axes[-1]
        top_pos = top_ax.get_position()
        bottom_pos = bottom_ax.get_position()

        full_y0 = bottom_pos.y0
        full_y1 = top_pos.y1
        full_height = full_y1 - full_y0

        # Manually shrink to 80% of full height and center vertically
        scale = 0.8
        height = full_height * scale
        y0 = full_y0 + (full_height - height) / 2

        x0 = top_pos.x1 + 0.015  # Horizontal position to the right
        width = 0.015  # Thin bar

        # Add colorbar axis
        cax = fig.add_axes([x0, y0, width, height])

        im = axes[0].get_images()[0] if axes[0].get_images() else None
        cbar = fig.colorbar(im, cax=cax)
        cbar.set_label(r"Guidance weighting $\mathbf{p}$")
        cbar.ax.yaxis.set_major_locator(plt.MaxNLocator(nbins=6))
        cbar.ax.yaxis.set_tick_params(labelsize=7)
        cbar.ax.yaxis.label.set_size(8)

    return fig


def plot_dehazed_results(
    hazy_images,
    pred_tissue_images,
    pred_haze_images,
    diffusion_model,
    titles=("Hazy", "Dehazed", "Haze"),
):
    """Create a visualization grid of hazy, dehazed, and estimated haze images."""

    # Stack the three image sets into one grid: one row per entry in `titles`
    input_shape = diffusion_model.input_shape
    stack_images = ops.stack(
        [
            hazy_images,
            pred_tissue_images,
            pred_haze_images,
        ]
    )
    stack_images = ops.reshape(stack_images, (-1, input_shape[0], input_shape[1]))

    fig, _ = plot_image_grid(
        stack_images,
        ncols=len(hazy_images),
        remove_axis=False,
        vmin=0,
        vmax=255,
    )
    # Label the first axis of each row with the corresponding title
    for i, ax in enumerate(fig.axes):
        if i % len(hazy_images) == 0:
            label = titles[(i // len(hazy_images)) % len(titles)]
            ax.set_ylabel(label, fontsize=12)

    return fig


def main(
    input_folder: str = "./assets",
    output_folder: str = "./temp",
    num_imgs_plot: int = 4,
    device: str = "auto:1",
    config: str = "configs/semantic_dps.yaml",
):
    """Dehaze all PNG images in input_folder and save results and figures."""
    num_img = num_imgs_plot

    zea.visualize.set_mpl_style()
    init_device(device)

    config = Config.from_yaml(config)
    seed = jax.random.PRNGKey(config.seed)

    paths = list(Path(input_folder).glob("*.png"))

    output_folder = Path(output_folder)

    images = []
    for path in paths:
        image = zea.io_lib.load_image(path)
        images.append(image)
    images = ops.stack(images, axis=0)

    diffusion_model = init(config)

    hazy_images, pred_tissue_images, pred_haze_images, masks = run(
        images,
        diffusion_model=diffusion_model,
        seed=seed,
        **config.params,
    )

    output_folder.mkdir(parents=True, exist_ok=True)

    for image, path in zip(pred_tissue_images, paths):
        image = ops.convert_to_numpy(image)
        file_name = path.name
        Image.fromarray(image).save(output_folder / file_name)

    fig = plot_dehazed_results(
        hazy_images[:num_img],
        pred_tissue_images[:num_img],
        pred_haze_images[:num_img],
        diffusion_model,
        titles=[
            r"Hazy $\mathbf{y}$",
            r"Dehazed $\mathbf{\hat{x}}$",
            r"Haze $\mathbf{\hat{h}}$",
        ],
    )
    path = Path("dehazed_results.png")
    save_kwargs = {"bbox_inches": "tight", "dpi": 300}
    fig.savefig(path, **save_kwargs)
    fig.savefig(path.with_suffix(".pdf"), **save_kwargs)
    log.success(f"Dehazed results saved to {log.yellow(path)}")

    masks_viz = copy.deepcopy(masks)
    masks_viz.pop("haze")

    masks_viz = {k: v[:num_img] for k, v in masks_viz.items()}

    fig = plot_batch_with_named_masks(
        images[:num_img],
        masks_viz,
        titles=[
            r"Ventricle $v(\mathbf{y})$",
            r"Septum $s(\mathbf{y})$",
            r"Fixed",
            r"Skeleton $t(\mathbf{y})$",
            r"Dark $b(\mathbf{y})$",
            r"Guidance $d(\mathbf{y})$",
        ],
    )
    path = Path("segmentation_steps.png")
    fig.savefig(path, **save_kwargs)
    fig.savefig(path.with_suffix(".pdf"), **save_kwargs)
    log.success(f"Segmentation steps saved to {log.yellow(path)}")

    plt.close("all")


if __name__ == "__main__":
    tyro.cli(main)