Mavthunder committed on
Commit d379ce4 · verified · 1 Parent(s): f129981

Update app.py

Files changed (1)
  1. app.py +127 -216
app.py CHANGED
@@ -1,13 +1,17 @@
  import gradio as gr
  import numpy as np
  from PIL import Image, ImageFilter
  import time
- # ---------- small image utils ----------

  def pil_to_np(img_pil):
-     arr = np.asarray(img_pil.convert("RGB")).astype(np.float32) / 255.0
-     return arr

  def np_to_pil(arr):
      arr = np.clip(arr * 255.0, 0, 255).astype(np.uint8)
@@ -20,256 +24,163 @@ def resize_max_side(img_pil, max_side=1600):
          return img_pil.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
      return img_pil

- def rgb_to_hsv_np(rgb):
-     # rgb: HxWx3 in [0,1]
-     r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2]
-     mx = np.max(rgb, axis=-1)
-     mn = np.min(rgb, axis=-1)
-     diff = mx - mn + 1e-8
-
-     # Hue
-     h = np.zeros_like(mx)
-     mask = diff > 1e-8
-     r_is_max = (mx == r) & mask
-     g_is_max = (mx == g) & mask
-     b_is_max = (mx == b) & mask
-     h[r_is_max] = (g[r_is_max] - b[r_is_max]) / diff[r_is_max]
-     h[g_is_max] = 2.0 + (b[g_is_max] - r[g_is_max]) / diff[g_is_max]
-     h[b_is_max] = 4.0 + (r[b_is_max] - g[b_is_max]) / diff[b_is_max]
-     h = (h / 6.0) % 1.0
-
-     # Saturation
-     s = np.where(mx <= 1e-8, 0, diff / (mx + 1e-8))
-     v = mx
-     return np.stack([h, s, v], axis=-1)
-
- def hsv_to_rgb_np(hsv):
-     h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2]
-     i = np.floor(h * 6).astype(int)
-     f = h * 6 - i
-     p = v * (1 - s)
-     q = v * (1 - f * s)
-     t = v * (1 - (1 - f) * s)
-     i_mod = i % 6
-
-     r = np.select(
-         [i_mod == 0, i_mod == 1, i_mod == 2, i_mod == 3, i_mod == 4, i_mod == 5],
-         [v, q, p, p, t, v])
-     g = np.select(
-         [i_mod == 0, i_mod == 1, i_mod == 2, i_mod == 3, i_mod == 4, i_mod == 5],
-         [t, v, v, q, p, p])
-     b = np.select(
-         [i_mod == 0, i_mod == 1, i_mod == 2, i_mod == 3, i_mod == 4, i_mod == 5],
-         [p, p, t, v, v, q])
-     rgb = np.stack([r, g, b], axis=-1)
-     return np.clip(rgb, 0, 1)
-
  def unsharp_mask(img_pil, radius=1.2, amount=0.7):
-     # classic local contrast boost
      blurred = img_pil.filter(ImageFilter.GaussianBlur(radius=radius))
      arr = pil_to_np(img_pil)
      arr_blur = pil_to_np(blurred)
      out = np.clip(arr + amount * (arr - arr_blur), 0, 1)
      return np_to_pil(out)

- # ---------- core adjustments ----------
-
- def apply_adjustments(img_pil,
-                       exposure_stops=0.0,
-                       contrast=0.0,
-                       saturation=0.0,
-                       warmth=0.0,        # + warm, - cool
-                       hue_shift_deg=0.0,
-                       gamma=1.0,
-                       clarity=0.0,
-                       lift=0.0):         # lift blacks / fade
-     """All params are gentle, designed to stay natural."""
-     img = resize_max_side(img_pil)
-     arr = pil_to_np(img)
-
-     # exposure (stops)
-     if abs(exposure_stops) > 1e-6:
-         arr = np.clip(arr * (2.0 ** exposure_stops), 0, 1)
-
-     # contrast (simple S-curve around mid 0.5)
-     if abs(contrast) > 1e-6:
-         arr = np.clip((arr - 0.5) * (1.0 + contrast) + 0.5, 0, 1)
-
-     # lift blacks (fade)
-     if abs(lift) > 1e-6:
-         arr = np.clip(arr + lift * (1.0 - arr), 0, 1)
-
-     # warmth (white-balance tilt)
-     if abs(warmth) > 1e-6:
-         wb = np.array([1.0 + warmth, 1.0, 1.0 - warmth], dtype=np.float32)
-         arr = np.clip(arr * wb, 0, 1)
-
-     # HSV tweaks (saturation + hue shift + gamma)
-     hsv = rgb_to_hsv_np(arr)
-     if abs(saturation) > 1e-6:
-         hsv[..., 1] = np.clip(hsv[..., 1] * (1.0 + saturation), 0, 1)
-     if abs(hue_shift_deg) > 1e-6:
-         hsv[..., 0] = (hsv[..., 0] + hue_shift_deg / 360.0) % 1.0
-     if abs(gamma - 1.0) > 1e-6:
-         hsv[..., 2] = np.clip(hsv[..., 2] ** (1.0 / gamma), 0, 1)
-     arr = hsv_to_rgb_np(hsv)
      out = np_to_pil(arr)
-     # clarity via unsharp mask
-     if abs(clarity) > 1e-6:
-         out = unsharp_mask(out, radius=1.2, amount=clarity)
      return out

- # ---------- aesthetic scoring (fast heuristic) ----------
-
- def aesthetic_score_fast(img_pil):
-     arr = pil_to_np(img_pil)
-     # luminance
-     Y = 0.2126 * arr[..., 0] + 0.7152 * arr[..., 1] + 0.0722 * arr[..., 2]
-     brightness = float(np.mean(Y))
-     contrast = float(np.std(Y))
-     # saturation
-     s = rgb_to_hsv_np(arr)[..., 1]
-     sat = float(np.mean(s))
-
-     # targets tuned for mass-appeal feed aesthetics (roughly)
-     target_b = 0.62
-     target_sat = 0.35
-
-     score_b = 1.0 - min(abs(brightness - target_b) / 0.62, 1.0)
-     score_c = min(max((contrast - 0.04) / 0.26, 0.0), 1.0)
-     score_s = 1.0 - min(abs(sat - target_sat) / 0.35, 1.0)
-
-     # clipping penalties
-     clip_hi = float((Y > 0.98).mean())
-     clip_lo = float((Y < 0.02).mean())
-     penalty_clip = min(clip_hi * 4.0 + clip_lo * 2.5, 1.5)
-
-     # white balance cast penalty (channel means too far apart)
-     means = arr.reshape(-1, 3).mean(axis=0)
-     cast = float(np.max(means) - np.min(means))
-     penalty_cast = min(cast * 2.0, 1.0)
-
-     # simple skin guard: if skin-ish pixels oversaturated, penalize
-     hsv = rgb_to_hsv_np(arr)
-     h, s_, v = hsv[..., 0], hsv[..., 1], hsv[..., 2]
-     skin_mask = (h < (50/360)) | (h > (345/360))
-     skin_mask &= (s_ > 0.23) & (v > 0.35)
-     skin_sat = float(s_[skin_mask].mean()) if np.any(skin_mask) else 0.0
-     penalty_skin = max(0.0, (skin_sat - 0.65) * 2.0)
-
-     raw = 0.4 * score_b + 0.35 * score_c + 0.25 * score_s
-     penalties = penalty_clip + penalty_cast + penalty_skin
-     final = max(0.0, min(1.0, raw - 0.4 * penalties))
-     return final, {
-         "brightness": round(brightness, 3),
-         "contrast": round(contrast, 3),
-         "saturation": round(sat, 3),
-         "clip_hi%": round(clip_hi * 100, 2),
-         "clip_lo%": round(clip_lo * 100, 2)
-     }
-
- # ---------- vibe presets ----------
-
- VIBES = {
-     "Natural": dict(exposure_stops=0.10,  contrast=0.08,  saturation=0.06,  warmth=0.02,  clarity=0.06, gamma=1.00, lift=0.00),
-     "Film":    dict(exposure_stops=0.05,  contrast=-0.03, saturation=-0.02, warmth=0.05,  clarity=0.03, gamma=0.95, lift=0.06),
-     "Pop":     dict(exposure_stops=0.00,  contrast=0.15,  saturation=0.12,  warmth=0.00,  clarity=0.15, gamma=1.00, lift=0.00),
-     "Moody":   dict(exposure_stops=-0.15, contrast=0.10,  saturation=-0.08, warmth=-0.03, clarity=0.05, gamma=1.05, lift=0.02),
-     "Pastel":  dict(exposure_stops=0.10,  contrast=-0.10, saturation=-0.15, warmth=0.03,  clarity=0.02, gamma=0.90, lift=0.08),
- }
-
- # Keep recent result in memory so feedback buttons can store something meaningful
- LAST_RESULT = {"winner": None, "scores": None}

  def process(image, intensity):
      if image is None:
          raise gr.Error("Please upload a photo first.")

-     # generate candidates
      candidates = []
-     scores = []
-     metrics = []
-     for name, params in VIBES.items():
-         out = apply_adjustments(image, **params)
-         score, met = aesthetic_score_fast(out)
          candidates.append((name, out, score))
-         scores.append(score)
-         metrics.append((name, met))
-
-     # pick winner
      candidates.sort(key=lambda x: x[2], reverse=True)
      winner_name, winner_img, winner_score = candidates[0]

-     # blend intensity with original (0..100)
-     t = float(intensity) / 100.0
      base = resize_max_side(image)
-     wnp = pil_to_np(winner_img)
-     onp = pil_to_np(base)
-     blended = np_to_pil(onp * (1 - t) + wnp * t)
-
-     # gallery: show all looks with their scores
-     gallery = []
-     for name, img, score in candidates:
-         caption = f"{name} — score {score:.2f}"
-         gallery.append((img, caption))
-
-     # remember
-     LAST_RESULT["winner"] = {
-         "name": winner_name,
-         "score": float(winner_score),
-         "when": time.strftime("%Y-%m-%d %H:%M:%S")
-     }
-     LAST_RESULT["scores"] = {name: float(s) for name, _, s in candidates}

-     # metrics text
-     metrics_top = next(m for n, m in metrics if n == winner_name)
-     info = f"Picked: **{winner_name}** (score {winner_score:.2f})"
-     info += f"\n\nBrightness: {metrics_top['brightness']} | Contrast: {metrics_top['contrast']} | Saturation: {metrics_top['saturation']}"
-     info += f"\nClipped Highlights: {metrics_top['clip_hi%']}% | Deep Shadows: {metrics_top['clip_lo%']}%"
-
-     return blended, gallery, info

  def feedback(good):
      if LAST_RESULT["winner"] is None:
-         return "Upload a photo and generate a result first."
-     # Append a tiny log in Space storage (ephemeral on free tier, good enough for MVP)
      try:
-         with open("feedback_log.csv", "a", encoding="utf-8") as f:
-             f.write(
-                 f"{LAST_RESULT['winner']['when']},{LAST_RESULT['winner']['name']},{LAST_RESULT['winner']['score']},{'up' if good else 'down'}\n"
-             )
-     except Exception:
          pass
      return "Thanks for the feedback! ✨"

- # ---------- UI ----------

- with gr.Blocks(title="One-Click Aesthetic") as demo:
-     gr.Markdown(
-         """
-         # One-Click Aesthetic ✨
-         Upload a photo and hit **Make it Aesthetic**.
-         The app tries a few tasteful looks and picks the one with the best predicted mass-appeal score.
-         Use the **Intensity** slider to control how strong the look is.
-         """
-     )
      with gr.Row():
          inp = gr.Image(label="Upload photo", type="pil")
-         out = gr.Image(label="Aesthetic result")
-
-     intensity = gr.Slider(0, 100, value=80, step=1, label="Intensity (blend)")
-
-     go = gr.Button("Make it Aesthetic", variant="primary")
      info = gr.Markdown()
-     gallery = gr.Gallery(label="Tried looks (ranked high → low)", show_label=True, columns=5, height="auto")
-
      with gr.Row():
-         up = gr.Button("👍 Looks great")
-         down = gr.Button("👎 Needs work")

-     go.click(process, inputs=[inp, intensity], outputs=[out, gallery, info])
-     up.click(lambda: feedback(True), inputs=None, outputs=info)
-     down.click(lambda: feedback(False), inputs=None, outputs=info)

  demo.launch()
 
  import gradio as gr
  import numpy as np
  from PIL import Image, ImageFilter
+ import torch
+ import torch.nn as nn
+ from transformers import AutoProcessor, AutoModel
+ from huggingface_hub import hf_hub_download
+ import cv2
  import time

+ # ------------------ Utility functions ------------------

  def pil_to_np(img_pil):
+     return np.asarray(img_pil.convert("RGB")).astype(np.float32) / 255.0

  def np_to_pil(arr):
      arr = np.clip(arr * 255.0, 0, 255).astype(np.uint8)

          return img_pil.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
      return img_pil

  def unsharp_mask(img_pil, radius=1.2, amount=0.7):
      blurred = img_pil.filter(ImageFilter.GaussianBlur(radius=radius))
      arr = pil_to_np(img_pil)
      arr_blur = pil_to_np(blurred)
      out = np.clip(arr + amount * (arr - arr_blur), 0, 1)
      return np_to_pil(out)

+ # ------------------ Zero-DCE++ Model ------------------
+
+ class EnhanceNet(nn.Module):
+     def __init__(self):
+         super(EnhanceNet, self).__init__()
+         number_f = 32
+         self.e_conv1 = nn.Conv2d(3, number_f, 3, 1, 1, bias=True)
+         self.e_conv2 = nn.Conv2d(number_f, number_f, 3, 1, 1, bias=True)
+         self.e_conv3 = nn.Conv2d(number_f, number_f, 3, 1, 1, bias=True)
+         self.e_conv4 = nn.Conv2d(number_f, number_f, 3, 1, 1, bias=True)
+         self.e_conv5 = nn.Conv2d(number_f*2, number_f, 3, 1, 1, bias=True)
+         self.e_conv6 = nn.Conv2d(number_f*2, number_f, 3, 1, 1, bias=True)
+         self.e_conv7 = nn.Conv2d(number_f*2, 24, 3, 1, 1, bias=True)
+         self.relu = nn.ReLU(inplace=True)
+
+     def forward(self, x):
+         x1 = self.relu(self.e_conv1(x))
+         x2 = self.relu(self.e_conv2(x1))
+         x3 = self.relu(self.e_conv3(x2))
+         x4 = self.relu(self.e_conv4(x3))
+         x5 = self.relu(self.e_conv5(torch.cat([x3, x4], 1)))
+         x6 = self.relu(self.e_conv6(torch.cat([x2, x5], 1)))
+         x_r = torch.tanh(self.e_conv7(torch.cat([x1, x6], 1)))
+         r1, r2, r3, r4, r5, r6, r7, r8 = torch.split(x_r, 3, dim=1)
+         x = x + r1 * (torch.pow(x, 2) - x)
+         x = x + r2 * (torch.pow(x, 2) - x)
+         x = x + r3 * (torch.pow(x, 2) - x)
+         enhance_image_1 = x + r4 * (torch.pow(x, 2) - x)
+         enhance_image_2 = enhance_image_1 + r5 * (torch.pow(enhance_image_1, 2) - enhance_image_1)
+         enhance_image_3 = enhance_image_2 + r6 * (torch.pow(enhance_image_2, 2) - enhance_image_2)
+         enhance_image_4 = enhance_image_3 + r7 * (torch.pow(enhance_image_3, 2) - enhance_image_3)
+         return enhance_image_4
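# Each update in forward() applies the Zero-DCE light-enhancement curve
# LE(x) = x + r * (x^2 - x): a per-pixel quadratic whose strength r is
# predicted per channel by the network. Chaining seven applications lets
# the model express stronger, smoother brightening; r8 is split from the
# 24-channel head but never used, so only seven of the eight curves apply.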
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model_path = hf_hub_download("LLVIP/Zero-DCEpp", "zerodcepp.pth")
+ zero_dce = EnhanceNet().to(device)
+ zero_dce.load_state_dict(torch.load(model_path, map_location=device))
+ zero_dce.eval()
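# The repo id "LLVIP/Zero-DCEpp" and filename "zerodcepp.pth" are taken
# as-is from this commit; load_state_dict succeeds only if the checkpoint's
# layer names match the EnhanceNet definition above.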
+
+ def zero_dce_enhance(img_pil):
+     img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
+     img = cv2.resize(img, (400, 400))  # small resize for speed
+     img = img.astype(np.float32) / 255.0
+     inp = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0).to(device)
+     with torch.no_grad():
+         out = zero_dce(inp)
+     out = out.squeeze(0).permute(1, 2, 0).cpu().numpy()
+     out = np.clip(out * 255.0, 0, 255).astype(np.uint8)
+     return Image.fromarray(cv2.cvtColor(out, cv2.COLOR_BGR2RGB))
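# Two caveats: the fixed (400, 400) resize ignores aspect ratio, so the
# enhanced image comes back square and has to be resized again before
# blending in process(); and the RGB->BGR round-trip feeds the network BGR
# channel order, which matters only if the published weights expect RGB.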
+
+ # ------------------ Aesthetic Predictor ------------------
+
+ predictor_name = "shunk031/aesthetic-predictor-v2"
+ processor = AutoProcessor.from_pretrained(predictor_name)
+ model_pred = AutoModel.from_pretrained(predictor_name).to(device)
+ model_pred.eval()
+
+ def aesthetic_score_ai(img_pil):
+     inputs = processor(images=img_pil, return_tensors="pt").to(device)
+     with torch.no_grad():
+         outputs = model_pred(**inputs)
+     score = outputs.logits.mean().item()
+     return float(score)
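# Assumption baked into the code above: a generic AutoModel does not
# necessarily expose .logits, so this works only if the predictor
# checkpoint ships a scoring head. CLIP-based aesthetic predictors
# typically score a pooled image embedding with a small MLP head instead.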
+
+ # ------------------ Vibes ------------------

+ VIBES = {
+     "Natural": dict(exposure=0.05,  contrast=0.08,  saturation=0.08,  sharp=0.05),
+     "Film":    dict(exposure=0.0,   contrast=-0.05, saturation=-0.02, sharp=0.03),
+     "Pop":     dict(exposure=0.1,   contrast=0.15,  saturation=0.20,  sharp=0.15),
+     "Moody":   dict(exposure=-0.1,  contrast=0.10,  saturation=-0.08, sharp=0.05),
+     "Pastel":  dict(exposure=0.1,   contrast=-0.08, saturation=-0.15, sharp=0.02),
+ }

+ def apply_vibe(img_pil, vibe):
+     arr = pil_to_np(img_pil)
+     # Exposure
+     arr = np.clip(arr * (1.0 + vibe["exposure"]), 0, 1)
+     # Contrast
+     arr = np.clip((arr - 0.5) * (1.0 + vibe["contrast"]) + 0.5, 0, 1)
+     # Saturation (HSV)
+     hsv = cv2.cvtColor((arr*255).astype(np.uint8), cv2.COLOR_RGB2HSV).astype(np.float32)
+     hsv[..., 1] = np.clip(hsv[..., 1] * (1.0 + vibe["saturation"]), 0, 255)
+     arr = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB).astype(np.float32) / 255.0
      out = np_to_pil(arr)
+     if vibe["sharp"] > 0:
+         out = unsharp_mask(out, amount=vibe["sharp"])
      return out
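# OpenCV's uint8 HSV convention is H in [0, 179] and S, V in [0, 255];
# scaling saturation through a uint8 round-trip quantizes the channels,
# coarser than the float HSV math this replaced but acceptable here.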

+ # ------------------ Processing ------------------

+ LAST_RESULT = {"winner": None}

  def process(image, intensity):
      if image is None:
          raise gr.Error("Please upload a photo first.")
+
+     # Step 1: enhance with Zero-DCE++
+     enhanced = zero_dce_enhance(image)

+     # Step 2: apply vibes + score them
      candidates = []
+     for name, vibe in VIBES.items():
+         out = apply_vibe(enhanced, vibe)
+         score = aesthetic_score_ai(out)
          candidates.append((name, out, score))
+
      candidates.sort(key=lambda x: x[2], reverse=True)
      winner_name, winner_img, winner_score = candidates[0]

+     # Intensity blend
+     t = intensity / 100.0
      base = resize_max_side(image)
+     winner_img = winner_img.resize(base.size, Image.LANCZOS)  # bring the 400x400 enhanced branch up to the base size so the arrays broadcast
+     out_np = pil_to_np(winner_img)
+     base_np = pil_to_np(base)
+     blended = np_to_pil(base_np * (1 - t) + out_np * t)

+     gallery = [(img, f"{name}: {score:.2f}") for name, img, score in candidates]
+
+     LAST_RESULT["winner"] = {"name": winner_name, "score": winner_score, "when": time.strftime("%Y-%m-%d %H:%M:%S")}
+
+     return blended, gallery, f"Chosen: **{winner_name}** (score {winner_score:.2f})"
 
 
  def feedback(good):
      if LAST_RESULT["winner"] is None:
+         return "Generate a result first!"
      try:
+         with open("feedback_log.csv", "a") as f:
+             f.write(f"{LAST_RESULT['winner']['when']},{LAST_RESULT['winner']['name']},{LAST_RESULT['winner']['score']},{'up' if good else 'down'}\n")
+     except Exception:  # don't swallow SystemExit/KeyboardInterrupt
          pass
      return "Thanks for the feedback! ✨"

+ # ------------------ UI ------------------

+ with gr.Blocks(title="One-Click Aesthetic AI") as demo:
+     gr.Markdown("# One-Click Aesthetic ✨\nUpload a photo → AI enhances it (Zero-DCE++) → tries vibes → ranks with an AI aesthetic predictor.")
      with gr.Row():
          inp = gr.Image(label="Upload photo", type="pil")
+         out = gr.Image(label="Result")
+     intensity = gr.Slider(0, 100, value=80, label="Intensity")
+     go = gr.Button("Make it Aesthetic")
      info = gr.Markdown()
+     gallery = gr.Gallery(label="Tried Looks", columns=5)
      with gr.Row():
+         up = gr.Button("👍 Good")
+         down = gr.Button("👎 Bad")

+     go.click(process, [inp, intensity], [out, gallery, info])
+     up.click(lambda: feedback(True), None, info)
+     down.click(lambda: feedback(False), None, info)

  demo.launch()
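
For the Space to build, the new imports imply roughly the following requirements.txt. This is a sketch inferred only from the import list above (opencv-python-headless stands in for cv2, the usual choice on a GUI-less Space; the commit does not specify versions):

    gradio
    numpy
    pillow
    torch
    transformers
    huggingface_hub
    opencv-python-headless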