Update app.py
app.py
CHANGED
@@ -2,124 +2,90 @@ import gradio as gr
 from PIL import Image, ImageEnhance
 import numpy as np
 import torch
-from transformers import AutoProcessor, AutoModel, pipeline, ViTFeatureExtractor, ViTForImageClassification

-# ------------------ Device ------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"

-# ------------------ Aesthetic Scorer ------------------
-
-
-
-aesthetic_model.eval()

-def aesthetic_score(img_pil):
-    inputs =
     with torch.no_grad():
-
-
-
-
-

-def zero_dce_enhance(img_pil):
     enhanced = zero_dce_pipe(img_pil)
     return enhanced[0]

-# ------------------ Classifier ------------------
-
-
-cls_model = ViTForImageClassification.from_pretrained(cls_model_name).to(device)
 cls_model.eval()

 def classify_image(img_pil):
-    inputs =
     with torch.no_grad():
         logits = cls_model(**inputs).logits
-
-    label
-    return label.lower()

-# ------------------ Category Vibes ------------------
 CATEGORY_VIBES = {
-    "person": [
-
-
-    ],
-    "food": [
-        dict(name="Food Vibrant", exposure_stops=0.1, contrast=0.15, saturation=0.25, warmth=0.05, clarity=0.1),
-        dict(name="Food Natural", exposure_stops=0.05, contrast=0.05, saturation=0.1, warmth=0.02, clarity=0.05),
-    ],
-    "landscape": [
-        dict(name="Landscape Punch", exposure_stops=0.1, contrast=0.2, saturation=0.2, warmth=0.05, clarity=0.15),
-        dict(name="Landscape Film", exposure_stops=0.0, contrast=0.1, saturation=0.05, warmth=0.02, clarity=0.1),
-    ],
-    "default": [
-        dict(name="Pop", exposure_stops=0.05, contrast=0.2, saturation=0.2, warmth=0.05, clarity=0.1),
-        dict(name="Moody", exposure_stops=-0.05, contrast=0.15, saturation=-0.05, warmth=-0.05, clarity=0.2),
-    ],
 }

-
-
-    img = img.convert("RGB")
-    if exposure_stops != 0:
-
-
-    if contrast != 0:
-        img = ImageEnhance.Contrast(img).enhance(1 + contrast)
-    if saturation != 0:
-        img = ImageEnhance.Color(img).enhance(1 + saturation)
-    if clarity != 0:
         arr = np.array(img).astype(np.float32)
         arr = np.clip(arr * (1 + clarity), 0, 255).astype(np.uint8)
         img = Image.fromarray(arr)
-    if warmth != 0:
         r, g, b = img.split()
-        r = r.point(lambda i: min(255, i * (1 + warmth)))
-        b = b.point(lambda i: min(255, i * (1 - warmth)))
-        img = Image.merge("RGB", (r, g, b))
     return img

-# ------------------ Main Process ------------------
 def process(image):
-
-    enhanced = zero_dce_enhance(image)
-
-    # Step 2: Classify image
     label = classify_image(enhanced)
-
-
-
-        vibes = CATEGORY_VIBES["person"]
-    elif "food" in label or "dish" in label:
-        vibes = CATEGORY_VIBES["food"]
-    elif "landscape" in label or "tree" in label or "mountain" in label:
-        vibes = CATEGORY_VIBES["landscape"]
-    else:
-        vibes = CATEGORY_VIBES["default"]
-
-    # Step 3: Apply vibes + score
-    candidates = []
     for vibe in vibes:
-        out = apply_adjustments(enhanced, **
-        score =
-
-
-
-    best
-    score, vibe_name, img_out = best

-    return img_out, f"Classified as: {label} → Chosen: {vibe_name} (score {score:.2f})"
-
-# ------------------ UI ------------------
 demo = gr.Interface(
     fn=process,
     inputs=gr.Image(type="pil"),
-    outputs=[gr.Image(type="pil"), gr.
-    title="
-    description="
 )
-
 if __name__ == "__main__":
     demo.launch()
 from PIL import Image, ImageEnhance
 import numpy as np
 import torch
+from transformers import AutoProcessor, AutoModel, pipeline, ViTFeatureExtractor, ViTForImageClassification, CLIPProcessor
+import cv2

 device = "cuda" if torch.cuda.is_available() else "cpu"

+# Aesthetic Scorer: rsinema/aesthetic-scorer (public)
+ae_processor = CLIPProcessor.from_pretrained("rsinema/aesthetic-scorer")
+ae_model = AutoModel.from_pretrained("rsinema/aesthetic-scorer").to(device)
+ae_model.eval()
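+# NOTE: aesthetic_score() below feeds raw pixel_values straight into ae_model();
+# this assumes the repo's custom model takes an image tensor and returns
+# per-dimension scores with the overall aesthetic score first.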

+def aesthetic_score(img_pil):
+    inputs = ae_processor(images=img_pil, return_tensors="pt")["pixel_values"].to(device)
     with torch.no_grad():
+        scores = ae_model(inputs)
+    # scores returns 7 dims; first is overall aesthetic
+    return float(scores[0][0].item())
+
+# Enhancement using public Zero-DCE model
+zero_dce_pipe = pipeline(
+    "image-enhancement",
+    model="nateraw/zero-dce",
+    device=0 if torch.cuda.is_available() else -1
+)
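+# NOTE: "image-enhancement" is not a built-in transformers pipeline task, so
+# the pipeline() call above relies on the nateraw/zero-dce repo registering a
+# custom pipeline; if it does not, this line raises at startup.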

+def enhance_image(img_pil):
     enhanced = zero_dce_pipe(img_pil)
     return enhanced[0]

+# Image Classifier (ViT)
+cls_ext = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
+cls_model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224").to(device)
 cls_model.eval()

 def classify_image(img_pil):
+    inputs = cls_ext(images=img_pil, return_tensors="pt").to(device)
     with torch.no_grad():
         logits = cls_model(**inputs).logits
+    label = cls_model.config.id2label[logits.argmax(-1).item()].lower()
+    return label

+# Category-specific vibes
 CATEGORY_VIBES = {
+    "person": [...],  # same presets as before
+    "food": [...],
+    "landscape": [...],
+    "default": [...],
 }
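+# NOTE: the [...] placeholders above stand in for the concrete dict(...) presets
+# from the previous revision (keys: name, exposure_stops, contrast, saturation,
+# warmth, clarity); the app only runs once they are filled back in.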

+def apply_adjustments(img, exposure_stops, contrast, saturation, warmth, clarity):
+    img = img.convert("RGB")
+    if exposure_stops: img = ImageEnhance.Brightness(img).enhance(2**exposure_stops)
+    if contrast: img = ImageEnhance.Contrast(img).enhance(1 + contrast)
+    if saturation: img = ImageEnhance.Color(img).enhance(1 + saturation)
+    if clarity:
         arr = np.array(img).astype(np.float32)
         arr = np.clip(arr * (1 + clarity), 0, 255).astype(np.uint8)
         img = Image.fromarray(arr)
+    if warmth:
         r, g, b = img.split()
+        r = r.point(lambda i: min(255, i*(1+warmth)))
+        b = b.point(lambda i: min(255, i*(1-warmth)))
+        img = Image.merge("RGB",(r,g,b))
     return img

 def process(image):
+    enhanced = enhance_image(image)
     label = classify_image(enhanced)
+    vibes = CATEGORY_VIBES.get(label, CATEGORY_VIBES["default"])
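+    # NOTE: ViT outputs ImageNet class names ("pizza", "lakeside", ...), which
+    # rarely equal these dict keys exactly, so this lookup will usually fall
+    # back to "default"; the previous revision matched substrings instead
+    # (see the keyword-matching sketch at the end of the diff).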
+
+    best, best_score, best_name = None, -float("inf"), None
     for vibe in vibes:
+        out = apply_adjustments(enhanced, **{k: v for k, v in vibe.items() if k != "name"})
+        score = aesthetic_score(out)
+        if score > best_score:
+            best, best_score, best_name = out, score, vibe["name"]
+
+    return best, f"Classified as {label} → Chosen style: {best_name} (score {best_score:.2f})"

 demo = gr.Interface(
     fn=process,
     inputs=gr.Image(type="pil"),
+    outputs=[gr.Image(type="pil"), gr.Text()],
+    title="Content-Aware Aesthetic AI (Public)",
+    description="Enhance → classify → apply category vibes → score with public aesthetic model"
 )
 if __name__ == "__main__":
     demo.launch()
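Since the exact-match lookup in process() will usually land on the "default"
presets, here is a minimal sketch of a keyword mapping that restores the
previous revision's substring behaviour. vibes_for_label is a hypothetical
helper, not part of this commit; the keywords are the ones the old code tested:

    # Hypothetical helper (not in the commit): map an ImageNet label onto a
    # CATEGORY_VIBES key using the substrings the previous revision checked.
    CATEGORY_KEYWORDS = {
        "person": ("person",),
        "food": ("food", "dish"),
        "landscape": ("landscape", "tree", "mountain"),
    }

    def vibes_for_label(label):
        for category, words in CATEGORY_KEYWORDS.items():
            if any(word in label for word in words):
                return CATEGORY_VIBES[category]
        return CATEGORY_VIBES["default"]

    # In process(), the lookup would then read: vibes = vibes_for_label(label)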