hahaBrij committed on
Commit
a6a3b86
·
verified ·
1 Parent(s): 1c3ce3d

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. README.md +12 -0
  3. SuSy.pt +3 -0
  4. app.py +297 -0
  5. gitattributes +35 -0
  6. requirements.txt +31 -0
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ SuSy.pt filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Combined Model
3
+ emoji: 🦀
4
+ colorFrom: green
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 5.32.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
SuSy.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa10fae300ee2742c7a373b6c3332c2595b461954b8f5616d2d382ef2751020e
3
+ size 50810392
app.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified AI-Image & Deepfake Detector
3
+ ===================================
4
+ • Combines a generic AI-image detector (Swin-V2 + SuSy) *and*
5
+ a deepfake-specialist face detector (Inception-ResNet V1).
6
+ • Always runs both experts → fuses their calibrated scores.
7
+ • Works on images **and** short videos (≤ 30 s).
8
+ Add/keep in requirements.txt (versions pinned earlier):
9
+ torch torchvision facenet-pytorch transformers torchcam captum timm
10
+ mediapipe opencv-python-headless pillow scikit-image matplotlib
11
+ gradio fpdf pandas numpy absl-py ttach
12
+ """
13
+
14
+ # ───────────────────── bootstrap for extra wheels ──────────────────────
15
+ import os, uuid, warnings, math, tempfile
16
+ from pathlib import Path
17
+ from typing import List, Tuple
18
+
19
+ warnings.filterwarnings("ignore")
20
+
21
def _ensure_deps():
    """Install ``mediapipe`` and ``fpdf`` when they cannot be imported.

    The install is best-effort: a failing ``pip`` run is reported on stderr
    but does not raise here, so the regular imports further down the module
    surface the real ``ImportError`` if the packages are still missing.
    """
    try:
        import mediapipe, fpdf  # noqa: F401
    except ImportError:
        import subprocess
        import sys

        # Use the running interpreter's pip so the wheels land in the same
        # environment this process imports from (a bare "pip" on PATH may
        # belong to a different Python).
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "--quiet", "--upgrade",
             "mediapipe", "fpdf"],
            check=False,
        )
        if result.returncode != 0:
            print(f"pip install of mediapipe/fpdf failed (exit code {result.returncode})",
                  file=sys.stderr)


_ensure_deps()
28
+
29
+ # ─────────────────────────────── imports ───────────────────────────────
30
+ import cv2
31
+ import gradio as gr
32
+ import numpy as np
33
+ import torch
34
+ import torch.nn.functional as F
35
+ from PIL import Image
36
+ from fpdf import FPDF
37
+ import mediapipe as mp
38
+ from facenet_pytorch import InceptionResnetV1, MTCNN
39
+ from pytorch_grad_cam import GradCAM
40
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
41
+ from torchvision import transforms
42
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
43
+ from torchcam.methods import GradCAM as TCGradCAM
44
+ from captum.attr import Saliency
45
+ from skimage.feature import graycomatrix, graycoprops
46
+ import matplotlib.pyplot as plt
47
+ import pandas as pd
48
+ import spaces
49
+
50
+ # ───────────────────────── runtime / models ────────────────────────────
51
# Silence matplotlib's logger below ERROR level.
plt.set_loglevel("ERROR")
# Single torch device shared by every model below: CUDA if available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deep-fake specialist
# Face detector used to crop a face from the input image.
_face_det = MTCNN(select_largest=False, post_process=False, device=device).eval().to(device)
# Single-logit face classifier; its output is passed through a sigmoid in _predict_image.
_df_model = InceptionResnetV1(pretrained="vggface2", classify=True, num_classes=1, device=device)
# NOTE(review): the commit's file list shows SuSy.pt but no "resnet_inception.pth" —
# confirm the checkpoint is actually present in the repository, otherwise this line
# fails at import time.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
_df_model.load_state_dict(torch.load("resnet_inception.pth", map_location="cpu")["model_state_dict"])
_df_model.to(device).eval()
# Grad-CAM explainer attached to the last module of block8.branch1 of the face classifier.
_df_cam = GradCAM(_df_model, target_layers=[_df_model.block8.branch1[-1]],
                  use_cuda=device.type == "cuda")
61
+
62
+ # Helper: robust layer fetch
63
+ def _get_layer(model, name: str):
64
+ mods = dict(model.named_modules())
65
+ return mods.get(name) or next(m for n, m in mods.items() if n.endswith(name))
66
+
67
# Binary AI-image detector (Swin-V2)
# Hugging Face Hub id of the checkpoint; processor and model are fetched from it.
BIN_ID = "haywoodsloan/ai-image-detector-deploy"
_bin_proc = AutoImageProcessor.from_pretrained(BIN_ID)
_bin_mod = AutoModelForImageClassification.from_pretrained(BIN_ID).to(device).eval()
# Layer name (resolved by exact or suffix match in _get_layer) that the CAM hooks into.
_CAM_LAYER_BIN = "encoder.layers.3.blocks.1.layernorm_after"
_bin_cam = TCGradCAM(_bin_mod, target_layer=_get_layer(_bin_mod, _CAM_LAYER_BIN))
73
+
74
# Generator classifier (SuSy — ScriptModule → Captum only)
_susy_mod = torch.jit.load("SuSy.pt").to(device).eval()
# Display names for SuSy outputs 1..5; output 0 is dropped by the callers
# (they slice with [1:]) — presumably the "authentic" class, TODO confirm.
_GEN_CLASSES = ["Stable Diffusion 1.x", "DALL·E 3",
                "MJ V5/V6", "Stable Diffusion XL", "MJ V1/V2"]
# Patch edge length in pixels, and how many highest-contrast patches are scored.
_PATCH, _TOP = 224, 5
_to_tensor = transforms.ToTensor()
# PIL image -> single-channel uint8 tensor, used for the GLCM contrast ranking.
_to_gray = transforms.Compose([transforms.PILToTensor(), transforms.Grayscale()])
81
+
82
+ # ─────────────── calibration placeholders (optional tune) ──────────────
83
# Slope / intercept placeholders for score calibration of the deep-fake (df)
# and AI-image (ai) experts. The active _calibrate_* functions return their
# input unchanged, so these values are currently not read by live code.
_calib_df_slope, _calib_df_inter = 1.0, 0.0
_calib_ai_slope, _calib_ai_inter = 1.0, 0.0
85
+
86
+ # def _calibrate_df(p: float) -> float:
87
+
88
+ # def _calibrate_ai(p: float) -> float:
89
+ # return 1 / (1 + math.exp(-(_calib_ai_slope * (p + _calib_ai_inter))))
90
+
91
+ def _calibrate_df(p: float) -> float: # keep raw score for now
92
+ return p
93
+
94
+ def _calibrate_ai(p: float) -> float:
95
+ return p
96
+
97
+ # ───────────────────────────── misc helpers ────────────────────────────
98
# Half-width of the band around 0.5 inside which _verdict reports "uncertain".
UNCERTAIN_GAP = 0.10
# MAX_SAMPLES: upper bound on frames sampled per video (_sample_idx).
# MIN_FRAMES: despite the name, _predict_video uses it as the maximum number
# of preview frames kept for the gallery.
MIN_FRAMES, MAX_SAMPLES = 4, 20
100
+
101
def _extract_landmarks(rgb: np.ndarray) -> Tuple[np.ndarray, np.ndarray | None]:
    """Draw MediaPipe face-mesh landmarks of the first detected face on a copy of *rgb*.

    Args:
        rgb: image array unpacked as ``(height, width, channels)``; it is
            handed to FaceMesh as-is, so it should be in RGB channel order.

    Returns:
        ``(annotated, points)``. When no face is found this is ``(rgb, None)``
        with the input returned untouched. Otherwise ``annotated`` is a copy of
        the input with one green dot per landmark and ``points`` is an
        ``(n_landmarks, 2)`` int array of ``(x, y)`` pixel coordinates.
    """
    mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)
    try:
        res = mesh.process(rgb)
    finally:
        # Release the graph even when processing raises.
        mesh.close()
    if not res.multi_face_landmarks:
        return rgb, None

    h, w, _ = rgb.shape
    # Landmark coordinates are normalised; scale them to pixel positions.
    points = np.array(
        [(int(lm.x * w), int(lm.y * h)) for lm in res.multi_face_landmarks[0].landmark],
        dtype=np.int32,
    )
    out = rgb.copy()
    for cx, cy in points:
        cv2.circle(out, (int(cx), int(cy)), 1, (0, 255, 0), -1)
    return out, points
112
+
113
def _overlay_cam(cam, base):
    """Blend a class-activation map over an image as a jet-coloured heat-map.

    Args:
        cam: activation map as a torch tensor or array-like. Inputs with more
            than two dimensions (e.g. a leading batch axis) are averaged over
            their leading axes down to a single 2-D map.
        base: image array; its first two dimensions give the output size.

    Returns:
        RGBA ``PIL.Image`` of the base image blended with the heat-map
        (heat-map weight 0.45).
    """
    # Make sure 'cam' is a NumPy array on CPU (covers torchcam tensor output).
    if torch.is_tensor(cam):
        cam = cam.detach().cpu().numpy()
    cam = np.asarray(cam)
    # The colour-map step below needs a 2-D map; collapse any leading axes.
    if cam.ndim > 2:
        cam = cam.reshape((-1,) + cam.shape[-2:]).mean(axis=0)

    # Min-max normalise to [0, 1]; the epsilon guards against a constant map.
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)
    heat_rgb = (plt.cm.jet(cam)[:, :, :3] * 255).astype(np.uint8)
    heat = Image.fromarray(heat_rgb).resize(
        (base.shape[1], base.shape[0]), Image.BICUBIC
    )

    return Image.blend(
        Image.fromarray(base).convert("RGBA"),
        heat.convert("RGBA"),
        alpha=0.45,
    )
129
+
130
def _render_pdf(title: str, verdict: str, conf: dict, pages: List[Image.Image]) -> str:
    """Write a one-document PDF report and return its path.

    Args:
        title: centred heading of the report.
        verdict: verdict text printed below the heading.
        conf: mapping with numeric ``"real"`` and ``"fake"`` entries.
        pages: images embedded one after another, each captioned "Figure N".

    Returns:
        Path of the generated file (``/tmp/report_<uuid>.pdf``) as a string.
    """
    out = Path(f"/tmp/report_{uuid.uuid4().hex}.pdf")
    pdf = FPDF()
    pdf.set_auto_page_break(True, 15)
    pdf.add_page()
    pdf.set_font("Helvetica", size=14)
    pdf.cell(0, 10, title, ln=True, align="C")
    pdf.ln(4)
    pdf.set_font("Helvetica", size=12)
    pdf.multi_cell(0, 6, f"Verdict: {verdict}\n"
                         f"Confidence -> Real {conf['real']:.3f} Fake {conf['fake']:.3f}")
    for idx, img in enumerate(pages):
        pdf.ln(4)
        pdf.set_font("Helvetica", size=11)
        pdf.cell(0, 6, f"Figure {idx+1}", ln=True)

        # mkstemp creates the file atomically (mktemp only returns a name and
        # is race-prone); close the descriptor because PIL reopens it by path.
        fd, tmp_name = tempfile.mkstemp(suffix=".jpg")
        os.close(fd)
        tmp = Path(tmp_name)
        try:
            # JPEG has no alpha channel, so flatten RGBA overlays to RGB first.
            img.convert("RGB").save(tmp, format="JPEG")
            pdf.image(str(tmp), x=10, w=90)
        finally:
            # Remove the scratch file even if saving or embedding fails.
            tmp.unlink(missing_ok=True)
    pdf.output(str(out))
    return str(out)
147
+
148
+ # ────────────────────────── SuSy helpers (saliency) ────────────────────
149
def _susy_cam(tensor: torch.Tensor, class_idx: int) -> np.ndarray:
    """Return a saliency map for SuSy output *class_idx* as a NumPy array.

    Captum ``Saliency`` attributions of ``_susy_mod`` w.r.t. *tensor* are taken
    in absolute value and averaged over dimension 1; singleton dimensions are
    then squeezed away.
    """
    explainer = Saliency(_susy_mod)
    attributions = explainer.attribute(tensor, target=class_idx)
    averaged = attributions.abs().mean(1, keepdim=True)
    return averaged.squeeze().detach().cpu().numpy()
153
+
154
@spaces.GPU
def _susy_predict(img: Image.Image):
    """Score which generator most likely produced *img* using the SuSy model.

    The image is tiled into ``_PATCH`` x ``_PATCH`` patches (at least one per
    axis), the ``_TOP`` patches with the highest GLCM contrast are kept, and
    the model's softmax outputs are averaged over those patches.

    Args:
        img: input image in any PIL mode; it is converted to RGB first.

    Returns:
        Dict mapping each name in ``_GEN_CLASSES`` to its mean probability.
        Model output 0 is dropped — presumably the "authentic" class, TODO confirm.
    """
    # The patch buffer is 3-channel, so RGBA / greyscale input must be
    # normalised before cropping.
    img = img.convert("RGB")
    w, h = img.size
    npx, npy = max(1, w // _PATCH), max(1, h // _PATCH)

    # Column-major tiling (x outer, y inner). crop() already returns exactly
    # _PATCH x _PATCH pixels, padding with black outside the image bounds.
    patches = np.stack([
        np.asarray(img.crop((i * _PATCH, j * _PATCH,
                             (i + 1) * _PATCH, (j + 1) * _PATCH)))
        for i in range(npx)
        for j in range(npy)
    ])

    contrasts = []
    for p in patches:
        g = _to_gray(Image.fromarray(p)).squeeze(0).numpy()
        glcm = graycomatrix(g, [5], [0], 256, symmetric=True, normed=True)
        contrasts.append(graycoprops(glcm, "contrast")[0, 0])

    # Highest-contrast patches first.
    idx = np.argsort(contrasts)[::-1][:_TOP]
    tens = torch.from_numpy(patches[idx].transpose(0, 3, 1, 2)).float() / 255.0
    with torch.no_grad():
        probs = _susy_mod(tens.to(device)).softmax(-1).mean(0).cpu().numpy()[1:]
    return dict(zip(_GEN_CLASSES, probs))
174
+
175
+ # ───────────────────────────── fusion math ─────────────────────────────
176
+ def _fuse(p_ai: float, p_df: float) -> float:
177
+ return 1 - (1 - p_ai) * (1 - p_df)
178
+
179
+ def _verdict(p: float) -> str:
180
+ return "uncertain" if abs(p - 0.5) <= UNCERTAIN_GAP else ("fake" if p > 0.5 else "real")
181
+
182
+ # ─────────────────────────── IMAGE PIPELINE ────────────────────────────
183
@spaces.GPU
def _predict_image(pil: Image.Image):
    """Run both detectors on one image and fuse their scores.

    Args:
        pil: uploaded image; any PIL mode is accepted (converted to RGB).

    Returns:
        ``(verdict, conf, gallery, bar_plot, pdf)``:
        verdict string from ``_verdict``; ``{"real": .., "fake": ..}`` rounded
        to 4 decimals; list of explanation images; a ``gr.update`` for the
        generator bar plot (hidden unless the binary model favours class 0);
        and the path of the PDF report built from the first three gallery images.

    Raises:
        gr.Error: if no image was supplied.
    """
    if pil is None:
        raise gr.Error("Please upload an image first.")
    # Normalise the mode once: the SuSy tensor input and the overlays all
    # assume three channels (PNG uploads may be RGBA or greyscale).
    pil = pil.convert("RGB")
    rgb = np.array(pil)
    gallery: List[Image.Image] = []

    # Deep-fake path
    try:
        face = _face_det(pil)
    except Exception:
        # Best-effort: a detector failure is treated as "no face found".
        face = None
    if face is not None:
        ft = F.interpolate(face.unsqueeze(0), (256, 256), mode="bilinear",
                           align_corners=False).float() / 255.0
        p_df_raw = torch.sigmoid(_df_model(ft.to(device))).item()
        p_df = _calibrate_df(p_df_raw)
        crop_np = (ft.squeeze(0).permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
        cam_df = _df_cam(ft, [ClassifierOutputTarget(0)])[0]
        gallery.append(_overlay_cam(cam_df, crop_np))
        # The PIL image is already RGB; pass it straight to the landmark
        # helper (a BGR->RGB conversion here would swap the channels).
        gallery.append(Image.fromarray(_extract_landmarks(rgb)[0]))
    else:
        # 0.0 is the neutral element of the noisy-OR in _fuse. A fallback of
        # 0.5 would force p_final >= 0.5 for every face-less image, making a
        # "real" verdict impossible.
        p_df = 0.0

    # Binary AI model
    inp_bin = _bin_proc(images=pil, return_tensors="pt").to(device)
    probs = _bin_mod(**inp_bin).logits.softmax(-1)[0]
    # Class 0 is treated as "AI-generated" — TODO confirm against the
    # checkpoint's id2label mapping.
    p_ai_raw = probs[0].item()
    p_other_raw = probs[1].item()
    p_ai = _calibrate_ai(p_ai_raw)
    winner_idx = 0 if p_ai_raw >= p_other_raw else 1
    # Second, grad-enabled forward pass whose logits feed the CAM.
    inp_bin_h = {k: v.clone().detach().requires_grad_(True) for k, v in inp_bin.items()}
    cam_bin = _bin_cam(winner_idx, scores=_bin_mod(**inp_bin_h).logits)[0]
    gallery.append(_overlay_cam(cam_bin, rgb))

    # Generator breakdown (SuSy) if AI
    bar_plot = gr.update(visible=False)
    if p_ai_raw > p_other_raw:
        gen_probs = _susy_predict(pil)
        bar_plot = gr.update(value=pd.DataFrame(gen_probs.items(), columns=["class", "prob"]),
                             visible=True)
        susy_in = _to_tensor(pil.resize((224, 224))).unsqueeze(0).to(device)
        # Skip output 0 when picking the generator class, then undo the offset.
        g_idx = _susy_mod(susy_in)[0, 1:].argmax().item() + 1
        cam_susy = _susy_cam(susy_in, g_idx)
        gallery.append(_overlay_cam(cam_susy, rgb))

    # Fusion
    p_final = _fuse(p_ai, p_df)
    verdict = _verdict(p_final)
    conf = {"real": round(1 - p_final, 4), "fake": round(p_final, 4)}
    pdf = _render_pdf("Unified Detector", verdict, conf, gallery[:3])

    return verdict, conf, gallery, bar_plot, pdf
233
+
234
+ # ─────────────────────────── VIDEO PIPELINE ────────────────────────────
235
+ def _sample_idx(n): # max 20 evenly spaced
236
+ return list(range(n)) if n <= MAX_SAMPLES else np.linspace(0, n-1, MAX_SAMPLES, dtype=int)
237
+
238
@spaces.GPU
def _predict_video(path: str):
    """Classify a video by averaging per-frame fake scores.

    Up to ``MAX_SAMPLES`` frames are sampled via ``_sample_idx`` and each one
    is run through ``_predict_image``; the mean of their "fake" confidences
    decides the verdict.

    Args:
        path: path of the video file; may be empty/``None`` when nothing was uploaded.

    Returns:
        ``(verdict, conf, frames)``. ``frames`` holds up to ``MIN_FRAMES`` RGB
        preview images. When no frame could be analysed the result is
        ``("No frames analysed", {"real": 0, "fake": 0}, [blank image])``.
    """
    probs, frames = [], []
    if path:
        cap = cv2.VideoCapture(path)
        try:
            total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 1
            for i in _sample_idx(total):
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
                ok, frm = cap.read()
                if not ok:
                    continue
                # OpenCV decodes to BGR; convert once and reuse the RGB image
                # for both analysis and the preview gallery.
                pil = Image.fromarray(cv2.cvtColor(frm, cv2.COLOR_BGR2RGB))
                _, conf, _, _, _ = _predict_image(pil)
                probs.append(conf["fake"])
                if len(frames) < MIN_FRAMES:
                    frames.append(pil)
        finally:
            # Release the capture even if a frame fails to analyse.
            cap.release()

    if not probs:
        blank = Image.new("RGB", (256, 256))
        return "No frames analysed", {"real": 0, "fake": 0}, [blank]

    p_final = float(np.mean(probs))
    return _verdict(p_final), {"real": round(1 - p_final, 4), "fake": round(p_final, 4)}, frames
259
+
260
+ # ───────────────────────────────── UI ──────────────────────────────────
261
# Custom CSS: hides the page footer and any element matching .logo / #logo.
_css = "footer{visibility:hidden!important}.logo,#logo{display:none!important}"

# Two-tab Gradio UI: one tab for single images, one for videos.
with gr.Blocks(css=_css, title="Unified AI-Fake & Deepfake Detector") as demo:
    gr.Markdown("""
    ## Unified AI-Fake & Deepfake Detector
    Upload an **image** or a short **video**.
    The app fuses two complementary models, then shows heat-maps & a PDF report.
    """)

    with gr.Tab("Image"):
        with gr.Row():
            # Left column: input and trigger.
            with gr.Column(scale=1):
                img_in = gr.Image(label="Upload image", type="pil")
                btn_i = gr.Button("Analyze")
            # Right column: one widget per value returned by _predict_image.
            with gr.Column(scale=2):
                txt_v = gr.Textbox(label="Verdict", interactive=False)
                lbl_c = gr.Label(label="Confidence")
                gal = gr.Gallery(label="Explanations", columns=3, height=320)
                # Hidden by default; _predict_image returns a gr.update that
                # controls its visibility.
                bar = gr.BarPlot(x="class", y="prob", title="Likely generator",
                                 y_label="probability", visible=False)
                pdf_f = gr.File(label="Download PDF report")

        # Output order must match _predict_image's return tuple.
        btn_i.click(_predict_image, img_in, [txt_v, lbl_c, gal, bar, pdf_f])

    with gr.Tab("Video"):
        with gr.Row():
            with gr.Column(scale=1):
                vid_in = gr.Video(label="Upload MP4/AVI", format="mp4")
                btn_v = gr.Button("Analyze")
            with gr.Column(scale=2):
                txt_vv = gr.Textbox(label="Verdict", interactive=False)
                lbl_cv = gr.Label(label="Confidence")
                gal_v = gr.Gallery(label="Sample frames", columns=4, height=240)

        # Output order must match _predict_video's return tuple.
        btn_v.click(_predict_video, vid_in, [txt_vv, lbl_cv, gal_v])

# NOTE(review): launched at import time with a public share link requested —
# confirm share=True is intended for the hosting environment.
demo.launch(share=True, show_api=False)
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch==2.1.2
2
+ torchvision==0.16.2
3
+ torchaudio==2.1.2 # optional but same CUDA tag
4
+
5
+ # vision / CAM libs
6
+ facenet-pytorch==2.5.2
7
+ grad-cam==1.4.6
8
+ torchcam==0.4.0
9
+ captum==0.8.0
10
+ ttach==0.0.3 # grad-cam helper
11
+
12
+ # AI-detector deps
13
+ transformers==4.52.4
14
+ timm==1.0.15
15
+ huggingface_hub>=0.22
16
+
17
+ # utils
18
+ opencv-python-headless==4.7.0.72
19
+ mediapipe==0.10.21
20
+ Pillow>=10.1 # <── drop the old pin
21
+ scikit-image==0.25.2 # requires Pillow ≥ 10.1
22
+ scikit-learn==1.6.1
23
+ matplotlib>=3.8
24
+ numpy>=1.26
25
+ pandas
26
+ absl-py==2.3.0 # mediapipe dep
27
+
28
+ # UI
29
+ gradio==5.23.2
30
+ pydantic==2.10.6
31
+ wheel