import os, uuid, warnings, math, tempfile
from pathlib import Path
from typing import List, Tuple

warnings.filterwarnings("ignore")


def _ensure_deps():
    # Best-effort install of deps that may be missing on a fresh host.
    try:
        import mediapipe, fpdf  # noqa: F401
    except ImportError:
        os.system("pip install --quiet --upgrade mediapipe fpdf")


_ensure_deps()


import cv2
import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from fpdf import FPDF
import mediapipe as mp
from facenet_pytorch import InceptionResnetV1, MTCNN
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from torchvision import transforms
from transformers import AutoImageProcessor, AutoModelForImageClassification
from torchcam.methods import GradCAM as TCGradCAM
from captum.attr import Saliency
from skimage.feature import graycomatrix, graycoprops
import matplotlib.pyplot as plt
import pandas as pd
import spaces

plt.set_loglevel("ERROR")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Face detector and fine-tuned InceptionResnetV1 deepfake classifier,
# with a Grad-CAM explainer hooked on the last block8 branch.
_face_det = MTCNN(select_largest=False, post_process=False, device=device).eval().to(device)
_df_model = InceptionResnetV1(pretrained="vggface2", classify=True, num_classes=1, device=device)
_df_model.load_state_dict(torch.load("resnet_inception.pth", map_location="cpu")["model_state_dict"])
_df_model.to(device).eval()
_df_cam = GradCAM(_df_model, target_layers=[_df_model.block8.branch1[-1]],
                  use_cuda=device.type == "cuda")


def _get_layer(model, name: str):
    # Resolve a module by exact name, falling back to the first suffix match.
    mods = dict(model.named_modules())
    return mods.get(name) or next(m for n, m in mods.items() if n.endswith(name))


# Generic AI-vs-real image classifier (Hugging Face) plus a torchcam Grad-CAM
# hook on one of the late transformer blocks.
BIN_ID = "haywoodsloan/ai-image-detector-deploy"
_bin_proc = AutoImageProcessor.from_pretrained(BIN_ID)
_bin_mod = AutoModelForImageClassification.from_pretrained(BIN_ID).to(device).eval()
_CAM_LAYER_BIN = "encoder.layers.3.blocks.1.layernorm_after"
_bin_cam = TCGradCAM(_bin_mod, target_layer=_get_layer(_bin_mod, _CAM_LAYER_BIN))


# SuSy: TorchScript generator-attribution model. Index 0 of its output is
# skipped downstream (treated as the non-generated class); the remaining
# logits map onto _GEN_CLASSES.
_susy_mod = torch.jit.load("SuSy.pt").to(device).eval()
_GEN_CLASSES = ["Stable Diffusion 1.x", "DALL·E 3",
                "MJ V5/V6", "Stable Diffusion XL", "MJ V1/V2"]
_PATCH, _TOP = 224, 5  # patch size and number of patches scored
_to_tensor = transforms.ToTensor()
_to_gray = transforms.Compose([transforms.PILToTensor(), transforms.Grayscale()])


# Calibration constants are placeholders (identity calibration); the sketch
# below shows how they could be applied once fitted.
_calib_df_slope, _calib_df_inter = 1.0, 0.0
_calib_ai_slope, _calib_ai_inter = 1.0, 0.0


def _calibrate_df(p: float) -> float:
    # Identity for now; swap in a fitted calibrator when available.
    return p


def _calibrate_ai(p: float) -> float:
    return p
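

# A minimal sketch (not wired in) of how the slope/intercept constants above
# could implement Platt-style calibration once fitted on held-out labels.
# `_platt` is a hypothetical helper introduced here for illustration only.
def _platt(p: float, slope: float, inter: float) -> float:
    p = min(max(p, 1e-6), 1.0 - 1e-6)   # clamp away from 0/1
    logit = math.log(p / (1.0 - p))     # probability -> log-odds
    return 1.0 / (1.0 + math.exp(-(slope * logit + inter)))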


UNCERTAIN_GAP = 0.10               # half-width of the "uncertain" verdict band
MIN_FRAMES, MAX_SAMPLES = 4, 20    # preview frames kept / max frames scored per video


def _extract_landmarks(rgb: np.ndarray) -> Tuple[np.ndarray, np.ndarray | None]:
    """Draw MediaPipe face-mesh landmarks on a copy of `rgb`.

    Returns the annotated image and an (N, 2) array of pixel coordinates,
    or (rgb, None) when no face is found.
    """
    mesh = mp.solutions.face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)
    res = mesh.process(rgb)
    mesh.close()
    if not res.multi_face_landmarks:
        return rgb, None
    h, w, _ = rgb.shape
    out = rgb.copy()
    pts = []
    for lm in res.multi_face_landmarks[0].landmark:
        cx, cy = int(lm.x * w), int(lm.y * h)
        pts.append((cx, cy))
        cv2.circle(out, (cx, cy), 1, (0, 255, 0), -1)
    return out, np.array(pts)


def _overlay_cam(cam, base):
    """Blend a class-activation map over an RGB uint8 image."""
    if torch.is_tensor(cam):
        cam = cam.detach().cpu().numpy()
    # Min-max normalise, colourise with the jet map, resize to the base image.
    cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)
    heat = Image.fromarray(
        (plt.cm.jet(cam)[:, :, :3] * 255).astype(np.uint8)
    ).resize((base.shape[1], base.shape[0]), Image.BICUBIC)
    return Image.blend(
        Image.fromarray(base).convert("RGBA"),
        heat.convert("RGBA"),
        alpha=0.45,
    )
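

# Illustrative usage (hypothetical names): given `cam` from any explainer below
# and an RGB uint8 frame `frame`, `_overlay_cam(cam, frame)` returns a PIL RGBA
# image ready for the gallery.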


def _render_pdf(title: str, verdict: str, conf: dict, pages: List[Image.Image]) -> str:
    """Write a short PDF report (verdict, confidences, figures); return its path."""
    out = Path(f"/tmp/report_{uuid.uuid4().hex}.pdf")
    pdf = FPDF()
    pdf.set_auto_page_break(True, 15)
    pdf.add_page()
    pdf.set_font("Helvetica", size=14)
    pdf.cell(0, 10, title, ln=True, align="C")
    pdf.ln(4)
    pdf.set_font("Helvetica", size=12)
    pdf.multi_cell(0, 6, f"Verdict: {verdict}\n"
                         f"Confidence -> Real {conf['real']:.3f} Fake {conf['fake']:.3f}")
    for idx, img in enumerate(pages):
        pdf.ln(4)
        pdf.set_font("Helvetica", size=11)
        pdf.cell(0, 6, f"Figure {idx + 1}", ln=True)
        # FPDF needs a file on disk; use a proper temp file (mktemp is racy).
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
            tmp = Path(f.name)
        img.convert("RGB").save(tmp, format="JPEG")
        pdf.image(str(tmp), x=10, w=90)
        tmp.unlink(missing_ok=True)
    pdf.output(str(out))
    return str(out)


def _susy_cam(tensor: torch.Tensor, class_idx: int) -> np.ndarray:
    # SuSy ships as TorchScript, where layer hooks for Grad-CAM are awkward;
    # fall back to a plain gradient saliency map (Captum), channel-averaged.
    sal = Saliency(_susy_mod)
    grad = sal.attribute(tensor, target=class_idx).abs().mean(1, keepdim=True)
    return grad.squeeze().detach().cpu().numpy()


@spaces.GPU
def _susy_predict(img: Image.Image):
    """Score the _TOP highest-contrast 224x224 patches; return per-generator probabilities."""
    w, h = img.size
    npx, npy = max(1, w // _PATCH), max(1, h // _PATCH)
    patches = np.zeros((npx * npy, _PATCH, _PATCH, 3), dtype=np.uint8)
    for i in range(npx):
        for j in range(npy):
            x, y = i * _PATCH, j * _PATCH
            patches[i * npy + j] = np.array(img.crop((x, y, x + _PATCH, y + _PATCH))
                                               .resize((_PATCH, _PATCH)))
    # Rank patches by GLCM contrast so the most textured regions are scored.
    contrasts = []
    for p in patches:
        g = _to_gray(Image.fromarray(p)).squeeze(0).numpy()
        glcm = graycomatrix(g, [5], [0], 256, symmetric=True, normed=True)
        contrasts.append(graycoprops(glcm, "contrast")[0, 0])
    idx = np.argsort(contrasts)[::-1][:_TOP]
    tens = torch.from_numpy(patches[idx].transpose(0, 3, 1, 2)).float() / 255.0
    with torch.no_grad():
        # Drop index 0 (non-generated class); average the rest over patches.
        probs = _susy_mod(tens.to(device)).softmax(-1).mean(0).cpu().numpy()[1:]
    return dict(zip(_GEN_CLASSES, probs))


def _fuse(p_ai: float, p_df: float) -> float:
    # Noisy-OR fusion: flag fake if either detector fires; agreement compounds.
    return 1 - (1 - p_ai) * (1 - p_df)
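

# Illustrative sanity check of the fusion rule (cheap, runs at import time):
# 1 - (1 - 0.3) * (1 - 0.4) = 1 - 0.42 = 0.58.
assert abs(_fuse(0.3, 0.4) - 0.58) < 1e-9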


def _verdict(p: float) -> str:
    # Map the fused probability to a verdict, with an "uncertain" band around 0.5.
    return "uncertain" if abs(p - 0.5) <= UNCERTAIN_GAP else ("fake" if p > 0.5 else "real")
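

# Illustrative, with UNCERTAIN_GAP = 0.10:
#   _verdict(0.55) -> "uncertain"   (inside the +/-0.10 band)
#   _verdict(0.80) -> "fake"
#   _verdict(0.20) -> "real"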


@spaces.GPU
def _predict_image(pil: Image.Image):
    gallery: List[Image.Image] = []

    # --- Deepfake branch: face crop -> fine-tuned classifier + Grad-CAM ---
    try:
        face = _face_det(pil)
    except Exception:
        face = None
    if face is not None:
        ft = F.interpolate(face.unsqueeze(0), (256, 256), mode="bilinear",
                           align_corners=False).float() / 255.0
        p_df_raw = torch.sigmoid(_df_model(ft.to(device))).item()
        p_df = _calibrate_df(p_df_raw)
        crop_np = (ft.squeeze(0).permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
        cam_df = _df_cam(ft, [ClassifierOutputTarget(0)])[0]
        gallery.append(_overlay_cam(cam_df, crop_np))
        # np.array(pil) is already RGB, which is what MediaPipe expects.
        gallery.append(Image.fromarray(_extract_landmarks(np.array(pil))[0]))
    else:
        p_df = 0.5  # no face found: the deepfake branch abstains

    # --- Generic AI-image branch: binary classifier + torchcam Grad-CAM ---
    inp_bin = _bin_proc(images=pil, return_tensors="pt").to(device)
    logits = _bin_mod(**inp_bin).logits.softmax(-1)[0]
    p_ai_raw = logits[0].item()  # index 0 is treated as the AI-generated class
    p_ai = _calibrate_ai(p_ai_raw)
    winner_idx = 0 if p_ai_raw >= logits[1].item() else 1
    # Re-run with gradients enabled so torchcam can backprop to the CAM layer.
    inp_bin_h = {k: v.clone().detach().requires_grad_(True) for k, v in inp_bin.items()}
    cam_bin = _bin_cam(winner_idx, scores=_bin_mod(**inp_bin_h).logits)[0]
    gallery.append(_overlay_cam(cam_bin, np.array(pil)))

    # --- Generator attribution (only when the AI class wins) ---
    bar_plot = gr.update(visible=False)
    if p_ai_raw > logits[1].item():
        gen_probs = _susy_predict(pil)
        bar_plot = gr.update(value=pd.DataFrame(gen_probs.items(), columns=["class", "prob"]),
                             visible=True)
        susy_in = _to_tensor(pil.resize((224, 224))).unsqueeze(0).to(device)
        g_idx = _susy_mod(susy_in)[0, 1:].argmax().item() + 1
        cam_susy = _susy_cam(susy_in, g_idx)
        gallery.append(_overlay_cam(cam_susy, np.array(pil)))

    p_final = _fuse(p_ai, p_df)
    verdict = _verdict(p_final)
    conf = {"real": round(1 - p_final, 4), "fake": round(p_final, 4)}
    pdf = _render_pdf("Unified Detector", verdict, conf, gallery[:3])

    return verdict, conf, gallery, bar_plot, pdf


def _sample_idx(n):
    # Up to MAX_SAMPLES frame indices, evenly spaced across the clip.
    if n <= MAX_SAMPLES:
        return list(range(n))
    return np.linspace(0, n - 1, MAX_SAMPLES, dtype=int).tolist()
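

# Illustrative behaviour:
#   _sample_idx(3)    -> [0, 1, 2]          (short clip: every frame)
#   _sample_idx(1000) -> 20 indices evenly spanning 0..999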


@spaces.GPU
def _predict_video(path: str):
    cap = cv2.VideoCapture(path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 1
    probs, frames = [], []
    for i in _sample_idx(total):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ok, frm = cap.read()
        if not ok:
            continue
        pil = Image.fromarray(cv2.cvtColor(frm, cv2.COLOR_BGR2RGB))
        # Reuse the image pipeline; only the per-frame fake probability is kept.
        _, conf, _, _, _ = _predict_image(pil)
        probs.append(conf["fake"])
        if len(frames) < MIN_FRAMES:  # keep a few RGB frames for the preview gallery
            frames.append(Image.fromarray(cv2.cvtColor(frm, cv2.COLOR_BGR2RGB)))
    cap.release()
    if not probs:
        blank = Image.new("RGB", (256, 256))
        return "No frames analysed", {"real": 0, "fake": 0}, [blank]

    p_final = float(np.mean(probs))
    return _verdict(p_final), {"real": round(1 - p_final, 4), "fake": round(p_final, 4)}, frames


_css = "footer{visibility:hidden!important}.logo,#logo{display:none!important}"


with gr.Blocks(css=_css, title="AI-Fake & Deepfake Analyser") as demo:
    gr.Markdown("""
## Deepfake detector
Upload an **image** or a short **video**.
The app fuses two complementary detectors, then shows heat-maps and a downloadable PDF report.

Made by Code Alchemists (Brijesh Khanoolkar, Shreeya Dessai, Slevin Rodrigues, Rafan Khan).
""")

    with gr.Tab("Image"):
        with gr.Row():
            with gr.Column(scale=1):
                img_in = gr.Image(label="Upload image", type="pil")
                btn_i = gr.Button("Analyze")
            with gr.Column(scale=2):
                txt_v = gr.Textbox(label="Verdict", interactive=False)
                lbl_c = gr.Label(label="Confidence")
                gal = gr.Gallery(label="Explanations", columns=3, height=320)
                bar = gr.BarPlot(x="class", y="prob", title="Likely generator",
                                 y_label="probability", visible=False)
                pdf_f = gr.File(label="Download PDF report")

        btn_i.click(_predict_image, img_in, [txt_v, lbl_c, gal, bar, pdf_f])

    with gr.Tab("Video"):
        with gr.Row():
            with gr.Column(scale=1):
                vid_in = gr.Video(label="Upload MP4/AVI", format="mp4")
                btn_v = gr.Button("Analyze")
            with gr.Column(scale=2):
                txt_vv = gr.Textbox(label="Verdict", interactive=False)
                lbl_cv = gr.Label(label="Confidence")
                gal_v = gr.Gallery(label="Sample frames", columns=4, height=240)

        btn_v.click(_predict_video, vid_in, [txt_vv, lbl_cv, gal_v])

demo.launch(share=True, show_api=False)