Spaces:

Caoyunkang
/

AdaCLIP

Running

App Files Files Community

AdaCLIP / app.py

Caoyunkang

first commit

a25563f verified over 1 year ago

raw

history blame contribute delete

4.09 kB

	import gradio as gr
	from PIL import Image, ImageDraw, ImageFont
	import warnings
	import os
	os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
	import json
	import os
	import torch
	from scipy.ndimage import gaussian_filter
	import cv2
	from method import AdaCLIP_Trainer
	import numpy as np

	############ Init Model
	# Checkpoint files selectable from the UI (see process_image).
	ckt_path1 = "weights/pretrained_mvtec_colondb.pth"
	ckt_path2 = "weights/pretrained_visa_clinicdb.pth"
	ckt_path3 = "weights/pretrained_all.pth"

	# Configurations
	image_size = 518
	# Run on the GPU when torch can see one, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = 'cuda'
	else:
	    device = 'cpu'
	# NOTE: `model` holds the backbone name here and is rebound to the
	# AdaCLIP_Trainer instance further down.
	model = "ViT-L-14-336"
	prompting_depth = 4
	prompting_length = 5
	prompting_type = 'SD'
	prompting_branch = 'VL'
	use_hsf = True
	k_clusters = 20

	# The backbone's JSON description lives in ./model_configs/<backbone>.json
	config_path = os.path.join('./model_configs', model + '.json')

	# Prepare model
	with open(config_path, 'r') as config_file:
	    model_configs = json.load(config_file)

	# Set up the feature hierarchy: four layer indices at 1/4, 2/4, 3/4 and
	# 4/4 of (n_layers // 4) * 4, handed to the trainer as `feat_list`.
	vision_cfg = model_configs['vision_cfg']
	n_layers = vision_cfg['layers']
	substage = n_layers // 4
	features_list = [substage * stage for stage in range(1, 5)]

	# Collect the constructor arguments first; `backbone` captures the name
	# string before `model` is rebound to the trainer object below.
	trainer_kwargs = dict(
	    backbone=model,
	    feat_list=features_list,
	    input_dim=vision_cfg['width'],
	    output_dim=model_configs['embed_dim'],
	    learning_rate=0.,
	    device=device,
	    image_size=image_size,
	    prompting_depth=prompting_depth,
	    prompting_length=prompting_length,
	    prompting_branch=prompting_branch,
	    prompting_type=prompting_type,
	    use_hsf=use_hsf,
	    k_clusters=k_clusters,
	)
	model = AdaCLIP_Trainer(**trainer_kwargs).to(device)


	def process_image(image, text, options):
	    """Run anomaly detection on one image and return a heat-map overlay.

	    Args:
	        image: PIL image supplied by the Gradio image input.
	        text: Class name passed to the model as a one-element list.
	        options: Name of the pre-trained checkpoint to use. ``None`` or an
	            unrecognised value falls back to the 'All' checkpoint.

	    Returns:
	        A ``(overlay, score)`` tuple: ``overlay`` is a PIL RGB image with
	        the JET-coloured anomaly map blended 50/50 over the resized input,
	        and ``score`` is the anomaly score formatted to three decimals.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if image is None:
	        # Without this guard the call below fails with an opaque
	        # AttributeError on `None.convert`.
	        raise gr.Error('Please upload an image first.')

	    # `options` is None when nothing is selected in the UI; `in None` would
	    # raise TypeError, so normalise it to an empty selection and let the
	    # default branch handle it.
	    selected = options or ''

	    # Load the model based on selected options
	    # NOTE(review): the checkpoint is re-loaded on every request.
	    if 'MVTec AD+Colondb' in selected:
	        model.load(ckt_path1)
	    elif 'VisA+Clinicdb' in selected:
	        model.load(ckt_path2)
	    elif 'All' in selected:
	        model.load(ckt_path3)
	    else:
	        # Default to 'All' if no valid option is provided
	        model.load(ckt_path3)
	        print('Invalid option. Defaulting to All.')

	    # Ensure image is in RGB mode
	    image = image.convert('RGB')

	    # Background for the overlay: BGR (OpenCV channel order), resized to
	    # the square model resolution.
	    np_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
	    np_image = cv2.resize(np_image, (image_size, image_size))

	    # Preprocess the image and run the model
	    img_input = model.preprocess(image).unsqueeze(0)
	    img_input = img_input.to(model.device)

	    with torch.no_grad():
	        anomaly_map, anomaly_score = model.clip_model(img_input, [text], aggregation=True)

	    # Process anomaly map: take the single batch element, smooth it and
	    # scale it to 8-bit.
	    anomaly_map = anomaly_map[0, :, :].cpu().numpy()
	    anomaly_score = anomaly_score[0].cpu().numpy()
	    anomaly_map = gaussian_filter(anomaly_map, sigma=4)
	    # Clip before the cast so values outside [0, 1] saturate instead of
	    # wrapping around in uint8.
	    anomaly_map = np.clip(anomaly_map * 255, 0, 255).astype(np.uint8)

	    # Apply color map and blend with original image
	    # (assumes the anomaly map is image_size x image_size so it matches
	    # np_image for addWeighted -- TODO confirm against the model output)
	    heat_map = cv2.applyColorMap(anomaly_map, cv2.COLORMAP_JET)
	    vis_map = cv2.addWeighted(heat_map, 0.5, np_image, 0.5, 0)

	    # Convert OpenCV image back to PIL image for Gradio
	    vis_map_pil = Image.fromarray(cv2.cvtColor(vis_map, cv2.COLOR_BGR2RGB))

	    return vis_map_pil, f'{anomaly_score:.3f}'

	# Define examples: one [image path, class name, checkpoint choice] row per
	# example, in the same order as the interface inputs.
	examples = [
	    ["asset/img.png", "candle", "MVTec AD+Colondb"],
	    ["asset/img2.png", "bottle", "VisA+Clinicdb"],
	    ["asset/img3.png", "button", "All"],
	]

	# Input widgets (image, class name, checkpoint choice)
	dataset_choices = ["MVTec AD+Colondb", "VisA+Clinicdb", "All"]
	input_components = [
	    gr.Image(type="pil", label="Upload Image"),
	    gr.Textbox(label="Class Name"),
	    gr.Radio(dataset_choices, label="Pre-trained Datasets"),
	]

	# Output widgets (overlay image, formatted anomaly score)
	output_components = [
	    gr.Image(type="pil", label="Output Image"),
	    gr.Textbox(label="Anomaly Score"),
	]

	# Gradio interface layout
	demo = gr.Interface(
	    fn=process_image,
	    inputs=input_components,
	    outputs=output_components,
	    examples=examples,
	    title="AdaCLIP -- Zero-shot Anomaly Detection",
	    description="Upload an image, enter class name, and select pre-trained datasets to do zero-shot anomaly detection",
	)

	# Launch the demo with Gradio's default host and port.
	# To expose it on a fixed address instead, use:
	#   demo.launch(server_name="0.0.0.0", server_port=10002)
	demo.launch()