GroundingDINO_DSA2024

Running on Zero

App Files Files Community

GroundingDINO_DSA2024 / app.py

andrewkatumba

Update app.py

1eb6b8e verified over 1 year ago

raw

history blame contribute delete

2.68 kB

	import spaces
	from transformers import Owlv2Processor, Owlv2ForObjectDetection, AutoProcessor, AutoModelForZeroShotObjectDetection
	import torch
	import gradio as gr

	# Pick the GPU when torch reports one, otherwise fall back to the CPU.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	# Processor (text + image preprocessing, box post-processing) and detector
	# weights are both loaded from the same Grounding DINO checkpoint.
	dino_processor = AutoProcessor.from_pretrained("IDEA-Research/grounding-dino-base")
	# Place the model on the same `device` the inputs are moved to in `infer`,
	# instead of a hard-coded "cuda" that fails on CPU-only hosts.
	dino_model = AutoModelForZeroShotObjectDetection.from_pretrained("IDEA-Research/grounding-dino-base").to(device)

	@spaces.GPU
	def infer(img, text_queries, score_threshold, model):
	    """Detect the queried objects in one image and return labelled boxes.

	    Args:
	        img: Image whose ``shape[:2]`` is read as ``(height, width)``
	            (assumes the array-style image Gradio hands over; confirm
	            against the ``gr.Image`` configuration).
	        text_queries: Iterable of candidate label strings. Surrounding
	            whitespace is stripped and blank entries are ignored.
	        score_threshold: Minimum box confidence; forwarded to the
	            post-processor as ``box_threshold`` and re-checked per box.
	        model: Backend selector. Only ``"dino"`` is implemented.

	    Returns:
	        A list of ``(box, label)`` tuples where ``box`` is the
	        post-processed box converted to a list of ints and ``label`` is
	        the non-empty text label. Empty when no usable query was given.

	    Raises:
	        ValueError: If ``model`` is not ``"dino"``.
	    """
	    # Fail early with a clear message; previously an unknown backend fell
	    # through to code that referenced variables that were never assigned.
	    if model != "dino":
	        raise ValueError(f"Unsupported model: {model!r}")

	    # The prompt is a sequence of phrases, each terminated by a period.
	    phrases = [query.strip() for query in text_queries]
	    phrases = [phrase for phrase in phrases if phrase]
	    if not phrases:
	        return []
	    queries = " ".join(f"{phrase}." for phrase in phrases)

	    # The first two shape entries are passed through unchanged as the
	    # target size; for array images these are rows (height) then columns
	    # (width).
	    height, width = img.shape[:2]
	    target_sizes = [(height, width)]

	    inputs = dino_processor(text=queries, images=img, return_tensors="pt").to(device)

	    with torch.no_grad():
	        outputs = dino_model(**inputs)

	    # Bring the raw predictions back to the CPU before post-processing.
	    outputs.logits = outputs.logits.cpu()
	    outputs.pred_boxes = outputs.pred_boxes.cpu()
	    results = dino_processor.post_process_grounded_object_detection(
	        outputs=outputs,
	        input_ids=inputs.input_ids,
	        box_threshold=score_threshold,
	        target_sizes=target_sizes,
	    )

	    detections = results[0]
	    result_labels = []
	    for box, score, label in zip(
	        detections["boxes"], detections["scores"], detections["labels"]
	    ):
	        # Skip low-confidence boxes and boxes without a text label.
	        if score < score_threshold or label == "":
	            continue
	        result_labels.append(([int(coord) for coord in box.tolist()], label))
	    return result_labels

	def query_image(img, text_queries, dino_threshold):
	    """Gradio callback: detect comma-separated labels in an image.

	    Args:
	        img: Input image supplied by the interface.
	        text_queries: Comma-separated candidate labels typed by the user.
	        dino_threshold: Confidence threshold forwarded to ``infer``.

	    Returns:
	        A ``(img, annotations)`` tuple, where ``annotations`` is the list
	        returned by ``infer`` (empty when no usable label was entered).

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if img is None:
	        # Surface a readable message in the UI instead of an
	        # AttributeError from reading the shape of a missing image.
	        raise gr.Error("Please provide an input image.")

	    # Split on commas, trim whitespace, and drop blanks such as those left
	    # by a trailing comma.
	    labels = [label.strip() for label in (text_queries or "").split(",")]
	    labels = [label for label in labels if label]
	    if not labels:
	        # Nothing to look for: return the image with no annotations.
	        return (img, [])

	    dino_output = infer(img, labels, dino_threshold, "dino")
	    return (img, dino_output)


	# Threshold slider (input) and annotated-image view (output) for the demo.
	dino_threshold = gr.Slider(0, 1, value=0.12, label="Grounding DINO Threshold")
	dino_output = gr.AnnotatedImage(label="Grounding DINO Output")

	# Inputs are listed in the order `query_image` takes its arguments.
	_input_components = [
	    gr.Image(label="Input Image"),
	    gr.Textbox(label="Candidate Labels"),
	    dino_threshold,
	]

	# Markdown blurb shown under the title.
	_description = "DSA2024 Space to evaluate state-of-the-art [Grounding DINO](https://huggingface.co/IDEA-Research/grounding-dino-base) zero-shot object detection model. Simply upload an image and enter a list of the objects you want to detect with comma, or try one of the examples. Play with the threshold to filter out low confidence predictions in the model."

	# Each example row is: image path, candidate labels, threshold.
	_examples = [
	    ["./deer.jpg", "zebra, deer, goat", 0.16],
	    ["./zebra.jpg", "zebra, lion, deer", 0.16],
	]

	demo = gr.Interface(
	    fn=query_image,
	    inputs=_input_components,
	    outputs=[dino_output],
	    title="Grounding DINO DSA2024",
	    description=_description,
	    examples=_examples,
	)
	demo.launch(debug=True)