import base64
import re
from io import BytesIO
from typing import List, Tuple, Optional

import gradio as gr
import requests
from PIL import Image
from huggingface_hub import InferenceClient

# Hugging Face Inference Client (uses the free Inference API)
client = InferenceClient(model="Qwen/Qwen2.5-VL-32B-Instruct", provider="hf-inference")
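
# Regex for the <box>(x1,y1,x2,y2):label</box> tags the prompt asks the model to emit.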
BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"


def parse_bounding_boxes(text: str) -> List[Tuple[Tuple[int, int, int, int], str]]:
    matches = re.findall(BOX_TAG_PATTERN, text)
    return [((int(x1), int(y1), int(x2), int(y2)), label.strip()) for x1, y1, x2, y2, label in matches]
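
# Download an image from a URL and normalize it to RGB for PIL/Gradio.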
def fetch_image_from_url(url: str) -> Image.Image:
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    return Image.open(BytesIO(resp.content)).convert("RGB")
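
# Encode an uploaded PIL image as a base64 PNG data URI for the chat API.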
def pil_to_data_uri(img: Image.Image) -> str:
    buffer = BytesIO()
    img.save(buffer, format="PNG")
    return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()
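
# Send the image and detection prompt to the model, stream the reply, and parse it
# into (box, label) annotations for gr.AnnotatedImage.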
def predict(image: Optional[Image.Image], image_url: str):
    if image is None and not image_url:
        return None, "❌ Please provide an image or URL."

    if image is None:
        try:
            image = fetch_image_from_url(image_url)
            data_uri = image_url
        except Exception as e:
            return None, f"❌ {e}"
    else:
        image = image.convert("RGB")
        data_uri = pil_to_data_uri(image)

    prompt = (
        "Detect all objects in the provided image and output their bounding box "
        "coordinates and class labels in the format <box>(x1,y1,x2,y2):class_label</box>. "
        "If multiple objects are detected, list each bounding box and class label in a new <box> tag. "
        "Do not include any other text or descriptions."
    )

    stream = client.chat.completions.create(
        messages=[
            {"role": "user", "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": data_uri}},
            ]}
        ],
        stream=True,
    )
    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)

    bboxes = parse_bounding_boxes(response_text)
    if not bboxes:
        return None, "⚠️ No objects detected."
    return (image, bboxes), "✅ Detection complete."
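
# Build the Gradio UI: upload/URL tabs with examples, a detect button, and an annotated-image output.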
def build_demo():
    theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")
    with gr.Blocks(theme=theme, title="Qwen Object Detection Demo") as demo:
        gr.Markdown("## Qwen2.5-VL Object Detection Demo")
        gr.Markdown("Upload an image **or** paste an image URL, then click **Detect Objects**.")
        gr.Markdown("[Check out the model](https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct)")

        with gr.Tabs():
            with gr.TabItem("Upload Image"):
                img_input = gr.Image(type="pil", label="Upload Image", height=300)
                gr.Examples(
                    examples=[
                        ["./example_images/example_1.png"],
                        ["./example_images/example_2.jpg"],
                    ],
                    inputs=[img_input],
                    label="Click an example to try",
                )
            with gr.TabItem("Image URL"):
                url_input = gr.Textbox(label="Image URL", placeholder="https://example.com/img.jpg")
                gr.Examples(
                    examples=[
                        [None, "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/google-cloud/model-card.png"],
                        [None, "http://images.cocodataset.org/val2017/000000039769.jpg"],
                    ],
                    inputs=[img_input, url_input],
                    label="Click an example to try",
                )

        detect_btn = gr.Button("Detect Objects")
        output_img = gr.AnnotatedImage(label="Detections", height=600)
        status = gr.Markdown()

        detect_btn.click(predict, inputs=[img_input, url_input], outputs=[output_img, status])
    return demo

def main():
    demo = build_demo()
    demo.launch()


if __name__ == "__main__":
    main()