Spaces: Running on Zero
```python
import gradio as gr
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
from PIL import Image
import requests
from io import BytesIO
import spaces  # Import spaces for ZeroGPU support
# Model repository and loading arguments shared by the processor and the model
repo_name = "allenai/Molmo-7B-D-0924"
arguments = {
    "device_map": "auto",       # Device will be set automatically
    "torch_dtype": "auto",      # Use appropriate precision
    "trust_remote_code": True   # Allow loading the model's remote code
}

# Load the processor (this part doesn't need the GPU yet)
processor = AutoProcessor.from_pretrained(repo_name, **arguments)
# Define the function for image description.
# The @spaces.GPU decorator ensures the function gets GPU access when it is called.
@spaces.GPU
def describe_image(image, question):
    # Load the model inside the function and move it to the GPU
    model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to('cuda')
    # Process the uploaded image along with the user's question
    inputs = processor.process(
        images=[image],
        text=question if question else "Describe this image in great detail without missing any piece of information"
    )

    # Move inputs to the model device (GPU) and add a batch dimension
    inputs = {k: v.to('cuda').unsqueeze(0) for k, v in inputs.items()}

    # Generate output using the model on the GPU
    output = model.generate_from_batch(
        inputs,
        GenerationConfig(max_new_tokens=2048, stop_strings="<|endoftext|>"),
        tokenizer=processor.tokenizer,
    )

    # Decode only the newly generated tokens (everything after the prompt)
    generated_tokens = output[0, inputs["input_ids"].size(1):]
    generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return generated_text
# Gradio interface
def gradio_app():
    with gr.Blocks() as demo:
        gr.Markdown("# Long Image Description with Molmo-7B-D-0924\n### Upload an image and ask a question about it!")

        with gr.Row():
            image_input = gr.Image(type="pil", label="Upload an Image")
            question_input = gr.Textbox(placeholder="Ask a question about the image (e.g., 'What is happening in this image?')", label="Question (Optional)")

        output_text = gr.Textbox(label="Image Description", interactive=False)

        # Submit button to generate the description
        submit_btn = gr.Button("Generate Description")

        # Callback to run when the submit button is clicked
        submit_btn.click(
            fn=describe_image,
            inputs=[image_input, question_input],
            outputs=output_text
        )

    # Launch the Gradio interface
    demo.launch()

# Launch the Gradio app
gradio_app()
```
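A Space built around this `app.py` also needs a `requirements.txt` so the runtime can install its Python dependencies. Below is a minimal sketch, assuming the stock ZeroGPU (Gradio SDK) environment; the `einops` and `torchvision` entries follow the Molmo model card's install note, and exact version pins are left to you:

```
# requirements.txt (sketch; versions unpinned, adjust to your Space)
gradio
torch
transformers
accelerate     # needed for device_map="auto"
einops         # used by Molmo's remote modeling code
torchvision    # used by Molmo's image preprocessing
Pillow
spaces         # ZeroGPU helper; preinstalled on Spaces, harmless to list
```

If a single generation tends to outlast the default ZeroGPU time slice, the decorator also accepts a duration hint, for example `@spaces.GPU(duration=120)`.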