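"""Gradio demo with two tabs: streaming image description via Qwen/Qwen-VL-Chat-Int4
and streaming code generation via deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B."""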
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import threading
import gradio as gr

# Load the vision-language model and tokenizer
image_model_id = "Qwen/Qwen-VL-Chat-Int4"
image_tokenizer = AutoTokenizer.from_pretrained(image_model_id, trust_remote_code=True)
image_model = AutoModelForCausalLM.from_pretrained(image_model_id, device_map="cuda", trust_remote_code=True).eval()

# Load the code-generation model and tokenizer
code_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
code_tokenizer = AutoTokenizer.from_pretrained(code_model_id, trust_remote_code=True)
code_tokenizer.pad_token_id = code_tokenizer.eos_token_id  # tokenizer ships without a pad token
code_model = AutoModelForCausalLM.from_pretrained(
    code_model_id,
    torch_dtype="float16",
    device_map="auto",
).eval()
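# Note: the Int4 checkpoint above is GPTQ-quantized; it is assumed to need a
# CUDA GPU plus the optimum and auto-gptq packages alongside transformers.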
stop_image_generation = threading.Event()
stop_code_generation = threading.Event()
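# Breaking out of the streaming loops below only stops the UI updates; the
# background generate() thread keeps decoding until it finishes on its own.
# A minimal sketch of a harder stop using transformers' stopping-criteria
# hook (defined here but not wired into the calls below):
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnEvent(StoppingCriteria):
    """Abort generation as soon as the given threading.Event is set."""

    def __init__(self, event: threading.Event):
        self.event = event

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        return self.event.is_set()

# Usage sketch: add
#   stopping_criteria=StoppingCriteriaList([StopOnEvent(stop_code_generation)])
# to generation_kwargs so generate() itself halts when the event fires.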
def generate_response_image(uploaded_image, user_prompt, temperature, top_p, max_new_tokens):
    stop_image_generation.clear()
    # Qwen-VL's prompt helper expects an image path, so persist the uploaded PIL image first
    temp_path = "/tmp/temp_image.png"
    uploaded_image.save(temp_path)
    image_sys_prompt = (
        "You are a helpful assistant that describes images very concisely. "
        "Provide a one-sentence summary of the image in less than 15 words. "
        "Use simple, direct language."
    )
    # Compose the multimodal prompt using the tokenizer's helper
    query_text = image_tokenizer.from_list_format([
        {"image": temp_path},
        {"text": f"<|system|>\n{image_sys_prompt}\n<|end|>"},
        {"text": f"<|user|>\n{user_prompt}\n<|end|>"},
        {"text": "<|assistant|>"}
    ])
    # Tokenize the input text -> input_ids and attention_mask tensors on the GPU
    inputs = image_tokenizer(query_text, return_tensors="pt").to("cuda")
    streamer = TextIteratorStreamer(image_tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        temperature=temperature,
        top_p=top_p,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        use_cache=True,
        return_dict_in_generate=True,
    )
    # Run generation on a worker thread so tokens can be streamed as they arrive
    thread = threading.Thread(target=image_model.generate, kwargs=generation_kwargs)
    thread.start()
    response = ""
    for new_text in streamer:
        if stop_image_generation.is_set():
            break
        response += new_text
        yield response

def stop_image_generation_func():
    stop_image_generation.set()
    return ""  # clear the output textbox
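# The hardcoded /tmp path above is shared by all requests, so concurrent users
# could overwrite each other's uploads. A minimal sketch of a per-request file
# using only the standard library:
#
#   import tempfile
#   with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
#       temp_path = f.name
#   uploaded_image.save(temp_path)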
def generate_stream_local(prompt, temperature, top_p, max_new_tokens):
    """Stream raw text chunks from the code model for a pre-formatted prompt."""
    stop_code_generation.clear()
    inputs = code_tokenizer(prompt, return_tensors="pt").to(code_model.device)
    streamer = TextIteratorStreamer(code_tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        temperature=temperature,
        top_p=top_p,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        use_cache=True,
        return_dict_in_generate=True,
    )
    thread = threading.Thread(target=code_model.generate, kwargs=generation_kwargs)
    thread.start()
    for new_text in streamer:
        if stop_code_generation.is_set():
            break
        yield new_text
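# Both streaming helpers leave their worker thread unjoined. Since generate()
# returns once it reaches max_new_tokens or EOS this is usually harmless, but
# a thread.join() after the streamer loop (or daemon=True on the thread) would
# make shutdown behavior more predictable.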
# --- Respond logic for Gradio ---
def respond(message, temperature, top_p, max_new_tokens):
    sys_prompt = (
        "You are an AI coding assistant. If the user input is too vague to generate accurate code "
        "(e.g., lacks programming language, method, or details), ask clarifying questions before attempting to write the code.\n"
        "Think silently first and write your reasoning inside <think>...</think>. Then provide your final user-facing answer."
    )
    full_prompt = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": message}
    ]
    prompt = code_tokenizer.apply_chat_template(full_prompt, tokenize=False, add_generation_prompt=True)
    response = ""
    for part in generate_stream_local(prompt, temperature, top_p, max_new_tokens):
        response += part
        yield response
    # Future work: separate the reasoning process from the final answer, e.g.
    # if "</think>" in response:
    #     yield response.split("</think>")[-1].strip()
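# A minimal sketch of that separation as a standalone helper (hypothetical
# name, not called anywhere in this app). R1-style models close their chain
# of thought with a </think> tag, so everything after it is the answer:
def strip_think(text: str) -> str:
    """Return only the user-facing answer, dropping <think>...</think> content."""
    if "</think>" in text:
        return text.split("</think>")[-1].strip()
    return text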
def stop_code_generation_func():
    stop_code_generation.set()
    return "Generated Code Output"  # reset the Markdown panel to its placeholder
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Image Description tab
    with gr.Tab("Image Description"):
        gr.Markdown("## Qwen-VL: Vision-Language Streaming Chat with Image Upload")
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                image_input = gr.Image(
                    type="pil",
                    label="Upload Image",
                    height=480,
                    width=480
                )
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="e.g. Describe the image content",
                    value="Describe the picture",
                    lines=2
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.7,
                        step=0.05,
                        label="Temperature",
                        info="Controls randomness. Higher = more creative."
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.95,
                        step=0.05,
                        label="Top-p",
                        info="Cumulative probability for nucleus sampling."
                    )
                    max_new_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=500,
                        step=10,
                        label="Max New Tokens",
                        info="Maximum length of generated output."
                    )
                generate_btn = gr.Button("Generate Description", variant="primary")
                stop_btn = gr.Button("Stop and Clear", variant="stop")
        output = gr.Textbox(
            label="Streaming Response",
            placeholder="The model will respond here...",
            lines=10,
            interactive=False
        )
        generate_btn.click(
            fn=generate_response_image,
            inputs=[image_input, prompt_input, temperature, top_p, max_new_tokens],
            outputs=output
        )
        stop_btn.click(fn=stop_image_generation_func, outputs=output)

    # Code Generator tab
    with gr.Tab("Code Generator"):
        gr.Markdown("## DeepSeek-R1-Distill-Qwen: Code Generation from Natural Language")
        with gr.Row(equal_height=True):
            with gr.Column(scale=2):
                code_des = gr.Textbox(
                    label="Describe Your Code",
                    placeholder="e.g. Write a Python function to reverse a string",
                    lines=8
                )
                generate_code_btn = gr.Button("Generate Code", variant="primary")
                stop_code_btn = gr.Button("Stop and Clear", variant="stop")
            with gr.Column(scale=1):
                temperature_code = gr.Slider(
                    minimum=0.1,
                    maximum=1.5,
                    value=0.7,
                    step=0.05,
                    label="Temperature",
                    info="Higher = more creative code."
                )
                top_p_code = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p",
                    info="Top-p sampling filter."
                )
                max_new_tokens_code = gr.Slider(
                    minimum=50,
                    maximum=2048,
                    value=1000,
                    step=10,
                    label="Max New Tokens",
                    info="Maximum token length of generated code."
                )
        output_code = gr.Markdown(
            value="Generated Code Output",
            label="Generated Code Output",
            show_label=True,
            visible=True,
            container=True,
            height=300,
            show_copy_button=True
        )
        generate_code_btn.click(
            fn=respond,
            inputs=[code_des, temperature_code, top_p_code, max_new_tokens_code],
            outputs=output_code
        )
        stop_code_btn.click(fn=stop_code_generation_func, outputs=output_code)
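# Generator callbacks stream through Gradio's request queue; queuing is on by
# default in Gradio 4.x, but older releases needed it enabled explicitly,
# e.g. demo.queue().launch() instead of the plain launch() below.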
demo.launch()