Spaces: Runtime error
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces

# Load model and tokenizer at startup
model_name_or_path = "tencent/Hunyuan-MT-7B"

print("Loading model... This may take a few minutes.")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
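# Note: device_map="auto" relies on the accelerate package being installed.
# On ZeroGPU Spaces the GPU is only attached inside functions decorated with
# @spaces.GPU, which is why respond() below carries that decorator; running
# generation outside such a function can fail with a runtime error.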
@spaces.GPU
def respond(message, history, system_message=None, max_tokens=None, temperature=None, top_p=None):
    """
    Generate a response from the Hunyuan-MT model.
    """
    # Set default values if None (happens during example caching)
    if system_message is None:
        system_message = "You are a helpful AI assistant."
    if max_tokens is None:
        max_tokens = 512
    if temperature is None:
        temperature = 0.7
    if top_p is None:
        top_p = 0.95

    # Build the conversation in chat-template format
    messages = []

    # Add system message if provided
    if system_message:
        messages.append({"role": "system", "content": system_message})

    # Add conversation history. Depending on the Gradio version, ChatInterface
    # passes history as (user, assistant) tuples or as role/content dicts.
    for item in history:
        if isinstance(item, dict):
            messages.append({"role": item["role"], "content": item["content"]})
        else:
            user_msg, assistant_msg = item
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user message
    messages.append({"role": "user", "content": message})

    # Tokenize the conversation with the model's chat template
    tokenized_chat = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    )

    # Generate the response
    with torch.no_grad():
        outputs = model.generate(
            tokenized_chat.to(model.device),
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=temperature > 0,  # greedy decoding when temperature is 0
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens
    response = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True)
    return response
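# A quick sanity check outside the UI (hypothetical usage, not part of the
# original Space): calling the handler directly with an empty history
#   print(respond("Translate to Chinese: It's on the house.", []))
# should print the model's translation as a plain string.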
# Create the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful AI assistant.",
            label="System Message",
            lines=2
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens"
        ),
        gr.Slider(
            minimum=0,
            maximum=2,
            value=0.7,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0,
            maximum=1,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        ),
    ],
    title="Hunyuan-MT-7B Chatbot",
    description="Chat with Tencent's Hunyuan-MT-7B model. This model is particularly good at translation tasks.",
    examples=[
        ["Translate to Chinese: It's on the house.", "You are a helpful AI assistant.", 512, 0.7, 0.95],
        ["What are the main differences between Python and JavaScript?", "You are a helpful AI assistant.", 512, 0.7, 0.95],
        ["Explain quantum computing in simple terms.", "You are a helpful AI assistant.", 512, 0.7, 0.95],
    ],
    cache_examples=False,
    theme="soft"
)

if __name__ == "__main__":
    demo.launch()
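For the Space to build, a requirements.txt along these lines is needed (an assumed minimal set, not taken from the original Space; accelerate is required because the model is loaded with device_map="auto"):

# requirements.txt (assumed minimal dependency set)
gradio
spaces
torch
transformers
accelerate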