Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from peft import PeftModel | |
# Load the tokenizer and the base model, then attach the fine-tuned LoRA adapter.
base_model_name = "t-tech/T-lite-it-1.0"
lora_repo = "shao3d/my-t-lite-qlora"

# Keyword arguments for loading the base weights: keep everything on the CPU
# and store the parameters in half precision.
_base_model_kwargs = {
    "device_map": "cpu",
    "torch_dtype": torch.float16,
}

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name, **_base_model_kwargs)

# Wrap the base model with the adapter weights from the Hub repository.
model = PeftModel.from_pretrained(base_model, lora_repo)
# Switch to inference mode (disables dropout and similar training-only layers).
model.eval()
# Chat callbacks
def add_user_message(message, history):
    """Append the submitted text to the chat as a pending turn.

    Args:
        message: Text taken from the input textbox.
        history: Current chatbot value as a list of ``[user, bot]`` pairs
            (``None`` or empty when the chat has not started yet).

    Returns:
        A ``(textbox_value, history)`` tuple. The textbox value is always an
        empty string so the input is cleared; the history gains a
        ``[message, None]`` turn unless the message is blank.
    """
    turns = list(history or [])
    text = (message or "").strip()
    if not text:
        # Nothing to send: leave the conversation as it is.
        return "", turns
    return "", turns + [[text, None]]


def generate_response(history):
    """Generate the model's reply for the last turn of the conversation.

    Args:
        history: Chat history as a list of ``[user, bot]`` pairs. The last
            pair holds the user message to answer, with ``None`` as its reply.

    Returns:
        A new history list in which the last turn carries the generated
        reply. An empty list is returned for an empty history, and the
        history is returned unchanged when its last turn is already answered.
    """
    if not history:
        return []

    user_message, pending_reply = history[-1][0], history[-1][1]
    if pending_reply is not None:
        # No unanswered turn (e.g. a blank message was submitted): do not
        # regenerate or duplicate an existing answer.
        return history

    # NOTE(review): the raw message is used as the prompt, as in the original
    # code. The base model looks like an instruction-tuned one; if the adapter
    # was trained on chat-formatted data, build the prompt with
    # tokenizer.apply_chat_template instead - confirm against the training setup.
    inputs = tokenizer(user_message, return_tensors="pt").to("cpu")
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,  # cap on the reply length
        temperature=0.7,  # sampling temperature
        top_p=0.9,  # nucleus sampling threshold
        do_sample=True,
    )

    # generate() returns prompt tokens followed by the new ones. Drop the
    # prompt by token count: comparing decoded text with the original message
    # is fragile because decoding does not always reproduce the input exactly.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Fill in the pending turn instead of appending a duplicate of it.
    return list(history[:-1]) + [[user_message, response]]


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Тест дообученной T-Lite")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Напиши сообщение для модели...")
    clear = gr.Button("Очистить чат")

    # Step 1 puts the typed message into the chat and clears the textbox;
    # step 2 asks the model to answer that pending turn.
    msg.submit(
        add_user_message, inputs=[msg, chatbot], outputs=[msg, chatbot]
    ).then(generate_response, inputs=chatbot, outputs=chatbot)
    clear.click(lambda: [], None, chatbot)

demo.launch()