import gradio as gr
import torch
from functools import lru_cache
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline

MODEL_NAME = "openbmb/MiniCPM-V-4"

@lru_cache(maxsize=1)  # load the model once and reuse it across requests
def load_pipeline():
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME, trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    # device_map="auto" already places the model, so the pipeline must not
    # also be given an explicit device argument
    return TextGenerationPipeline(model=model, tokenizer=tokenizer)

def respond(user_message, chat_history):
    # Flatten the previous turns and the new message into a single prompt
    history_text = ""
    for user_turn, assistant_turn in chat_history:
        history_text += f"User: {user_turn}\nAssistant: {assistant_turn}\n"
    prompt = history_text + f"User: {user_message}\nAssistant:"
    pipeline = load_pipeline()
    output = pipeline(
        prompt,
        max_new_tokens=256,  # max_length would also count the prompt tokens
        do_sample=True,
        top_k=50,
        top_p=0.95,
        num_return_sequences=1,
        return_full_text=False,  # return only the newly generated text
    )[0]["generated_text"]
    # Keep only the assistant's reply, cutting off any hallucinated next turn
    assistant_reply = output.split("\nUser:", 1)[0].strip()
    chat_history.append((user_message, assistant_reply))
    # Clear the textbox and update the chat display
    return "", chat_history

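# Optional: the flattened prompt grows with every turn and can eventually
# exceed the model's context window. A minimal sketch of one mitigation,
# assuming it is acceptable to keep only the most recent turns (the
# max_turns default below is an illustrative choice, not from the original):
def truncate_history(chat_history, max_turns=8):
    # Keep only the last max_turns (user, assistant) pairs
    return chat_history[-max_turns:]
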
with gr.Blocks() as demo:
    gr.Markdown("## MiniCPM-V-4 Chatbot Demo")
    chatbot = gr.Chatbot(label="Conversation")
    user_input = gr.Textbox(
        placeholder="Type a message...",
        lines=1,
    )
    clear_btn = gr.Button("Clear")
    # Submitting the textbox sends the message and clears the input field
    user_input.submit(
        fn=respond,
        inputs=[user_input, chatbot],
        outputs=[user_input, chatbot],
    )
    # Reset the conversation history
    clear_btn.click(lambda: [], None, chatbot)

demo.launch()
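# Note: on a shared Space, Gradio's built-in request queue can serialize GPU
# access across concurrent users; demo.queue().launch() would enable it
# (left disabled here to match the original launch call).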