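# Gradio chat demo for openbmb/MiniCPM-V-4: a cached text-generation pipeline
# plus a minimal chat UI (chat window, message box, clear button).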
import os
import gradio as gr
from functools import lru_cache
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
import torch
MODEL_NAME = "openbmb/MiniCPM-V-4"
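# Load the tokenizer and model once; lru_cache keeps a single pipeline
# instance so repeated chat turns reuse the already-initialized model.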
@lru_cache(maxsize=1)
def load_pipeline():
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME, trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    return TextGenerationPipeline(
        model=model,
        tokenizer=tokenizer,
        device=model.device.index if torch.cuda.is_available() else -1
    )
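# Chat callback: rebuild the prompt from the running history, generate a
# continuation, and append the new (user, assistant) pair to the history.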
def respond(user_message, chat_history):
    # Fold the previous turns and the new message into a single prompt
    # (the markers 用户 / 助理 mean "User:" / "Assistant:").
    history_text = ""
    for u, a in chat_history:
        history_text += f"用户：{u}\n助理：{a}\n"
    prompt = history_text + f"用户：{user_message}\n助理："
    pipeline = load_pipeline()
    output = pipeline(
        prompt,
        max_new_tokens=256,  # cap the generated continuation rather than the total length
        do_sample=True,
        top_k=50,
        top_p=0.95,
        num_return_sequences=1
    )[0]["generated_text"]
    # The pipeline returns prompt + continuation; keep only the text after the
    # last assistant marker, i.e. the newly generated reply.
    assistant_reply = output.rsplit("助理：", 1)[-1].strip()
    chat_history.append((user_message, assistant_reply))
    return chat_history
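# Wire up the Gradio interface: a chat window, a single-line message box,
# and a button that clears the conversation.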
with gr.Blocks() as demo:
    gr.Markdown("## MiniCPM-V-4 Chatbot Demo")
    chatbot = gr.Chatbot(label="Conversation")
    user_input = gr.Textbox(
        placeholder="Enter a message...",
        lines=1
    )
    clear_btn = gr.Button("Clear")
    user_input.submit(
        fn=respond,
        inputs=[user_input, chatbot],
        outputs=chatbot
    )
    # Reset the chat window to an empty history
    clear_btn.click(
        lambda: [],
        None,
        chatbot
    )

demo.launch()