|
|
import gradio as gr |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
import torch |
|
|
|
|
|
# Hugging Face Hub identifier of the checkpoint this app serves.
model_name = "lamapi/next-1b"

# Tokenizer and weights are both resolved from the same identifier.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights as float16 and let `device_map="auto"` decide placement.
# `eval()` returns the module itself, so it can be chained onto the load to
# put the model in evaluation mode right away.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
).eval()
|
|
|
|
|
def _content_to_text(content):
    """Flatten one history ``content`` value into plain text."""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # Content-part style entry, e.g. {"type": "text", "text": "..."}.
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        # A list of content parts: keep the textual ones, in order.
        pieces = (_content_to_text(part) for part in content)
        return "\n".join(piece for piece in pieces if piece)
    return str(content)


def _history_to_turns(history):
    """Normalise chat history into a flat list of ``(role, text)`` turns.

    Two history layouts are accepted:

    * pair style: ``[(user_text, bot_text), ...]``
    * message style: ``[{"role": "user" | "assistant", "content": ...}, ...]``
    """
    turns = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = "model" if entry.get("role") == "assistant" else "user"
            pending = [(role, entry.get("content"))]
        else:
            user_text, bot_text = entry
            pending = [("user", user_text), ("model", bot_text)]
        for role, content in pending:
            # A missing side of an exchange is skipped rather than rendered
            # as the literal string "None".
            if content is not None:
                turns.append((role, _content_to_text(content)))
    return turns


def chat(message, history):
    """Generate the model's reply to ``message`` given the prior ``history``.

    Args:
        message: The new user message.
        history: Earlier conversation turns, either as ``(user, bot)`` pairs
            or as ``{"role": ..., "content": ...}`` dicts; ``None`` or an
            empty list means a fresh conversation.

    Returns:
        The newly generated reply text, stripped of surrounding whitespace.
    """
    # Render every turn with the <start_of_turn>/<end_of_turn> markers and
    # leave the final model turn open so generation continues from there.
    parts = [
        f"<start_of_turn>{role}\n{text}<end_of_turn>\n"
        for role, text in _history_to_turns(history)
    ]
    parts.append(f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n")
    prompt = "".join(parts)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.9,
            top_k=140,
            top_p=0.9,
        )

    # The returned sequence starts with the prompt tokens. Decode only what
    # was generated after them instead of splitting the decoded text on
    # "\nmodel", which would cut off any reply that itself contains a line
    # starting with "model".
    prompt_length = inputs["input_ids"].shape[-1]
    generated = outputs[0][prompt_length:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
|
|
|
|
|
# Chat UI that routes every user message through `chat`.
iface = gr.ChatInterface(chat, title="Next-1B Chatbot ⚡")

# Listen on all interfaces at port 7860 and also request a share link.
iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
|
|