# Phi-3 Instruct Explorer: a small Gradio app for switching between
# Microsoft's Phi-3 / Phi-3.5 instruct models and testing their responses
# with CPU-only inference.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Display names shown in the UI, mapped to Hugging Face Hub model IDs.
MODEL_OPTIONS = {
    "Phi-3.5 Mini Instruct": "microsoft/Phi-3.5-mini-instruct",
    "Phi-3.5 MoE Instruct": "microsoft/Phi-3.5-MoE-instruct",
    "Phi-3 Mini 4K Instruct": "microsoft/Phi-3-mini-4k-instruct",
    "Phi-3 Mini 128K Instruct": "microsoft/Phi-3-mini-128k-instruct",
}
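
# Size note (approximate figures from the model cards): the Mini variants
# have about 3.8B parameters, while Phi-3.5-MoE is a mixture-of-experts
# model with roughly 42B total parameters, so loading it in float32 on CPU
# needs on the order of 160 GB of RAM.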

# Cache of (tokenizer, model) pairs keyed by model ID, so switching between
# models in the dropdown does not reload weights that are already in memory.
loaded_models = {}

EXAMPLES = [
    "Write a short story about a robot who learns to talk with a human.",
    "Summarize this paragraph: “From Stettin in the Baltic to Trieste in the "
    "Adriatic, an iron curtain has descended across the Continent. Behind "
    "that line lie all the capitals of the ancient states of Central and "
    "Eastern Europe. Warsaw, Berlin, Prague, Vienna, Budapest, Belgrade, "
    "Bucharest and Sofia, all these famous cities and the populations around "
    "them lie in what I must call the Soviet sphere, and all are subject in "
    "one form or another, not only to Soviet influence but to a very high "
    "and in some cases increasing measure of control from Moscow.”",
    "Explain how solar panels work in simple terms.",
    "Translate this sentence into Basque: 'The sea is calm today.'",
    "Write a noir-style intro for a detective in Amara.",
]


def load_model(model_id):
    """Load and cache the tokenizer/model pair for a Hugging Face model ID."""
    if model_id not in loaded_models:
        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            trust_remote_code=True,
            torch_dtype=torch.float32,  # full precision; this app targets CPU
        )
        model.eval()  # inference only: disable dropout and training behavior
        loaded_models[model_id] = (tokenizer, model)
    return loaded_models[model_id]
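
# A possible GPU variant (a sketch only; this app is CPU-only as written):
# with a CUDA device, half precision plus device_map="auto" would roughly
# halve memory use. Both keyword arguments are standard transformers options.
#
#     model = AutoModelForCausalLM.from_pretrained(
#         model_id,
#         trust_remote_code=True,
#         torch_dtype=torch.float16,
#         device_map="auto",
#     )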


def chat_with_model(user_input, model_choice):
    """Generate a single-turn response from the selected model."""
    model_id = MODEL_OPTIONS[model_choice]
    tokenizer, model = load_model(model_id)

    # Wrap the single user turn in the model's own chat template.
    messages = [{"role": "user", "content": user_input}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to("cpu")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            use_cache=False,  # skip the KV cache: slower, but lower memory
            do_sample=False,  # greedy decoding; sampling parameters such as
                              # temperature and top_p are ignored in this mode
        )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return response.strip()
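
# Quick smoke test for the generation path without launching the UI
# (example invocation only; uncomment to run, and note that the first call
# downloads the model weights):
#
#     print(chat_with_model("Explain how solar panels work in simple terms.",
#                           "Phi-3 Mini 4K Instruct"))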


# Assemble the single-page Gradio interface.
with gr.Blocks(title="Phi-3 Instruct Explorer") as demo:
    gr.Markdown("## 🧠 Phi-3 Instruct Explorer\nSwitch between Phi-3 instruct models and test responses on CPU.")

    with gr.Row():
        model_choice = gr.Dropdown(
            label="Choose a model",
            choices=list(MODEL_OPTIONS.keys()),
            value="Phi-3.5 Mini Instruct",
        )

    with gr.Row():
        user_input = gr.Textbox(label="Your message", placeholder="Ask me anything...")

    with gr.Row():
        output = gr.Textbox(label="Model response")

    with gr.Row():
        submit = gr.Button("Generate")

    gr.Markdown("### 🧪 Try an example prompt:")
    gr.Examples(
        examples=EXAMPLES,
        inputs=user_input,
    )

    submit.click(fn=chat_with_model, inputs=[user_input, model_choice], outputs=output)

demo.launch()
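
# launch() also accepts standard Gradio options, e.g. share=True for a
# temporary public link or server_name="0.0.0.0" to listen on all interfaces.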