Update app.py

app.py CHANGED
@@ -5,54 +5,33 @@ import torch
 from threading import Thread
 
 phi4_model_path = "microsoft/Phi-4-reasoning-plus"
-phi4_mini_model_path = "microsoft/Phi-4-mini-reasoning"
 
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 phi4_model = AutoModelForCausalLM.from_pretrained(phi4_model_path, torch_dtype="auto").to(device)
 phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)
-phi4_mini_model = AutoModelForCausalLM.from_pretrained(phi4_mini_model_path, torch_dtype="auto").to(device)
-phi4_mini_tokenizer = AutoTokenizer.from_pretrained(phi4_mini_model_path)
 
 @spaces.GPU(duration=60)
-def generate_response(user_message, model_name, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
+def generate_response(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
     if not user_message.strip():
         return history_state, history_state
 
-    #
-    if model_name == "Phi-4":
-        model = phi4_model
-        tokenizer = phi4_tokenizer
-        start_tag = "<|im_start|>"
-        sep_tag = "<|im_sep|>"
-        end_tag = "<|im_end|>"
-    elif model_name == "Phi-4-mini-instruct":
-        model = phi4_mini_model
-        tokenizer = phi4_mini_tokenizer
-        start_tag = ""
-        sep_tag = ""
-        end_tag = "<|end|>"
-    else:
-        raise ValueError("Error loading on models")
+    # Phi-4 model settings
+    model = phi4_model
+    tokenizer = phi4_tokenizer
+    start_tag = "<|im_start|>"
+    sep_tag = "<|im_sep|>"
+    end_tag = "<|im_end|>"
 
     # Recommended prompt settings by Microsoft
     system_message = "You are a friendly and knowledgeable assistant, here to help with any questions or tasks."
-    if model_name == "Phi-4":
-        prompt = f"{start_tag}system{sep_tag}{system_message}{end_tag}"
-        for message in history_state:
-            if message["role"] == "user":
-                prompt += f"{start_tag}user{sep_tag}{message['content']}{end_tag}"
-            elif message["role"] == "assistant" and message["content"]:
-                prompt += f"{start_tag}assistant{sep_tag}{message['content']}{end_tag}"
-        prompt += f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}"
-    else:
-        prompt = f"<|system|>{system_message}{end_tag}"
-        for message in history_state:
-            if message["role"] == "user":
-                prompt += f"<|user|>{message['content']}{end_tag}"
-            elif message["role"] == "assistant" and message["content"]:
-                prompt += f"<|assistant|>{message['content']}{end_tag}"
-        prompt += f"<|user|>{user_message}{end_tag}<|assistant|>"
+    prompt = f"{start_tag}system{sep_tag}{system_message}{end_tag}"
+    for message in history_state:
+        if message["role"] == "user":
+            prompt += f"{start_tag}user{sep_tag}{message['content']}{end_tag}"
+        elif message["role"] == "assistant" and message["content"]:
+            prompt += f"{start_tag}assistant{sep_tag}{message['content']}{end_tag}"
+    prompt += f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}"
 
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
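For reference, the rewritten prompt loop assembles a ChatML-style string with Phi-4's `<|im_start|>`/`<|im_sep|>`/`<|im_end|>` tags. A minimal standalone sketch of what it produces, with a sample conversation invented purely for illustration:

```python
# Sketch of the prompt format built by the new code above; the sample
# conversation is illustrative, not from the Space.
start_tag, sep_tag, end_tag = "<|im_start|>", "<|im_sep|>", "<|im_end|>"
system_message = "You are a friendly and knowledgeable assistant, here to help with any questions or tasks."
history_state = [
    {"role": "user", "content": "Hi!"},
    {"role": "assistant", "content": "Hello! How can I help?"},
]
user_message = "Explain Newton's laws of motion."

prompt = f"{start_tag}system{sep_tag}{system_message}{end_tag}"
for message in history_state:
    if message["role"] == "user":
        prompt += f"{start_tag}user{sep_tag}{message['content']}{end_tag}"
    elif message["role"] == "assistant" and message["content"]:
        prompt += f"{start_tag}assistant{sep_tag}{message['content']}{end_tag}"
prompt += f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}"

# The string ends with an open assistant turn, cueing the model to respond.
print(prompt)
```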
@@ -83,7 +62,7 @@ def generate_response(user_message, model_name, max_tokens, temperature, top_k,
         {"role": "assistant", "content": ""}
     ]
     for new_token in streamer:
-        cleaned_token = new_token.replace("<|im_start|>", "").replace("<|im_sep|>", "").replace("<|im_end|>", "")
+        cleaned_token = new_token.replace("<|im_start|>", "").replace("<|im_sep|>", "").replace("<|im_end|>", "")
         assistant_response += cleaned_token
         new_history[-1]["content"] = assistant_response.strip()
         yield new_history, new_history
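The `for new_token in streamer` loop above consumes a `transformers.TextIteratorStreamer`. Its setup falls outside this hunk, so the exact arguments below are assumptions; the usual pattern runs `model.generate` on a background thread so tokens can be yielded as they arrive:

```python
from threading import Thread
from transformers import TextIteratorStreamer

# Assumed setup for the streaming loop above; the Space's real generation
# kwargs are not visible in this hunk.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False)
generation_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=max_tokens,
    do_sample=True,
    temperature=temperature,
    top_k=top_k,
    top_p=top_p,
    repetition_penalty=repetition_penalty,
)
# Generation runs in the background; iterating over the streamer then
# yields decoded text chunks as they are produced.
Thread(target=model.generate, kwargs=generation_kwargs).start()
```

Keeping `skip_special_tokens=False` would match the manual `.replace()` cleanup in the loop; with `True`, the `<|im_...|>` tags would already be stripped before they reach it.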
@@ -91,7 +70,7 @@ def generate_response(user_message, model_name, max_tokens, temperature, top_k,
     yield new_history, new_history
 
 example_messages = {
-    "Learn about physics": "Explain Newton's laws of motion.",
+    "Learn about physics": "Explain Newton's laws of motion.",
     "Discover space facts": "What are some interesting facts about black holes?",
     "Write a factorial function": "Write a Python function to calculate the factorial of a number."
 }
@@ -99,8 +78,8 @@ example_messages = {
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # Phi-4
-        Welcome to the Phi-4 Chatbot! You can chat with Microsoft's Phi-4
+        # Phi-4-reasoning-plus Chatbot
+        Welcome to the Phi-4 Chatbot! You can chat with Microsoft's Phi-4-reasoning-plus model. Adjust the settings on the left to customize the model's responses.
         """
     )
 
@@ -109,11 +88,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### Settings")
-            model_dropdown = gr.Dropdown(
-                choices=["Phi-4", "Phi-4-mini-instruct"],
-                label="Select Model",
-                value="Phi-4"
-            )
             max_tokens_slider = gr.Slider(
                 minimum=64,
                 maximum=4096,
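The `max_tokens_slider` definition is cut off by the hunk boundary after `maximum=4096`. A complete `gr.Slider` of this shape might look like the following; `step`, `value`, and `label` are assumptions, not taken from the Space:

```python
# Hypothetical completion of the truncated slider; only minimum and maximum
# appear in the diff, the remaining arguments are assumed.
max_tokens_slider = gr.Slider(
    minimum=64,
    maximum=4096,
    step=64,              # assumed granularity
    value=1024,           # assumed default
    label="Max Tokens",   # assumed label
)
```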
@@ -166,7 +140,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     submit_button.click(
         fn=generate_response,
-        inputs=[user_input, model_dropdown, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider, history_state],
+        inputs=[user_input, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider, history_state],
         outputs=[chatbot, history_state]
     ).then(
         fn=lambda: gr.update(value=""),