Spaces:

microsoft
/

phi-4-mini

Running

App Files Community

nguyenbh committed on Feb 25

Commit

38c463d

1 Parent(s): 88c6033

Use azure endpoint

Browse files

Files changed (2) hide show

app.py +107 -88
requirements.txt +2 -4

app.py CHANGED Viewed

@@ -1,108 +1,127 @@
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import os
-hf_token = os.getenv("YOUR_HF_TOKEN")
-# Load model and tokenizer
-print("Loading model and tokenizer...")
-model_path = "microsoft/Phi-4-mini-instruct"  # Can be changed to local path "./Phi-4-Mini-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(
-    model_path,
-    padding_side="left",
-    token=hf_token,
-    trust_remote_code=True
-)
-model = AutoModelForCausalLM.from_pretrained(
-    model_path,
-    device_map="auto",
-    attn_implementation="eager",  # "flash_attention_2",
-    torch_dtype="auto",
-    token=hf_token,
-    trust_remote_code=True
 )
-# Create pipeline for easier inference
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-)
-print("Model and tokenizer loaded successfully!")
-# Format chat history to messages format
-def format_chat_history(message, history):
-    messages = [
-        {"role": "system", "content": "You are a helpful AI assistant."}
-    ]
-    # Add chat history
-    for user_msg, assistant_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": assistant_msg})
-    # Add current message
     messages.append({"role": "user", "content": message})
-    return messages
-# Streaming response generator
-def predict(message, history):
-    messages = format_chat_history(message, history)
-    generation_args = {
-        "max_new_tokens": 1024,
-        "return_full_text": False,
-        "temperature": 0.001,
-        "top_p": 1.0,
-        "do_sample": True,
-        "streamer": None,  # Will be set in the generator
     }
-    # Initialize an empty response
-    partial_message = ""
-    history_with_message = history + [[message, partial_message]]
-    # Create a TextIteratorStreamer for streaming generation
-    from transformers import TextIteratorStreamer
-    from threading import Thread
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    generation_args["streamer"] = streamer
-    # Start a separate thread for generation
-    thread = Thread(target=pipe, args=(messages,), kwargs=generation_args)
-    thread.start()
-    # Stream the response
-    for new_text in streamer:
-        partial_message += new_text
-        yield history + [[message, partial_message]]
-# Create the Gradio interface
-css = """
-.chatbot-container {max-width: 800px; margin: auto;}
-.chat-header {text-align: center; margin-bottom: 20px;}
-"""
-with gr.Blocks(css=css) as demo:
-    gr.HTML("<div class='chat-header'><h1>Phi-4 Mini Chatbot</h1></div>")
-    with gr.Column(elem_classes="chatbot-container"):
-        chatbot = gr.Chatbot(height=400)
-        msg = gr.Textbox(placeholder="Type your message here...", label="Input")
-        clear = gr.Button("Clear Conversation")
-        msg.submit(predict, [msg, chatbot], [chatbot], queue=True, api_name="chat").then(
-            lambda: "", None, [msg]
-        )
-        clear.click(lambda: None, None, chatbot, queue=False)
 # Launch the app
-demo.launch(share=True)  # Set share=False if you don't want a public link

 import os
+import gradio as gr
+from azure.ai.inference import ChatCompletionsClient
+from azure.core.credentials import AzureKeyCredential
+# Azure Inference setup: the endpoint URL and API key are read from environment variables
+url = os.getenv("Azure_Endpoint")  # os.getenv returns None when the variable is unset
+api_key = AzureKeyCredential(os.getenv("Azure_API_KEY"))
+# Initialize the ChatCompletionsClient once at module level; get_azure_response reuses it for every request
+client = ChatCompletionsClient(
+    endpoint=url,
+    credential=api_key,
+    stream=True  # NOTE(review): get_azure_response reads the reply as one completed object - confirm this kwarg has any effect here
 )
+# Get and print model information (optional): a failure is only printed, startup continues
+try:
+    model_info = client.get_model_info()
+    print("Model name:", model_info.model_name)
+    print("Model type:", model_info.model_type)
+    print("Model provider name:", model_info.model_provider_name)
+except Exception as e:
+    print("Could not get model info:", str(e))
+# Configuration parameters: initial values of the three sliders, also restored by clear_conversation
+default_temperature = 0.8
+default_max_tokens = 2048
+default_top_p = 0.1
+# Example prompts that users can try (passed to gr.Examples, which targets the message textbox)
+example_prompts = [
+    "I have $20,000 in my savings account, where I receive a 4% profit per year and payments twice a year. Can you please tell me how long it will take for me to become a millionaire?",
+    "I have total $500 create a plan with travel and food",
+    "I have $1000 and 5 years. Is it better to invest in a stock paying $15 quarterly dividends or in a 5% annual savings account?"
+]
+def get_azure_response(message, chat_history, temperature, max_tokens, top_p):  # returns the reply text, or an "Error: ..." string on failure
+    """
+    Function to get a response from the Azure Phi-4 model
+    """
+    # Prepare conversation history in the format expected by Azure: the fixed system prompt comes first
+    messages = [{"role": "system", "content": "You are a helpful AI assistant."}]
+    # Add conversation history; chat_history is iterated as (user, assistant) pairs
+    for human, assistant in chat_history:
+        messages.append({"role": "user", "content": human})
+        if assistant:  # Only add non-empty assistant messages (falsy replies are skipped)
+            messages.append({"role": "assistant", "content": assistant})
+    # Add the current message as the final user turn
     messages.append({"role": "user", "content": message})
+    # Prepare the payload: sampling settings come straight from the caller, both penalties are fixed at 0
+    payload = {
+        "messages": messages,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "top_p": top_p,
+        "presence_penalty": 0,
+        "frequency_penalty": 0,
     }
+    # Get response; any exception raised in this try block is caught below and converted to text
+    try:
+        print("Sending request to Azure...")
+        response = client.complete(payload)  # the payload dict is passed as the single positional argument
+        reply = response.choices[0].message.content  # only the first choice is used
+        # Print usage statistics (token counts read from response.usage) to stdout
+        print(f"Usage - Prompt tokens: {response.usage.prompt_tokens}, "
+              f"Completion tokens: {response.usage.completion_tokens}, "
+              f"Total tokens: {response.usage.total_tokens}")
+        return reply
+    except Exception as e:  # broad catch: the error is printed and returned instead of raised
+        print(f"Error getting response: {str(e)}")
+        return f"Error: {str(e)}"  # NOTE(review): chat() appends this string to the history like a normal reply, so it is re-sent as an assistant turn
+# Create the Gradio interface: chatbot, message box, clear button, examples and parameter sliders
+with gr.Blocks(title="Phi-4-mini Chatbot") as demo:
+    gr.Markdown("Chat with the Phi-4 mini model hosted on Azure AI")
+    # Create a chatbot component
+    chatbot = gr.Chatbot(height=300)  # assumes history is exchanged as (user, assistant) pairs - TODO confirm for the installed Gradio version
+    msg = gr.Textbox(label="Type your message here", placeholder="Ask me anything...", lines=1)
+    clear = gr.Button("Clear Conversation")
+    # Add examples section; selecting an example fills the message textbox
+    with gr.Accordion("Try these examples", open=True):
+        examples = gr.Examples(
+            examples=example_prompts,
+            inputs=msg
+        )
+    # Add model parameter controls (collapsed by default); their values are passed to chat on submit
+    with gr.Accordion("Model Parameters", open=False):
+        temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=default_temperature, step=0.1,
+                                label="Temperature (higher = more creative, lower = more focused)")
+        max_tokens_slider = gr.Slider(minimum=100, maximum=4096, value=default_max_tokens, step=100,
+                                      label="Max Tokens (maximum length of response)")
+        top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=default_top_p, step=0.1,
+                                label="Top P (diversity of response)")
+    # Simplified chat function that handles both sending and receiving messages
+    def chat(message, history, temperature, max_tokens, top_p):  # returns (new textbox value, updated history)
+        if not message.strip():  # blank or whitespace-only input: no request is sent, history is unchanged
+            return "", history
+        # Get response from Azure (the full reply is returned at once, nothing is streamed to the UI)
+        response = get_azure_response(message, history, temperature, max_tokens, top_p)
+        # Add the exchange to history
+        history.append((message, response))  # appends to the incoming history list in place
+        return "", history  # Clear the input field after sending
+    # Function to clear the conversation and reset the three sliders to their default values
+    def clear_conversation():
+        return [], default_temperature, default_max_tokens, default_top_p
+    # Set up event handlers - simplified approach: Enter in the textbox runs chat, the button runs clear_conversation
+    msg.submit(chat, [msg, chatbot, temp_slider, max_tokens_slider, top_p_slider], [msg, chatbot])
+    clear.click(clear_conversation, None, [chatbot, temp_slider, max_tokens_slider, top_p_slider])
 # Launch the app
+demo.launch(debug=True)  # Set share=True to generate a public URL for testing

requirements.txt CHANGED Viewed

@@ -1,4 +1,2 @@
-smolagents==1.9.2
-transformers==4.49.0
-torch
-accelerate


1	+ azure-ai-inference==1.0.0b9
2	+ azureml-inference-server-http==1.0.0