Spaces:
Running
Running
| import gradio as gr | |
| import requests | |
| import json | |
# Configuration for talking to the Ollama server.
# OLLAMA_API_URL is the generate endpoint on localhost port 11434;
# every request made by this app is POSTed to it.
OLLAMA_API_URL = "http://localhost:11434/api/generate"
# MODEL_NAME is sent as the "model" field of each request.
MODEL_NAME = "gemma-unsloth" # This must match the name used in `ollama create` in run.sh
def generate_text(prompt, max_new_tokens=256, temperature=0.7):
    """Send a prompt to the Ollama API and return the generated text.

    Args:
        prompt: Text to send to the model. A blank or missing prompt is
            rejected locally without contacting the server.
        max_new_tokens: Upper bound on generated tokens, sent as the
            ``num_predict`` option. Coerced to ``int`` because the value
            may arrive as a float (e.g. from a UI slider).
        temperature: Sampling temperature, sent as the ``temperature``
            option.

    Returns:
        The ``response`` field of the server's JSON reply, or a
        human-readable message when the prompt is blank, the request
        fails, the reply is not valid JSON, or the field is absent.
    """
    # Avoid a slow inference round-trip for input that cannot produce
    # a meaningful answer.
    if not prompt or not prompt.strip():
        return "Please enter a prompt."

    payload = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,  # Ask for the full response in a single reply.
        "options": {
            "num_predict": int(max_new_tokens),
            "temperature": temperature,
        },
    }

    try:
        # Generous timeout (10 minutes): CPU inference can be slow.
        response = requests.post(OLLAMA_API_URL, json=payload, timeout=600)
        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception.
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        return f"Error communicating with Ollama: {e}"

    return result.get("response", "No response from model.")
# Build the Gradio interface: a prompt box plus two sliders whose values are
# passed positionally to generate_text as (prompt, max_new_tokens, temperature).
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(
            lines=5,
            label="Enter your prompt",
            placeholder="Type your message here...",
        ),
        gr.Slider(
            minimum=1,
            maximum=1024,
            value=256,
            # Explicit step: a token count is a whole number, and without it
            # Gradio derives the step from the range, which can make most
            # integer values (including the default) unselectable.
            step=1,
            label="Max New Tokens",
            info="Maximum number of tokens to generate.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            label="Temperature",
            info="Controls randomness in generation. Lower values are less random.",
        ),
    ],
    outputs="text",
    title=f"Ollama {MODEL_NAME} on Hugging Face Spaces (CPU-only)",
    description="Interact with a Gemma 3.4B IT QAT GGUF model served by Ollama on CPU. Please be patient, as CPU inference can be slow.",
)
# Script entry point: start the Gradio server only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    launch_options = {
        # Listen on all interfaces so the app is reachable from outside
        # the container.
        "server_name": "0.0.0.0",
        # 7860 is the default port for Gradio apps on Hugging Face Spaces.
        "server_port": 7860,
    }
    iface.launch(**launch_options)