import os

import gradio as gr
from ollama import AsyncClient
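# The client below reads its endpoint and credentials from the environment,
# so OLLAMA_API_BASE_URL and OLLAMA_API_KEY must be set before launch (for
# a local server the base URL is typically http://localhost:11434).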
async def playground(
    message,
    history,
    num_ctx,
    temperature,
    repeat_penalty,
    min_p,
    top_k,
    top_p
):
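    # Gradio calls this handler with the chat message and history first,
    # then each component from additional_inputs, in the order the
    # parameters are declared above.
    # Skip empty or non-string submissions: yield an empty reply and stop
    # rather than sending a blank request to the model.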
    if not isinstance(message, str) or not message.strip():
        yield []
        return
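    # A fresh AsyncClient per request keeps the handler stateless. The bearer
    # token is presumably consumed by an authenticating proxy in front of the
    # Ollama server, since stock Ollama does not require an API key.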
    client = AsyncClient(
        host=os.getenv("OLLAMA_API_BASE_URL"),
        headers={
            "Authorization": f"Bearer {os.getenv('OLLAMA_API_KEY')}"
        }
    )
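    # With type="messages", Gradio provides the history as a list of
    # {"role", "content"} dicts, which maps directly onto the message format
    # the Ollama chat API expects; only well-formed entries are forwarded.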
    messages = []
    for item in history:
        if isinstance(item, dict) and "role" in item and "content" in item:
            messages.append({
                "role": item["role"],
                "content": item["content"]
            })
    messages.append({"role": "user", "content": message})
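    # With stream=True, AsyncClient.chat resolves to an async iterator of
    # partial responses; yielding the accumulated text on every chunk lets
    # Gradio re-render the reply as it streams in.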
response = "" |
|
|
async for part in await client.chat( |
|
|
model="gemma3:270m", |
|
|
messages=messages, |
|
|
options={ |
|
|
"num_ctx": int(num_ctx), |
|
|
"temperature": float(temperature), |
|
|
"repeat_penalty": float(repeat_penalty), |
|
|
"min_p": float(min_p), |
|
|
"top_k": int(top_k), |
|
|
"top_p": float(top_p) |
|
|
}, |
|
|
stream=True |
|
|
): |
|
|
response += part.get("message", {}).get("content", "") |
|
|
yield response |
|
|
|
|
|
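# Build the UI: a sidebar documents the space and holds the sampling sliders,
# which feed into the chat handler through ChatInterface's additional_inputs.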
with gr.Blocks(
    fill_height=True,
    fill_width=True
) as app:
    with gr.Sidebar():
        gr.Markdown("## Ollama Playground by UltimaX Intelligence")
        gr.HTML(
            """
            This space runs the <b><a href=
            "https://huggingface.co/google/gemma-3-270m"
            target="_blank">Gemma 3 (270M)</a></b> model from
            <b>Google</b>, hosted on a server with <b>Ollama</b> and
            accessed through the <b>Ollama Python SDK</b>.<br><br>

            Official <b>documentation</b> for using Ollama with the
            Python SDK can be found
            <b><a href="https://github.com/ollama/ollama-python"
            target="_blank">here</a></b>.<br><br>

            Gemma 3 (270M) runs entirely on <b>CPU</b>, using only a
            <b>single core</b>. Thanks to its small size, the model can
            operate efficiently on minimal hardware.<br><br>

            The Gemma 3 (270M) model can also be viewed or downloaded
            from the official Ollama website
            <b><a href="https://ollama.com/library/gemma3:270m"
            target="_blank">here</a></b>.<br><br>

            While Gemma 3 has multimodal capabilities, running it on CPU
            with a relatively small parameter count may limit its
            contextual understanding. For this reason, file upload has
            been disabled.<br><br>

            <b>Like this project? You can support me by buying a
            <a href="https://ko-fi.com/hadad" target="_blank">
            coffee</a></b>.
            """
        )
gr.Markdown("---") |
|
|
gr.Markdown("## Model Parameters") |
|
|
num_ctx = gr.Slider( |
|
|
minimum=512, |
|
|
maximum=1024, |
|
|
value=512, |
|
|
step=128, |
|
|
label="Context Length (num_ctx)", |
|
|
info="Maximum context window size. Limited to CPU usage." |
|
|
) |
|
|
gr.Markdown("") |
|
|
temperature = gr.Slider( |
|
|
minimum=0.1, |
|
|
maximum=2.0, |
|
|
value=1.0, |
|
|
step=0.1, |
|
|
label="Temperature", |
|
|
info="Controls randomness in generation" |
|
|
) |
|
|
gr.Markdown("") |
|
|
repeat_penalty = gr.Slider( |
|
|
minimum=0.1, |
|
|
maximum=2.0, |
|
|
value=1.0, |
|
|
step=0.1, |
|
|
label="Repeat Penalty", |
|
|
info="Penalty for repeating tokens" |
|
|
) |
|
|
gr.Markdown("") |
|
|
min_p = gr.Slider( |
|
|
minimum=0.0, |
|
|
maximum=1.0, |
|
|
value=0.001, |
|
|
step=0.001, |
|
|
label="Min P", |
|
|
info="Minimum probability threshold" |
|
|
) |
|
|
gr.Markdown("") |
|
|
top_k = gr.Slider( |
|
|
minimum=0, |
|
|
maximum=100, |
|
|
value=64, |
|
|
step=1, |
|
|
label="Top K", |
|
|
info="Number of top tokens to consider" |
|
|
) |
|
|
gr.Markdown("") |
|
|
top_p = gr.Slider( |
|
|
minimum=0.0, |
|
|
maximum=1.0, |
|
|
value=0.95, |
|
|
step=0.05, |
|
|
label="Top P", |
|
|
info="Cumulative probability threshold" |
|
|
) |
|
|
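    # The order of additional_inputs must match the parameter order of
    # playground after (message, history); Gradio passes each slider value
    # through positionally.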
    gr.ChatInterface(
        fn=playground,
        additional_inputs=[
            num_ctx,
            temperature,
            repeat_penalty,
            min_p,
            top_k,
            top_p
        ],
        chatbot=gr.Chatbot(
            label="Ollama | Gemma 3 (270M)",
            type="messages",
            show_copy_button=True,
            scale=1
        ),
        type="messages",
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language models."],
            ["Explain quantum computers."]
        ],
        cache_examples=False,
        show_api=False
    )
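# server_name="0.0.0.0" binds on all interfaces so the app is reachable when
# running inside a container (e.g. a Hugging Face Space); pwa=True makes the
# page installable as a progressive web app.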
app.launch(
    server_name="0.0.0.0",
    pwa=True
)