jedick committed · 9d0646a
1 Parent(s): 951d2c0
Change model to Qwen3-14B
app.py CHANGED
@@ -42,7 +42,7 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         if not torch.cuda.is_available():
             raise gr.Error(
-                "Local mode requires GPU.
+                "Local mode requires GPU.",
                 print_exception=False,
             )
 
@@ -244,7 +244,11 @@ with gr.Blocks(
         ],
         value=("local" if torch.cuda.is_available() else "remote"),
         label="Compute Mode",
-        info=(
+        info=(
+            "NOTE: remote mode is available even if you have exceeded your ZeroGPU quota"
+            if torch.cuda.is_available()
+            else "NOTE: local mode requires GPU"
+        ),
         render=False,
     )
 
@@ -355,8 +359,8 @@ with gr.Blocks(
     if compute_mode == "local":
         status_text = f"""
π Now in **local** mode, using ZeroGPU hardware<br>
-β Response time is around
-β¨ [
+β Response time is around 1 minute<br>
+β¨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
"""
     return status_text
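The updated status text builds the model link from `model_id` at runtime. A minimal standalone sketch of that f-string logic, with the model ID hard-coded for illustration:

# Sketch: derive the display name and Hub link from model_id,
# as the status text in app.py does.
model_id = "Qwen/Qwen3-14B"
model_name = model_id.split("/")[-1]  # "Qwen3-14B"
link = f"[{model_name}](https://huggingface.co/{model_id})"
print(link)  # [Qwen3-14B](https://huggingface.co/Qwen/Qwen3-14B)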
graph.py CHANGED
@@ -9,7 +9,7 @@ import os
 
 # Local modules
 from retriever import BuildRetriever
-from prompts import query_prompt, generate_prompt,
+from prompts import query_prompt, generate_prompt, generic_tools_template
 from mods.tool_calling_llm import ToolCallingLLM
 
 # Local modules
@@ -81,7 +81,7 @@ def ToolifyHF(chat_model, system_message, system_message_suffix="", think=False)
     # system_message = "/no_think\n" + system_message
 
     # Combine system prompt and tools template
-    tool_system_prompt_template = system_message +
+    tool_system_prompt_template = system_message + generic_tools_template
 
     class HuggingFaceWithTools(ToolCallingLLM, ChatHuggingFace):
 
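ToolifyHF now appends the renamed `generic_tools_template` to the caller's system message to form the tool-calling system prompt. A minimal sketch of that composition, using an abridged stand-in for the real template in prompts.py (the `{tools}` placeholder and the example system message are assumptions for illustration, not taken from the full source):

# Abridged stand-in for prompts.generic_tools_template; the real template
# describes the available functions and the JSON-only response format.
generic_tools_template = """
### Functions

{tools}

You must always select one of the above tools and respond with only a JSON object.
"""

# Hypothetical system message, for illustration only.
system_message = "You answer questions about the R-help mailing list."

# Same composition as in ToolifyHF: system prompt + tools template.
tool_system_prompt_template = system_message + generic_tools_template
print(tool_system_prompt_template)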
main.py CHANGED
@@ -40,7 +40,8 @@ openai_model = "gpt-4o-mini"
 model_id = os.getenv("MODEL_ID")
 if model_id is None:
     # model_id = "HuggingFaceTB/SmolLM3-3B"
-    model_id = "google/gemma-3-12b-it"
+    # model_id = "google/gemma-3-12b-it"
+    model_id = "Qwen/Qwen3-14B"
 
 # Suppress these messages:
 # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
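With this change, Qwen3-14B is only the fallback; the `MODEL_ID` environment variable still wins, so a deployment can switch models without editing the code. A small sketch of the pattern:

import os

# MODEL_ID overrides the default at deploy time, e.g.:
#   MODEL_ID=google/gemma-3-12b-it python main.py
model_id = os.getenv("MODEL_ID")
if model_id is None:
    model_id = "Qwen/Qwen3-14B"
print(f"Using model: {model_id}")

The explicit `if model_id is None` block, rather than `os.getenv("MODEL_ID", "Qwen/Qwen3-14B")`, keeps the commented-out alternative model IDs visible next to the active default.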
prompts.py CHANGED
@@ -84,9 +84,9 @@ You must always select one of the above tools and respond with only a JSON objec
 
 """
 
-# Prompt template for Gemma
+# Prompt template for Gemma/Qwen with tools
 # Based on https://ai.google.dev/gemma/docs/capabilities/function-calling
-
+generic_tools_template = """
 
 ### Functions
 
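Under this template the model is expected to answer with a single JSON object that selects one of the listed functions. A hypothetical round trip (the tool name and the `name`/`parameters` fields are assumptions based on common tool-calling conventions, not taken from this repo):

import json

# Hypothetical model reply under the tools template; the tool name and
# field names are illustrative, not from the repo.
reply = '{"name": "retrieve_emails", "parameters": {"query": "reading large CSV files"}}'
call = json.loads(reply)
print(call["name"])        # retrieve_emails
print(call["parameters"])  # {'query': 'reading large CSV files'}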