Spaces: Running on Zero
jedick committed
Commit 32c7e5a · 1 Parent(s): 4230e8f
Prevent automatic selection of remote mode
Files changed:
- app.py +14 -12
- prompts.py +1 -1
- requirements.txt +3 -1
app.py
CHANGED

@@ -19,11 +19,7 @@ import os
 COMPUTE = "local"
 search_type = "hybrid"
 
-#
-if not torch.cuda.is_available():
-    COMPUTE = "remote"
-
-# Keep LangChain graph in a global variable (shared across sessions)
+# Global variables for LangChain graph
 graph_local = None
 graph_remote = None
 
@@ -31,9 +27,16 @@ graph_remote = None
 def run_workflow(input, history, thread_id):
     """The main function to run the chat workflow"""
 
-    # Get global graph
+    # Get global graph depending on compute mode
     global graph_local, graph_remote
     if COMPUTE == "local":
+        # We don't want the app to switch into remote mode without notification,
+        # so ask the user to do it
+        if not torch.cuda.is_available():
+            raise gr.Error(
+                "Local mode requires GPU. Please select remote mode.",
+                print_exception=False,
+            )
         graph = graph_local
     if COMPUTE == "remote":
         graph = graph_remote
@@ -241,13 +244,12 @@ with gr.Blocks(
 
     compute_mode = gr.Radio(
         choices=[
-            "local"
+            "local",
             "remote",
         ],
         value=COMPUTE,
         label="Compute Mode",
         info=(None if torch.cuda.is_available() else "NOTE: local mode requires GPU"),
-        interactive=torch.cuda.is_available(),
         render=False,
     )
 
@@ -348,14 +350,14 @@ with gr.Blocks(
         return intro
 
     def get_status_text(compute_mode):
-        if compute_mode
+        if compute_mode == "remote":
            status_text = f"""
            π Now in **remote** mode, using the OpenAI API<br>
            β οΈ **_Privacy Notice_**: Data sharing with OpenAI is enabled<br>
            β¨ text-embedding-3-small and {openai_model}<br>
            π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
            """
-        if compute_mode
+        if compute_mode == "local":
            status_text = f"""
            π Now in **local** mode, using ZeroGPU hardware<br>
            β Response time is around 2 minutes<br>
@@ -459,9 +461,9 @@ with gr.Blocks(
         COMPUTE = compute_mode
 
     def set_avatar(compute_mode):
-        if compute_mode
+        if compute_mode == "remote":
             image_file = "images/cloud.png"
-        if compute_mode
+        if compute_mode == "local":
             image_file = "images/chip.png"
         return gr.update(
             avatar_images=(
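The guard added to run_workflow can be tried in isolation. Below is a minimal, hypothetical sketch, not part of this commit: the gr.ChatInterface wrapper and the placeholder reply are illustrative assumptions, and only the raise gr.Error(...) check mirrors the change above. It assumes a Gradio version whose gr.Error accepts print_exception, which app.py itself relies on.

# Hypothetical minimal reproduction of the GPU guard; only the gr.Error check
# corresponds to the commit, the rest is scaffolding for illustration.
import gradio as gr
import torch

COMPUTE = "local"  # module-level compute mode, as in app.py

def run_workflow(message, history):
    if COMPUTE == "local" and not torch.cuda.is_available():
        # Surface an error toast instead of silently switching to remote mode;
        # print_exception=False keeps this expected condition out of the server log.
        raise gr.Error(
            "Local mode requires GPU. Please select remote mode.",
            print_exception=False,
        )
    return f"Running in {COMPUTE} mode: {message}"

demo = gr.ChatInterface(run_workflow)

if __name__ == "__main__":
    demo.launch()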
prompts.py
CHANGED

@@ -80,7 +80,7 @@ You must always select one of the above tools and respond with only a JSON objec
 
 """
 
-# Prompt template for Gemma
+# Prompt template for Gemma 3 with tools
 # Based on https://ai.google.dev/gemma/docs/capabilities/function-calling
 gemma_tools_template = """
 
requirements.txt
CHANGED

@@ -7,8 +7,10 @@ langgraph>=0.4.7,<0.6
 sentence-transformers>=4.1.0
 # Required by Nomic embeddings
 einops==0.8.1
-# Gemma-3: >=4.50
 # SmolLM3: >=4.53
+# Gemma 3: >=4.50
+# Gemma 3 with 4.54.0 gives:
+# ValueError: Max cache length is not consistent across layers
 transformers==4.53.3
 # Commented because we have local modifications
 #tool-calling-llm==0.1.2
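The comments above describe a narrow compatibility window for transformers: SmolLM3 needs >=4.53, while Gemma 3 on 4.54.0 fails with "ValueError: Max cache length is not consistent across layers", hence the 4.53.3 pin. A hypothetical startup check, not part of this commit, could fail fast if the installed version drifts outside that window:

# Hypothetical version guard based on the notes in requirements.txt;
# not part of this commit.
from packaging.version import Version
import transformers

v = Version(transformers.__version__)
if not (Version("4.53") <= v < Version("4.54")):
    raise RuntimeError(
        f"transformers {v} is untested: SmolLM3 needs >=4.53 and "
        "Gemma 3 breaks on 4.54.0 (requirements.txt pins 4.53.3)"
    )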