jedick committed
Commit 32c7e5a · 1 Parent(s): 4230e8f

Prevent automatic selection of remote mode

Files changed (3)
  1. app.py +14 -12
  2. prompts.py +1 -1
  3. requirements.txt +3 -1
app.py CHANGED
@@ -19,11 +19,7 @@ import os
 COMPUTE = "local"
 search_type = "hybrid"
 
-# Switch to remote mode if GPU isn't available
-if not torch.cuda.is_available():
-    COMPUTE = "remote"
-
-# Keep LangChain graph in a global variable (shared across sessions)
+# Global variables for LangChain graph
 graph_local = None
 graph_remote = None
 
@@ -31,9 +27,16 @@ graph_remote = None
 def run_workflow(input, history, thread_id):
     """The main function to run the chat workflow"""
 
-    # Get global graph for compute mode
+    # Get global graph depending on compute mode
     global graph_local, graph_remote
     if COMPUTE == "local":
+        # We don't want the app to switch into remote mode without notification,
+        # so ask the user to do it
+        if not torch.cuda.is_available():
+            raise gr.Error(
+                "Local mode requires GPU. Please select remote mode.",
+                print_exception=False,
+            )
         graph = graph_local
     if COMPUTE == "remote":
         graph = graph_remote
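
The guard above replaces the old import-time fallback: instead of silently flipping COMPUTE to "remote" when no GPU is detected, run_workflow now raises a Gradio error and asks the user to switch modes explicitly. A minimal standalone sketch of the same pattern (the demo wiring and the check_gpu helper are illustrative, not part of app.py; print_exception=False suppresses the traceback in the server log, as in the diff):

```python
import gradio as gr
import torch

COMPUTE = "local"

def check_gpu():
    # Hypothetical helper: fail loudly instead of silently switching modes
    if COMPUTE == "local" and not torch.cuda.is_available():
        # gr.Error is shown to the user as an error popup in the Gradio UI;
        # print_exception=False keeps the traceback out of the server log
        raise gr.Error(
            "Local mode requires GPU. Please select remote mode.",
            print_exception=False,
        )
    return "GPU check passed"

with gr.Blocks() as demo:
    run_button = gr.Button("Run")
    status = gr.Textbox(label="Status")
    run_button.click(check_gpu, outputs=status)

if __name__ == "__main__":
    demo.launch()
```
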
@@ -241,13 +244,12 @@ with gr.Blocks(
 
     compute_mode = gr.Radio(
         choices=[
-            "local" if torch.cuda.is_available() else "local (not available)",
+            "local",
             "remote",
         ],
         value=COMPUTE,
         label="Compute Mode",
         info=(None if torch.cuda.is_available() else "NOTE: local mode requires GPU"),
-        interactive=torch.cuda.is_available(),
         render=False,
     )
 
@@ -348,14 +350,14 @@ with gr.Blocks(
         return intro
 
     def get_status_text(compute_mode):
-        if compute_mode.startswith("remote"):
+        if compute_mode == "remote":
             status_text = f"""
             📍 Now in **remote** mode, using the OpenAI API<br>
             ⚠️ **_Privacy Notice_**: Data sharing with OpenAI is enabled<br>
             ✨ text-embedding-3-small and {openai_model}<br>
             🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
-        if compute_mode.startswith("local"):
+        if compute_mode == "local":
             status_text = f"""
             📍 Now in **local** mode, using ZeroGPU hardware<br>
             ⌛ Response time is around 2 minutes<br>
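
Because the radio choices are now the plain strings "local" and "remote" (no "local (not available)" variant), exact equality is enough where startswith was needed before. A rough sketch of how such a selector can drive the status text, assuming the Radio's change event is wired to a get_status_text-style helper (the wiring and component names here are assumptions, not copied from app.py):

```python
import gradio as gr
import torch

COMPUTE = "local"

def get_status_text(compute_mode):
    # Exact string comparison works because the choices are now "local"/"remote"
    if compute_mode == "remote":
        return "Now in **remote** mode, using the OpenAI API"
    if compute_mode == "local":
        return "Now in **local** mode, using local hardware"

with gr.Blocks() as demo:
    compute_mode = gr.Radio(
        choices=["local", "remote"],
        value=COMPUTE,
        label="Compute Mode",
        info=(None if torch.cuda.is_available() else "NOTE: local mode requires GPU"),
    )
    status = gr.Markdown(get_status_text(COMPUTE))
    # Refresh the status text whenever the user switches modes
    compute_mode.change(get_status_text, inputs=compute_mode, outputs=status)

if __name__ == "__main__":
    demo.launch()
```
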
@@ -459,9 +461,9 @@ with gr.Blocks(
         COMPUTE = compute_mode
 
     def set_avatar(compute_mode):
-        if compute_mode.startswith("remote"):
+        if compute_mode == "remote":
             image_file = "images/cloud.png"
-        if compute_mode.startswith("local"):
+        if compute_mode == "local":
             image_file = "images/chip.png"
         return gr.update(
             avatar_images=(

prompts.py CHANGED
@@ -80,7 +80,7 @@ You must always select one of the above tools and respond with only a JSON objec
 
 """
 
-# Prompt template for Gemma-3 with tools
+# Prompt template for Gemma 3 with tools
 # Based on https://ai.google.dev/gemma/docs/capabilities/function-calling
 gemma_tools_template = """
 
requirements.txt CHANGED
@@ -7,8 +7,10 @@ langgraph>=0.4.7,<0.6
 sentence-transformers>=4.1.0
 # Required by Nomic embeddings
 einops==0.8.1
-# Gemma-3: >=4.50
 # SmolLM3: >=4.53
+# Gemma 3: >=4.50
+# Gemma 3 with 4.54.0 gives:
+# ValueError: Max cache length is not consistent across layers
 transformers==4.53.3
 # Commented because we have local modifications
 #tool-calling-llm==0.1.2
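
The pin stays at transformers==4.53.3, which satisfies both floors noted in the comments (Gemma 3 needs >=4.50, SmolLM3 needs >=4.53) while avoiding the 4.54.0 cache-length error. A small, optional runtime guard along these lines could verify the installed version before loading a model (this snippet is an illustration, not part of the repository):

```python
# Optional runtime guard mirroring the requirements.txt comments; illustrative only.
from packaging.version import Version

import transformers

installed = Version(transformers.__version__)

# Gemma 3 needs >=4.50, SmolLM3 needs >=4.53, and 4.54.0 breaks Gemma 3
# ("Max cache length is not consistent across layers"), so stay in [4.53, 4.54).
if not (Version("4.53") <= installed < Version("4.54")):
    raise RuntimeError(f"transformers {installed} is outside the tested 4.53.x range")

print(f"transformers {installed} OK")
```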