jedick committed
Commit 5f82b5a · 1 Parent(s): 6a3fed7

Start in edge mode

Files changed (1)
  1. app.py +9 -14
app.py CHANGED
@@ -15,13 +15,12 @@ import ast
 import os
 
 # Global settings for compute_mode and search_type
-COMPUTE = "cloud"
+COMPUTE = "edge"
 search_type = "hybrid"
 
-# Check for GPU
-if COMPUTE == "edge":
-    if not torch.cuda.is_available():
-        raise Exception("Can't use edge compute with no GPU")
+# Switch to cloud mode if GPU isn't available
+if not torch.cuda.is_available():
+    COMPUTE = "cloud"
 
 # Keep LangChain graph in a global variable (shared across sessions)
 graph_edge = None
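
The new startup logic prefers edge mode but degrades gracefully instead of raising an exception when no GPU is present. A minimal standalone sketch of the pattern (assuming only that torch is installed):

import torch

# Prefer local (edge) inference by default
COMPUTE = "edge"
# Fall back to the cloud API on machines without a CUDA GPU,
# rather than raising an exception as the old code did
if not torch.cuda.is_available():
    COMPUTE = "cloud"

print(f"Starting in {COMPUTE} mode")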
@@ -64,11 +63,6 @@ def run_workflow(input, history, thread_id):
 
     print(f"Using thread_id: {thread_id}")
 
-    # # Display the user input in the history
-    # history.append(gr.ChatMessage(role="user", content=input))
-    # # Return the history and empty lists for emails and citations texboxes
-    # yield history, [], []
-
     # Asynchronously stream graph steps for a single input
     # https://langchain-ai.lang.chat/langgraph/reference/graphs/#langgraph.graph.state.CompiledStateGraph
     for step in graph.stream(
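
The surviving loop streams steps from a compiled LangGraph graph. For readers unfamiliar with that API, here is a self-contained toy illustration of the same streaming pattern; the real app compiles a retrieval-and-answer graph, and the state and node names below are invented for this sketch:

from typing import TypedDict
from langgraph.graph import StateGraph, START, END

class State(TypedDict):
    question: str
    answer: str

def answer_node(state: State) -> dict:
    # A real node would call a retriever and an LLM here
    return {"answer": f"You asked: {state['question']}"}

builder = StateGraph(State)
builder.add_node("answer", answer_node)
builder.add_edge(START, "answer")
builder.add_edge("answer", END)
graph = builder.compile()

# Each step maps a node name to the state update it produced
for step in graph.stream({"question": "How do I read a CSV file in R?"}):
    print(step)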
@@ -215,8 +209,8 @@ with gr.Blocks(
 
     compute_mode = gr.Radio(
         choices=[
-            "cloud",
             "edge" if torch.cuda.is_available() else "edge (not available)",
+            "cloud",
         ],
         value=COMPUTE,
         label="Compute Mode",
@@ -293,7 +287,7 @@ with gr.Blocks(
     **Chat with the [R-help mailing list archives]((https://stat.ethz.ch/pipermail/r-help/)).**
     An LLM turns your question into a search query, including year ranges, and generates an answer from the retrieved emails.
     You can ask follow-up questions with the chat history as context.
-    ➡️ To clear the history and start a new chat, press the 🗑️ trash button.
+    ➡️ To clear the history and start a new chat, press the 🗑️ clear button.
     **_Answers may be incorrect._**
     """
     return intro
@@ -302,13 +296,14 @@ with gr.Blocks(
     if compute_mode.startswith("cloud"):
         status_text = f"""
         📍 Now in **cloud** mode, using the OpenAI API<br>
-        ✨ text-embedding-3-small and {openai_model}<br>
         ⚠️ **_Privacy Notice_**: Data sharing with OpenAI is enabled<br>
+        ✨ text-embedding-3-small and {openai_model}<br>
         🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
         """
     if compute_mode.startswith("edge"):
         status_text = f"""
         📍 Now in **edge** mode, using ZeroGPU hardware<br>
+        ⌛ Response time is ca. 2-3 minutes; please be patient<br>
         ✨ Embeddings: [Nomic](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5); LLM: [{model_id}](https://huggingface.co/{model_id})<br>
         🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
         """
@@ -556,7 +551,7 @@ with gr.Blocks(
     # When app is launched, check if data is present, download it if necessary,
     # hide chat interface during downloading, show downloading and extracting
     # steps as textboxes, show error textbox if needed, restore chat interface,
-    # and show database info
+    # and update database info
 
     # fmt: off
     demo.load(
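
demo.load runs the startup routine once per page load. A stripped-down sketch of the pattern (the handler body here is a hypothetical placeholder for the real download-and-extract logic):

import gradio as gr

def on_load():
    # The real app checks for the database, downloads and extracts it if
    # necessary, and reports the result; this stub just returns a status
    return "Database ready"

with gr.Blocks() as demo:
    db_info = gr.Textbox(label="Database Info")
    demo.load(on_load, inputs=None, outputs=db_info)

demo.launch()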
 