Spaces · Running on Zero

Commit 5f82b5a (parent: 6a3fed7) · jedick committed
Start in edge mode

app.py CHANGED
@@ -15,13 +15,12 @@ import ast
 import os
 
 # Global settings for compute_mode and search_type
-COMPUTE = "cloud"
+COMPUTE = "edge"
 search_type = "hybrid"
 
-# Raise an exception if edge mode is selected without a GPU
-if COMPUTE == "edge":
-    if not torch.cuda.is_available():
-        raise Exception("Can't use edge compute with no GPU")
+# Switch to cloud mode if GPU isn't available
+if not torch.cuda.is_available():
+    COMPUTE = "cloud"
 
 # Keep LangChain graph in a global variable (shared across sessions)
 graph_edge = None
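This is the core of the commit: instead of refusing to start when no GPU is present, the app now defaults to edge mode and silently falls back to cloud. A minimal standalone sketch of the pattern (only `torch` is assumed):

```python
import torch

# Default to on-device ("edge") compute; fall back to the cloud API
# when no CUDA GPU is available, instead of raising an exception
COMPUTE = "edge"
if not torch.cuda.is_available():
    COMPUTE = "cloud"

print(f"Compute mode: {COMPUTE}")
```

The fallback keeps the Space usable on CPU-only hardware, where the old code would have raised an exception at startup.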
@@ -64,11 +63,6 @@ def run_workflow(input, history, thread_id):
 
     print(f"Using thread_id: {thread_id}")
 
-    # # Display the user input in the history
-    # history.append(gr.ChatMessage(role="user", content=input))
-    # # Return the history and empty lists for emails and citations texboxes
-    # yield history, [], []
-
     # Asynchronously stream graph steps for a single input
     # https://langchain-ai.lang.chat/langgraph/reference/graphs/#langgraph.graph.state.CompiledStateGraph
     for step in graph.stream(
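For context, `run_workflow` streams intermediate steps from a compiled LangGraph graph. A hedged sketch of the loop around `graph.stream(...)`, assuming a `messages` state key and a checkpointer keyed by `thread_id` (the input schema is an assumption, not shown in the diff):

```python
# Sketch only: `graph`, `input`, and `thread_id` come from the surrounding app.
# The checkpointer uses thread_id to keep per-conversation state.
config = {"configurable": {"thread_id": thread_id}}
for step in graph.stream(
    {"messages": [("user", input)]},  # assumed state schema
    config=config,
    stream_mode="values",
):
    # With stream_mode="values", each step is the full graph state
    # after a node finishes; the last message is the newest output
    step["messages"][-1].pretty_print()
```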
@@ -215,8 +209,8 @@ with gr.Blocks(
 
     compute_mode = gr.Radio(
         choices=[
-            "cloud",
             "edge" if torch.cuda.is_available() else "edge (not available)",
+            "cloud",
         ],
         value=COMPUTE,
         label="Compute Mode",
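The radio options are reordered so that edge, the new default, is listed first. Made self-contained (the surrounding layout is assumed), the component looks like this:

```python
import gradio as gr
import torch

COMPUTE = "edge" if torch.cuda.is_available() else "cloud"

with gr.Blocks() as demo:
    compute_mode = gr.Radio(
        choices=[
            # Edge first, to match the new default
            "edge" if torch.cuda.is_available() else "edge (not available)",
            "cloud",
        ],
        value=COMPUTE,
        label="Compute Mode",
    )

demo.launch()
```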
@@ -293,7 +287,7 @@ with gr.Blocks(
     **Chat with the [R-help mailing list archives]((https://stat.ethz.ch/pipermail/r-help/)).**
     An LLM turns your question into a search query, including year ranges, and generates an answer from the retrieved emails.
     You can ask follow-up questions with the chat history as context.
-    ➡️ To clear the history and start a new chat, press the 🗑️
+    ➡️ To clear the history and start a new chat, press the 🗑️ clear button.
     **_Answers may be incorrect._**
     """
     return intro
@@ -302,13 +296,14 @@ with gr.Blocks(
     if compute_mode.startswith("cloud"):
         status_text = f"""
         Now in **cloud** mode, using the OpenAI API<br>
-        ✨ text-embedding-3-small and {openai_model}<br>
         ⚠️ **_Privacy Notice_**: Data sharing with OpenAI is enabled<br>
+        ✨ text-embedding-3-small and {openai_model}<br>
         See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
         """
     if compute_mode.startswith("edge"):
         status_text = f"""
         Now in **edge** mode, using ZeroGPU hardware<br>
+        ⏳ Response time is ca. 2-3 minutes; please be patient<br>
         ✨ Embeddings: [Nomic](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5); LLM: [{model_id}](https://huggingface.co/{model_id})<br>
         See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
         """
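The status message is rebuilt from the selected mode, and the edge branch now warns about response time. A sketch of how such a status might be wired to the radio; the `.change()` handler and the `gr.Markdown` component here are assumptions, not part of this diff:

```python
import gradio as gr

def compute_status(compute_mode):
    # Condensed from the diff: describe the active compute mode
    if compute_mode.startswith("cloud"):
        return "Now in **cloud** mode, using the OpenAI API"
    return "Now in **edge** mode, using ZeroGPU hardware"

with gr.Blocks() as demo:
    compute_mode = gr.Radio(["edge", "cloud"], value="edge", label="Compute Mode")
    status = gr.Markdown(compute_status("edge"))
    # Assumed wiring: refresh the status whenever the mode changes
    compute_mode.change(compute_status, inputs=compute_mode, outputs=status)

demo.launch()
```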
@@ -556,7 +551,7 @@ with gr.Blocks(
     # When app is launched, check if data is present, download it if necessary,
     # hide chat interface during downloading, show downloading and extracting
     # steps as textboxes, show error textbox if needed, restore chat interface,
-    # and
+    # and update database info
 
     # fmt: off
     demo.load(
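The comment fix documents one more startup step: updating database info after the download/extract sequence. A sketch of a `demo.load` chain along those lines; the handler and component names are illustrative, not from app.py:

```python
import gradio as gr

def check_data():
    # Placeholder: check for the data, download and extract if missing
    return "Data ready"

def update_database_info():
    # Placeholder: report database details (e.g. number of indexed emails)
    return "Database info updated"

with gr.Blocks() as demo:
    download_status = gr.Textbox(label="Download status")
    database_info = gr.Textbox(label="Database info")
    # Startup chain: verify the data first, then refresh the database info
    demo.load(check_data, outputs=download_status).then(
        update_database_info, outputs=database_info
    )

demo.launch()
```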