Spaces: Running on Zero
jedick committed
Commit · f8c72d3
Parent(s): dac4e7d
Use session state for LangChain graph

app.py CHANGED
@@ -18,55 +18,58 @@ import os
 # Setup environment variables
 load_dotenv(dotenv_path=".env", override=True)

-# Global
-COMPUTE = "local"
+# Global setting for search type
 search_type = "hybrid"

-# Global variables for LangChain graph
+# Global variables for LangChain graph: use dictionaries to store user-specific instances
+# https://www.gradio.app/guides/state-in-blocks
+graph_instances = {"local": {}, "remote": {}}


-def run_workflow(input, history, thread_id):
+def cleanup_graph(request: gr.Request):
+    if request.session_hash in graph_instances["local"]:
+        del graph_instances["local"][request.session_hash]
+        print(f"Deleted local graph for session {request.session_hash}")
+    if request.session_hash in graph_instances["remote"]:
+        del graph_instances["remote"][request.session_hash]
+        print(f"Deleted remote graph for session {request.session_hash}")
+
+
+def run_workflow(input, history, compute_mode, thread_id, session_hash):
     """The main function to run the chat workflow"""

-    if COMPUTE == "local":
-        # We don't want the app to switch into remote mode without notification,
-        # so ask the user to do it
+    # Error if user tries to run local mode without GPU
+    if compute_mode == "local":
         if not torch.cuda.is_available():
             raise gr.Error(
                 "Local mode requires GPU. Please select remote mode.",
                 print_exception=False,
             )
+
+    # Get graph for compute mode
+    graph = graph_instances[compute_mode].get(session_hash)
+    if graph is not None:
+        print(f"Get {compute_mode} graph for session {session_hash}")

     if graph is None:
         # Notify when we're loading the local model because it takes some time
-        if COMPUTE == "local":
+        if compute_mode == "local":
             gr.Info(
                 f"Please wait for the local model to load",
                 duration=15,
                 title=f"Model loading...",
             )
         # Get the chat model and build the graph
-        chat_model = GetChatModel(COMPUTE)
-        graph_builder = BuildGraph(chat_model, COMPUTE, search_type)
+        chat_model = GetChatModel(compute_mode)
+        graph_builder = BuildGraph(chat_model, compute_mode, search_type)
         # Compile the graph with an in-memory checkpointer
         memory = MemorySaver()
         graph = graph_builder.compile(checkpointer=memory)
         # Set global graph for compute mode
-        # Notify when model finishes loading
-        gr.Success(f"{COMPUTE}", duration=4, title=f"Model loaded!")
-        print(f"Set graph for {COMPUTE}, {search_type}!")
+        graph_instances[compute_mode][session_hash] = graph
+        print(f"Set {compute_mode} graph for session {session_hash}")
+        # Notify when model finishes loading
+        gr.Success(f"{compute_mode}", duration=4, title=f"Model loaded")

     print(f"Using thread_id: {thread_id}")

@@ -180,13 +183,16 @@ def run_workflow(input, history, thread_id):
     yield history, None, citations


-def to_workflow(*args):
+def to_workflow(request: gr.Request, *args):
     """Wrapper function to call function with or without @spaces.GPU"""
-    if COMPUTE == "local":
-        for value in run_workflow_local(*args):
+    compute_mode = args[2]
+    # Add session_hash to arguments
+    new_args = args + (request.session_hash,)
+    if compute_mode == "local":
+        for value in run_workflow_local(*new_args):
             yield value
-    if COMPUTE == "remote":
-        for value in run_workflow_remote(*args):
+    if compute_mode == "remote":
+        for value in run_workflow_remote(*new_args):
             yield value

@@ -236,7 +242,7 @@ with gr.Blocks(
             "local",
             "remote",
         ],
-        value=COMPUTE,
+        value=("local" if torch.cuda.is_available() else "remote"),
         label="Compute Mode",
        info=(None if torch.cuda.is_available() else "NOTE: local mode requires GPU"),
        render=False,
@@ -444,10 +450,6 @@ with gr.Blocks(
         """Return updated value for a component"""
         return gr.update(value=value)

-    def set_compute(compute_mode):
-        global COMPUTE
-        COMPUTE = compute_mode
-
     def set_avatar(compute_mode):
         if compute_mode == "remote":
             image_file = "images/cloud.png"
@@ -475,13 +477,6 @@ with gr.Blocks(
         # Display the content in the textbox
         return content, change_visibility(True)

-    # def update_citations(citations):
-    #     if citations == []:
-    #         # Blank out and hide the citations textbox when new input is submitted
-    #         return "", change_visibility(False)
-    #     else:
-    #         return citations, change_visibility(True)
-
     # --------------
     # Event handlers
     # --------------
@@ -495,11 +490,6 @@ with gr.Blocks(
         return component.clear()

     compute_mode.change(
-        # Update global COMPUTE variable
-        set_compute,
-        [compute_mode],
-        api_name=False,
-    ).then(
         # Change the app status text
         get_status_text,
         [compute_mode],
@@ -527,7 +517,7 @@ with gr.Blocks(
     input.submit(
         # Submit input to the chatbot
         to_workflow,
-        [input, chatbot, thread_id],
+        [input, chatbot, compute_mode, thread_id],
         [chatbot, retrieved_emails, citations_text],
         api_name=False,
     )
@@ -661,6 +651,9 @@ with gr.Blocks(
     )
     # fmt: on

+    # Clean up graph instances when page is closed/refreshed
+    demo.unload(cleanup_graph)
+

 if __name__ == "__main__":
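
The commit's core pattern is to key per-user instances on `gr.Request.session_hash` and free them in a `demo.unload` callback. A minimal standalone sketch of that pattern follows; the names (`instances`, `greet`, `cleanup`) are illustrative, not the app's actual code:

```python
import gradio as gr

# Illustrative stand-in for graph_instances: one entry per browser session
instances = {}

def greet(name, request: gr.Request):
    # Gradio injects gr.Request when a parameter is annotated with it;
    # session_hash identifies the caller's browser session
    state = instances.setdefault(request.session_hash, {"count": 0})
    state["count"] += 1
    return f"Hello {name}! Call #{state['count']} in this session."

def cleanup(request: gr.Request):
    # Drop this session's entry when the tab is closed or refreshed
    instances.pop(request.session_hash, None)

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    output = gr.Textbox(label="Output")
    name.submit(greet, [name], [output])
    # The same hook the commit uses to free per-session graphs
    demo.unload(cleanup)

if __name__ == "__main__":
    demo.launch()
```

Keying on `session_hash` instead of a module-level global is what lets concurrent users hold separate graphs, which the removed `COMPUTE` global could not guarantee.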
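The diff also keeps compiling each graph with `MemorySaver()` and threading a `thread_id` through `run_workflow`. In LangGraph, the checkpointer keys saved conversation state by that `thread_id`, so per-session graphs plus per-chat thread IDs give isolated histories. Below is a minimal sketch of the mechanism, with a trivial echo node standing in for the app's `BuildGraph`:

```python
from langchain_core.messages import AIMessage, HumanMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

def respond(state: MessagesState):
    # Trivial node standing in for the app's chat-model call
    return {"messages": [AIMessage(f"Echo: {state['messages'][-1].content}")]}

builder = StateGraph(MessagesState)
builder.add_node("respond", respond)
builder.add_edge(START, "respond")

# Compile with an in-memory checkpointer, as in the diff
graph = builder.compile(checkpointer=MemorySaver())

# State is saved per thread_id: reusing it continues the conversation
config = {"configurable": {"thread_id": "abc123"}}
graph.invoke({"messages": [HumanMessage("Hello")]}, config)
result = graph.invoke({"messages": [HumanMessage("Again")]}, config)
print(len(result["messages"]))  # 4: two human turns, two echoes
```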
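Finally, `to_workflow` dispatches to `run_workflow_local` or `run_workflow_remote`, both defined outside this diff. On a ZeroGPU Space (this one is "Running on Zero"), a common arrangement is that only the local variant carries the `@spaces.GPU` decorator, so GPU hardware is requested only when local mode needs it. This is an assumption about code not shown in the commit:

```python
import spaces

# Assumed wrappers (defined elsewhere in app.py, outside this diff)
@spaces.GPU
def run_workflow_local(*args):
    # ZeroGPU attaches a GPU for the duration of this call
    yield from run_workflow(*args)

def run_workflow_remote(*args):
    # Remote mode calls an external API, so no GPU is requested
    yield from run_workflow(*args)
```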