Spaces: Running on Zero
jedick committed
Commit · d6be5fa
Parent(s): 77b89d7

Download model during app startup

Files changed:
- app.py +10 -6
- main.py +5 -3
- mods/langchain_chroma.py +1 -1
app.py CHANGED

@@ -1,12 +1,12 @@
 import gradio as gr
-from main import GetChatModel
+from main import GetChatModel, openai_model, model_id
 from graph import BuildGraph
 from retriever import db_dir
-from langgraph.checkpoint.memory import MemorySaver
-from dotenv import load_dotenv
-from main import openai_model, model_id
 from util import get_sources, get_start_end_months
 from mods.tool_calling_llm import extract_think
+from huggingface_hub import snapshot_download
+from langgraph.checkpoint.memory import MemorySaver
+from dotenv import load_dotenv
 import requests
 import zipfile
 import shutil

@@ -19,10 +19,14 @@ import ast
 import os
 import re
 
-
 # Setup environment variables
 load_dotenv(dotenv_path=".env", override=True)
 
+# Download model snapshots from Hugging Face Hub
+print(f"Downloading/loading checkpoints for {model_id}...")
+ckpt_dir = snapshot_download(model_id, local_dir_use_symlinks=False)
+print(f"Using checkpoints from {ckpt_dir}")
+
 # Global setting for search type
 search_type = "hybrid"
 
@@ -86,7 +90,7 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
         title=f"Model loading...",
     )
     # Get the chat model and build the graph
-    chat_model = GetChatModel(compute_mode)
+    chat_model = GetChatModel(compute_mode, ckpt_dir)
     graph_builder = BuildGraph(
         chat_model, compute_mode, search_type, think_answer=True
     )
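The startup download added above boils down to a single snapshot_download call. A minimal runnable sketch of the same pattern, with "gpt2" as a stand-in repo id rather than the model_id this app imports from main.py:

    from huggingface_hub import snapshot_download

    model_id = "gpt2"  # placeholder; the app uses model_id from main.py

    # The first call downloads every file in the repo to the local cache;
    # later calls find the cached snapshot and return its path right away.
    ckpt_dir = snapshot_download(model_id)
    print(f"Using checkpoints from {ckpt_dir}")

Doing this at import time front-loads the slow network work into app startup instead of the first chat request. Note that in current huggingface_hub releases, local_dir_use_symlinks only takes effect when local_dir is also passed, so the call in app.py effectively relies on the default cache layout.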
main.py CHANGED

@@ -128,12 +128,13 @@ def ProcessDirectory(path, compute_mode):
         print(f"Chroma: no change for {file_path}")
 
 
-def GetChatModel(compute_mode):
+def GetChatModel(compute_mode, ckpt_dir=None):
     """
     Get a chat model.
 
     Args:
         compute_mode: Compute mode for chat model (remote or local)
+        ckpt_dir: Checkpoint directory for model weights (optional)
     """
 
     if compute_mode == "remote":

@@ -148,9 +149,10 @@ def GetChatModel(compute_mode):
 
     # Define the pipeline to pass to the HuggingFacePipeline class
     # https://huggingface.co/blog/langchain
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    id_or_dir = ckpt_dir if ckpt_dir else model_id
+    tokenizer = AutoTokenizer.from_pretrained(id_or_dir)
     model = AutoModelForCausalLM.from_pretrained(
-        model_id,
+        id_or_dir,
         # We need this to load the model in BF16 instead of fp32 (torch.float)
         torch_dtype=torch.bfloat16,
     )
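In isolation, the fallback works as in the sketch below, assuming the transformers and torch imports that main.py already uses; load_local_model is an illustrative name, not a function in this repo:

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    def load_local_model(model_id, ckpt_dir=None):
        # Prefer the snapshot directory downloaded at app startup; fall back
        # to the Hub id, letting from_pretrained manage the download itself.
        id_or_dir = ckpt_dir if ckpt_dir else model_id
        tokenizer = AutoTokenizer.from_pretrained(id_or_dir)
        model = AutoModelForCausalLM.from_pretrained(
            id_or_dir,
            # Load the weights in BF16 instead of the fp32 default
            torch_dtype=torch.bfloat16,
        )
        return tokenizer, model

Keeping ckpt_dir optional means callers that never pre-download a snapshot (for example, local development scripts) keep working unchanged.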
mods/langchain_chroma.py CHANGED

@@ -470,7 +470,7 @@ class Chroma(VectorStore):
 
         See more: https://docs.trychroma.com/reference/py-collection#query
         """
-        #
+        # Possible fix for ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')
         # https://github.com/langchain-ai/langchain/issues/26884
         chromadb.api.client.SharedSystemClient.clear_system_cache()
        return self._collection.query(
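As a standalone illustration of the workaround (the persist path below is a placeholder, not the db_dir this app actually uses):

    import chromadb

    # Drop chromadb's cached client/tenant state so the next client starts
    # fresh; the issue linked above suggests this for the intermittent
    # "Could not connect to tenant default_tenant" error.
    chromadb.api.client.SharedSystemClient.clear_system_cache()
    client = chromadb.PersistentClient(path="db")  # placeholder path

Clearing the cache on every query is heavy-handed but cheap, and it avoids reusing stale client state after the underlying database has been recreated.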