Spaces:

PabloVD
/

CAMELSDocBot

Sleeping

App Files Files Community

PabloVD committed on Oct 29, 2024

Commit

a08531a

1 Parent(s): fdb4410

Old version of packages

Browse files

Files changed (3) hide show

app.py +1 -2
requirements.txt +17 -4
worker.py +13 -22

app.py CHANGED Viewed

@@ -10,8 +10,7 @@ url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
 r = requests.get(url, stream=True)
 document_path = Path('metadata.pdf')
 document_path.write_bytes(r.content)
-worker.process_document(document_path)
 def handle_prompt(message, history):
     bot_response = worker.process_prompt(message, history)

 r = requests.get(url, stream=True)
 document_path = Path('metadata.pdf')
 document_path.write_bytes(r.content)
+worker.process_document(str(document_path))
 def handle_prompt(message, history):
     bot_response = worker.process_prompt(message, history)

requirements.txt CHANGED Viewed

@@ -1,4 +1,17 @@
-langchain
-langchain-community
-langchain-huggingface
-chromadb

+Flask
+Flask_Cors
+pdf2image
+pypdf
+tiktoken
+pandas==1.5
+langchain==0.0.254
+atlassian-python-api==3.36.0
+chromadb==0.3.25
+huggingface-hub==0.16.4
+torch==2.0.1
+sentence-transformers==2.2.2
+InstructorEmbedding==1.0.0
+p4python==2023.1.2454917
+lxml==4.9.2
+bs4==0.0.1
+ibm-watson-machine-learning

worker.py CHANGED Viewed

@@ -1,21 +1,11 @@
 import torch
 from langchain.chains import RetrievalQA
-from langchain_community.embeddings import HuggingFaceInstructEmbeddings
-from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain_huggingface import HuggingFaceEndpoint
-from sentence_transformers import SentenceTransformer # Use SentenceTransformer module to use Hugging face Model
-import pip
-def install(package):
-    if hasattr(pip, 'main'):
-        pip.main(['install', package])
-    else:
-        pip._internal.main(['install', package])
-# Temporal fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
-# install("sentence-transformers==2.2.2")
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -30,10 +20,10 @@ embeddings = None
 def init_llm():
     global llm_hub, embeddings
     # Set up the environment variable for HuggingFace and initialize the desired model.
-    # tokenfile = open("api_token.txt")
-    # api_token = tokenfile.readline().replace("\n","")
-    # tokenfile.close()
-    # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
     # repo name for the model
     # model_id = "tiiuae/falcon-7b-instruct"
@@ -42,7 +32,8 @@ def init_llm():
     # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
     # load the model into the HuggingFaceHub
-    llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
     llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id
     # llm_hub.invoke('foo bar')
@@ -50,10 +41,10 @@ def init_llm():
     embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
     # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
-    emb_model = SentenceTransformer(embedddings_model)
     embeddings = HuggingFaceInstructEmbeddings(
-        model_name=emb_model,
         model_kwargs={"device": DEVICE}
     )

 import torch
 from langchain.chains import RetrievalQA
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.llms import HuggingFaceHub
+import os
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
 def init_llm():
     global llm_hub, embeddings
     # Set up the environment variable for HuggingFace and initialize the desired model.
+    tokenfile = open("api_token.txt")
+    api_token = tokenfile.readline().replace("\n","")
+    tokenfile.close()
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
     # repo name for the model
     # model_id = "tiiuae/falcon-7b-instruct"
     # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
     # load the model into the HuggingFaceHub
+    #llm_hub = HuggingFaceHub(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
+    llm_hub = HuggingFaceHub(repo_id=model_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 600, "max_length": 600})
     llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id
     # llm_hub.invoke('foo bar')
     embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
     # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
+    # emb_model = SentenceTransformer(embedddings_model)
     embeddings = HuggingFaceInstructEmbeddings(
+        model_name=embedddings_model,
         model_kwargs={"device": DEVICE}
     )