import torch
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFaceHub
import os

# Check for GPU availability and set the appropriate device for computation.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Global variables
conversation_retrieval_chain = None
chat_history = []
llm_hub = None
embeddings = None

# Function to initialize the language model and its embeddings
def init_llm():
    global llm_hub, embeddings

    # Read the HuggingFace API token and expose it via the environment variable.
    with open("api_token.txt") as tokenfile:
        api_token = tokenfile.readline().strip()
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token

    # Repo name for the model
    # model_id = "tiiuae/falcon-7b-instruct"
    model_id = "microsoft/Phi-3.5-mini-instruct"
    # model_id = "meta-llama/Llama-3.2-1B-Instruct"
    # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

    # Load the model through the HuggingFaceHub wrapper.
    # llm_hub = HuggingFaceHub(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length": 600})
    llm_hub = HuggingFaceHub(repo_id=model_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 600, "max_length": 600})
    llm_hub.client.api_url = "https://api-inference.huggingface.co/models/" + model_id
    # llm_hub.invoke('foo bar')

    # Initialize embeddings using a pre-trained model to represent the text data.
    embeddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
    # embeddings_model = "sentence-transformers/all-MiniLM-L6-v2"
    # emb_model = SentenceTransformer(embeddings_model)
    embeddings = HuggingFaceInstructEmbeddings(
        model_name=embeddings_model,
        model_kwargs={"device": DEVICE}
    )
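    # Optional sanity check (a minimal sketch added here, not part of the original flow):
    # embed_query() is the standard LangChain Embeddings method and returns a list of floats,
    # so it can confirm the embeddings model actually loaded on the chosen device.
    # _probe = embeddings.embed_query("hello world")
    # assert isinstance(_probe, list) and len(_probe) > 0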
# Function to process a PDF document
def process_document(document_path):
    global conversation_retrieval_chain

    # Load the document
    loader = PyPDFLoader(document_path)
    documents = loader.load()

    # Split the document into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
    texts = text_splitter.split_documents(documents)

    # Create an embeddings database using Chroma from the split text chunks.
    db = Chroma.from_documents(texts, embedding=embeddings)

    # Build the QA chain, which utilizes the LLM and retriever for answering questions.
    # By default, the vectorstore retriever uses similarity search.
    # If the underlying vectorstore supports maximum marginal relevance search, you can specify that as the search type (search_type="mmr").
    # You can also pass search kwargs such as k, which controls how many retrieved chunks are sent to the LLM.
    retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25})
    conversation_retrieval_chain = RetrievalQA.from_chain_type(
        llm=llm_hub,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False,
        input_key="question"
        # chain_type_kwargs={"prompt": prompt}  # uncomment if you are using a prompt template (see the sketch below)
    )
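    # Sketch of the prompt template referenced by the commented-out chain_type_kwargs above
    # (an illustrative assumption, not part of the original chain). The "stuff" chain expects
    # the retrieved chunks in {context} and the user query in {question}:
    #
    # from langchain.prompts import PromptTemplate
    # prompt = PromptTemplate(
    #     input_variables=["context", "question"],
    #     template=(
    #         "Use the following context to answer the question.\n"
    #         "Context: {context}\n"
    #         "Question: {question}\n"
    #         "Answer:"
    #     ),
    # )
    # # then pass chain_type_kwargs={"prompt": prompt} to RetrievalQA.from_chain_type above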
# Function to process a user prompt
def process_prompt(prompt, chat_history):
    global conversation_retrieval_chain
    # global chat_history

    # Query the model
    output = conversation_retrieval_chain.invoke({"question": prompt, "chat_history": chat_history})
    answer = output["result"]

    # Update the chat history
    chat_history.append((prompt, answer))

    # Return the model's response
    return answer
# Initialize the language model
init_llm()
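# Example usage (a minimal sketch; "example.pdf" and the question are hypothetical placeholders,
# and the demo stays commented out because this module is normally imported by the app):
#
# if __name__ == "__main__":
#     process_document("example.pdf")
#     print(process_prompt("What is this document about?", chat_history))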