Spaces:
Build error
Build error
| import os | |
| import pdb | |
| import pickle | |
| from langchain.llms import OpenAI | |
| from langchain.vectorstores.faiss import FAISS | |
| from langchain.chains import ChatVectorDBChain | |
| from langchain.prompts.prompt import PromptTemplate | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.document_loaders import UnstructuredFileLoader | |
| from langchain.embeddings import OpenAIEmbeddings | |
# Prompt text used to rewrite a follow-up question as a standalone question.
# It has two placeholders: {chat_history} and {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
You can assume the discussion is about the video content.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
# LlmReasoner passes this as `condense_question_prompt` when it builds its chain.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
# Prompt text for the answering step. It has two placeholders: {question} and
# {context}; the context is placed between the two "=========" separator lines.
# NOTE(review): "connet" below is a typo for "connect". It is left untouched
# here because this text is sent to the model at runtime; fix it deliberately.
qa_template = """You are an AI assistant designed for answering questions about a video.
You are given a document and a question, the document records what people see and hear from this video.
Try to connet these information and provide a conversational answer.
Question: {question}
=========
{context}
=========
"""
# LlmReasoner passes this as `qa_prompt` when it builds its chain.
QA_PROMPT = PromptTemplate(template=qa_template, input_variables=["question", "context"])
class LlmReasoner:
    """Answer questions about a video from its text log, keeping chat history.

    For a given ``video_id`` the class reads ``<data_dir>/<video_id>.log``,
    indexes it into a FAISS vector store, and caches that store as
    ``<tmp_dir>/<video_id>.pkl``. Questions are answered through a
    ``ChatVectorDBChain`` built on top of the store.

    Call ``exist_vectorstore`` or ``create_vectorstore`` before asking
    questions; until then ``qa_chain`` is ``None``.
    """

    def __init__(self, args):
        """Store configuration and create the LLM client.

        Args:
            args: Object exposing ``gpt_version``, ``data_dir`` and
                ``tmp_dir`` attributes (presumably an argparse namespace;
                confirm against the caller).
        """
        self.history = []  # list of (question, answer) tuples
        self.gpt_version = args.gpt_version
        self.data_dir = args.data_dir
        self.tmp_dir = args.tmp_dir
        self.qa_chain = None
        self.vectorstore = None
        self.top_k = 3  # assigned to the chain's top_k_docs_for_context
        self.llm = OpenAI(temperature=0, model_name=self.gpt_version)

    def _pkl_path(self, video_id):
        """Return the path of the cached vector store for ``video_id``."""
        return os.path.join(self.tmp_dir, f"{video_id}.pkl")

    def _log_path(self, video_id):
        """Return the path of the text log for ``video_id``."""
        return os.path.join(self.data_dir, f"{video_id}.log")

    def _load_chain(self, pkl_path):
        """Load the pickled vector store and build the QA chain on top of it."""
        # SECURITY: pickle.load can execute arbitrary code. Only load cache
        # files that this application wrote itself into a trusted tmp_dir.
        with open(pkl_path, "rb") as file:
            self.vectorstore = pickle.load(file)
        self.qa_chain = ChatVectorDBChain.from_llm(
            self.llm,
            self.vectorstore,
            qa_prompt=QA_PROMPT,
            condense_question_prompt=CONDENSE_QUESTION_PROMPT,
        )
        self.qa_chain.top_k_docs_for_context = self.top_k

    def _index_log(self, video_id, pkl_path):
        """Build a FAISS store from the video's log and pickle it to ``pkl_path``."""
        loader = UnstructuredFileLoader(self._log_path(video_id))
        raw_documents = loader.load()
        # Split text
        text_splitter = RecursiveCharacterTextSplitter()
        documents = text_splitter.split_documents(raw_documents)
        # Load data into the vector store
        embeddings = OpenAIEmbeddings()
        vectorstore = FAISS.from_documents(documents, embeddings)
        # Write to a staging file first and rename afterwards, so that an
        # interrupted dump never leaves a truncated cache file that later
        # calls would mistake for a valid one.
        staging_path = f"{pkl_path}.tmp"
        with open(staging_path, "wb") as f:
            pickle.dump(vectorstore, f)
        os.replace(staging_path, pkl_path)

    def exist_vectorstore(self, video_id):
        """Load the cached vector store for ``video_id`` if it is available.

        Despite the name this is not a pure check: when both the cache file
        and the log file exist, the store is loaded and the QA chain is built.

        Returns:
            True if the cache was found and loaded, False otherwise.
        """
        pkl_path = self._pkl_path(video_id)
        if not (os.path.exists(pkl_path) and os.path.exists(self._log_path(video_id))):
            return False
        self._load_chain(pkl_path)
        return True

    def create_vectorstore(self, video_id):
        """Ensure a vector store exists for ``video_id`` and build the QA chain.

        When no cache file exists yet, the video's log is indexed and the
        result is cached; the store is then loaded from the cache file.
        """
        pkl_path = self._pkl_path(video_id)
        if not os.path.exists(pkl_path):
            self._index_log(video_id, pkl_path)
        self._load_chain(pkl_path)

    def __call__(self, question):
        """Answer ``question`` using the current chain and chat history.

        The (question, answer) pair is appended to the history, and both are
        printed to stdout.

        Returns:
            The answer text produced by the chain.

        Raises:
            RuntimeError: If no vector store has been loaded or created yet.
        """
        if self.qa_chain is None:
            raise RuntimeError(
                "No QA chain available: call exist_vectorstore() or "
                "create_vectorstore() before asking questions."
            )
        print(f"Question: {question}")
        response = self.qa_chain({"question": question, "chat_history": self.history})["answer"]
        self.history.append((question, response))
        print(f"Assistant: {response}")
        print("\n")
        return response

    def clean_history(self):
        """Forget all previous question/answer pairs."""
        self.history = []