chore: linting

app/app.py (+18 -28)
@@ -1,8 +1,9 @@
 # Chroma compatibility issue resolution
 # https://docs.trychroma.com/troubleshooting#sqlite
-__import__(
+__import__("pysqlite3")
 import sys
-
+
+sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
 
 from tempfile import NamedTemporaryFile
 from typing import List
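Context for this hunk: Chroma needs SQLite 3.35 or newer, and the troubleshooting page linked in the comment recommends installing pysqlite3-binary and aliasing it over the stdlib module before chromadb is imported. A minimal sketch of the shim in isolation (assumes pysqlite3-binary is installed):

    # Swap pysqlite3 in for the stdlib sqlite3. This must run before
    # `import chromadb`, which checks the SQLite version at import time.
    __import__("pysqlite3")
    import sys

    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

    import sqlite3  # now backed by pysqlite3

    print(sqlite3.sqlite_version)  # expect 3.35 or newer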
@@ -11,7 +12,7 @@ import chainlit as cl
 from chainlit.types import AskFileResponse
 import chromadb
 from chromadb.config import Settings
-from langchain.chains import
+from langchain.chains import RetrievalQAWithSourcesChain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import PDFPlumberLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
@@ -31,7 +32,7 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
 
     Args:
         file (AskFileResponse): input file to be processed
-
+
     Raises:
         ValueError: when we fail to process PDF files. We consider PDF file
             processing failure when there's no text returned. For example, PDFs
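The Raises note above concerns scanned PDFs that contain no extractable text. A plausible shape for that check, consistent with the docstring but not copied from the file (the path is hypothetical):

    from langchain.document_loaders import PDFPlumberLoader

    loader = PDFPlumberLoader("input.pdf")  # hypothetical path
    documents = loader.load()

    # PDFs with no machine-readable text (e.g. pure scans) come back empty.
    if not any(doc.page_content.strip() for doc in documents):
        raise ValueError("PDF parsing failed: no text extracted")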
@@ -51,8 +52,7 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
     documents = loader.load()
 
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=3000,
-        chunk_overlap=100
+        chunk_size=3000, chunk_overlap=100
     )
     docs = text_splitter.split_documents(documents)
 
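For reference, the splitter options joined onto one line here are measured in characters, not tokens: each chunk is at most 3000 characters, and neighbouring chunks share up to 100 characters of context. A self-contained sketch using the same legacy langchain imports as this file (the input document is made up):

    from langchain.schema import Document
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=100)
    chunks = splitter.split_documents([Document(page_content="word " * 2000)])

    # ~10,000 characters in -> a few chunks of <= 3000 characters each,
    # with source metadata copied onto every chunk.
    print(len(chunks), max(len(c.page_content) for c in chunks))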
@@ -66,7 +66,9 @@ def process_file(*, file: AskFileResponse) -> List[Document]:
     return docs
 
 
-def create_search_engine(*, docs: List[Document], embeddings: Embeddings) -> VectorStore:
+def create_search_engine(
+    *, docs: List[Document], embeddings: Embeddings
+) -> VectorStore:
     """Takes a list of Langchain Documents and an embedding model API wrapper
     and build a search index using a VectorStore.
 
@@ -80,27 +82,21 @@ def create_search_engine(*, docs: List[Document], embeddings: Embeddings) -> VectorStore:
     """
     # Initialize Chromadb client to enable resetting and disable telemtry
     client = chromadb.EphemeralClient()
-    client_settings=Settings(
-        allow_reset=True,
-        anonymized_telemetry=False
-    )
+    client_settings = Settings(allow_reset=True, anonymized_telemetry=False)
 
     # Reset the search engine to ensure we don't use old copies.
     # NOTE: we do not need this for production
-    search_engine = Chroma(
-        client=client,
-        client_settings=client_settings
-    )
+    search_engine = Chroma(client=client, client_settings=client_settings)
     search_engine._client.reset()
     search_engine = Chroma.from_documents(
         client=client,
         documents=docs,
         embedding=embeddings,
-        client_settings=client_settings
+        client_settings=client_settings,
     )
 
     return search_engine
-
+
 
 @cl.on_chat_start
 async def on_chat_start():
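Behaviour is unchanged by the reflow above: an in-memory EphemeralClient, settings that permit reset() and disable telemetry, a throwaway Chroma handle used only to clear leftover state, then the real index built by Chroma.from_documents. A hedged sketch of the same pattern, simplified to pass the settings when the client is created (sample documents are invented; an OPENAI_API_KEY is assumed):

    import chromadb
    from chromadb.config import Settings
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.schema import Document
    from langchain.vectorstores import Chroma

    # In-memory client; allow_reset=True is what makes reset() legal.
    client = chromadb.EphemeralClient(
        settings=Settings(allow_reset=True, anonymized_telemetry=False)
    )
    client.reset()  # drop any collections left over from a previous session

    search_engine = Chroma.from_documents(
        client=client,
        documents=[Document(page_content="hello"), Document(page_content="world")],
        embedding=OpenAIEmbeddings(model="text-embedding-ada-002"),
    )
    print(search_engine.similarity_search("hello", k=1))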
@@ -123,20 +119,17 @@ async def on_chat_start():
     # Process and save data in the user session
     msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
-
+
     docs = process_file(file=file)
     cl.user_session.set("docs", docs)
     msg.content = f"`{file.name}` processed. Loading ..."
     await msg.update()
 
     # Indexing documents into our search engine
-    embeddings = OpenAIEmbeddings(
-        model="text-embedding-ada-002"
-    )
+    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
     try:
         search_engine = await cl.make_async(create_search_engine)(
-            docs=docs,
-            embeddings=embeddings
+            docs=docs, embeddings=embeddings
         )
     except Exception as e:
         await cl.Message(content=f"Error: {e}").send()
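One detail worth noting in this hunk: create_search_engine is a plain synchronous function, so cl.make_async runs it in a worker thread and keeps Chainlit's event loop responsive while embeddings are computed. A minimal sketch of that helper, with a made-up slow_build standing in for the real indexing:

    import time

    import chainlit as cl

    def slow_build(n_docs: int) -> str:
        time.sleep(2)  # stands in for embedding + indexing work
        return f"indexed {n_docs} docs"

    @cl.on_chat_start
    async def start():
        # Offload the blocking call to a thread and await its result.
        result = await cl.make_async(slow_build)(n_docs=42)
        await cl.Message(content=result).send()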
@@ -145,9 +138,7 @@ async def on_chat_start():
     await msg.update()
 
     model = ChatOpenAI(
-        model="gpt-3.5-turbo-16k-0613",
-        temperature=0,
-        streaming=True
+        model="gpt-3.5-turbo-16k-0613", temperature=0, streaming=True
     )
 
     chain = RetrievalQAWithSourcesChain.from_chain_type(
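The hunk ends at the opening of the from_chain_type call, so the chain's arguments are not part of this commit. For orientation only, a typical construction with this legacy API looks like the sketch below; chain_type and the retriever settings are assumptions, not taken from the file:

    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=model,  # the streaming ChatOpenAI configured above
        chain_type="stuff",  # assumption: retrieved chunks pasted into one prompt
        retriever=search_engine.as_retriever(search_kwargs={"k": 4}),  # assumption
    )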
@@ -164,13 +155,12 @@
 
 @cl.on_message
 async def main(message: cl.Message):
-
     # Let's load the chain from user_session
     chain = cl.user_session.get("chain")  # type: RetrievalQAWithSourcesChain
 
     response = await chain.acall(
         message.content,
-        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]
+        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)],
     )
     answer = response["answer"]
     sources = response["sources"].strip()
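For readers unfamiliar with RetrievalQAWithSourcesChain: acall returns a dict, and its "answer" and "sources" keys are what the last two lines unpack, while cl.AsyncLangchainCallbackHandler(stream_final_answer=True) streams the final answer into the UI as it is generated. A short sketch of the call site with a hypothetical question:

    response = await chain.acall(
        "What does the document conclude?",  # hypothetical user message
        callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)],
    )
    print(response["answer"])           # generated answer text
    print(response["sources"].strip())  # comma-separated source ids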