Spaces:

Samarth991
/

LLM-Chatbot

Sleeping

App Files Files Community

Samarth991 committed on Sep 10, 2023

Commit

e9840df

1 Parent(s): 278cbaa

adding my LLM-chatbot model

Browse files

Files changed (2) hide show

app.py +105 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import gradio as gr
+import torch as th
+from langchain.document_loaders import PDFMinerLoader,CSVLoader ,UnstructuredWordDocumentLoader,TextLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import SentenceTransformerEmbeddings
+from langchain.vectorstores import Chroma, FAISS
+from langchain import HuggingFaceHub
+# Device string forwarded to the sentence-transformer embedding model.
+# It must be an exact device name: a trailing space (as in 'cpu ') is not a
+# valid torch device string.
+DEVICE = 'cpu'
+# File kinds offered in the UI dropdown. Only 'pdf', 'text', 'csv' and 'word'
+# have a loader in document_loading(); 'wav' currently has none.
+FILE_EXT = ['pdf','text','csv','word','wav']
+def loading_pdf():
+    """Return the placeholder status text for an in-progress file load."""
+    status_message = "Loading..."
+    return status_message
+def process_documents(documents,data_chunk=1000,chunk_overlap=50):
+    """Split loaded documents into overlapping text chunks.
+
+    Args:
+        documents: A single loaded document or a list/tuple of them (the
+            loaders in this module return either shape).
+        data_chunk: Maximum size of each chunk, passed as ``chunk_size``.
+        chunk_overlap: Overlap between consecutive chunks.
+
+    Returns:
+        The list of chunked documents produced by the text splitter.
+
+    Raises:
+        ValueError: If ``documents`` is None (nothing was loaded).
+    """
+    if documents is None:
+        raise ValueError("No document was loaded; nothing to split.")
+    # split_documents() expects an iterable of documents. Wrap a lone document
+    # instead of indexing into it, and keep every document of a list rather
+    # than only the first one.
+    if not isinstance(documents, (list, tuple)):
+        documents = [documents]
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap)
+    return text_splitter.split_documents(documents)
+def get_hugging_face_model(model_id,API_key,temperature=0.1):
+    """Build a HuggingFaceHub LLM wrapper for the given repository.
+
+    Args:
+        model_id: Hub repository id, passed as ``repo_id``.
+        API_key: Token passed as ``huggingfacehub_api_token``.
+        temperature: Sampling temperature placed in ``model_kwargs``.
+
+    Returns:
+        The configured HuggingFaceHub instance.
+    """
+    generation_options = {"temperature": temperature, "max_new_tokens": 2048}
+    return HuggingFaceHub(
+        huggingfacehub_api_token=API_key,
+        repo_id=model_id,
+        model_kwargs=generation_options,
+    )
+def document_loading(file_data,doc_type='pdf',key=None):
+    """Load a file, split it into chunks and index the chunks with FAISS.
+
+    Args:
+        file_data: File name/path handed to the loader chosen by ``doc_type``.
+        doc_type: One of 'pdf', 'text', 'csv' or 'word'.
+        key: Currently unused; kept so existing callers keep working.
+
+    Returns:
+        The FAISS vector store built from the chunked document.
+
+    Raises:
+        ValueError: If ``doc_type`` has no matching loader.
+    """
+    # Names are resolved when the function runs, so loaders defined further
+    # down the module are available here.
+    loaders = {
+        'pdf': process_pdf_document,
+        'text': process_text_document,
+        'csv': process_csv_document,
+        'word': process_word_document,
+    }
+    loader = loaders.get(doc_type)
+    if loader is None:
+        # Fail early with a clear message instead of passing None downstream.
+        raise ValueError(
+            f"Unsupported document type {doc_type!r}; expected one of {sorted(loaders)}"
+        )
+    document = loader(document_file_name=file_data)
+    texts = process_documents(documents=document)
+    # Create the embedding model only once there is something to embed.
+    embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
+    vectordb = FAISS.from_documents(documents=texts, embedding=embedding_model)
+    # Hand the index back to the caller; previously it was built and discarded.
+    return vectordb
+def process_text_document(document_file_name):
+    """Load a plain-text file with TextLoader and return the loaded result."""
+    return TextLoader(document_file_name).load()
+def process_csv_document(document_file_name):
+    """Load a CSV file with CSVLoader and return the loaded result."""
+    return CSVLoader(file_path=document_file_name).load()
+def process_word_document(document_file_name):
+    """Load a Word file with UnstructuredWordDocumentLoader and return the result."""
+    return UnstructuredWordDocumentLoader(file_path=document_file_name).load()
+def process_pdf_document(document_file_name):
+    """Load a PDF with PDFMinerLoader and return its first loaded document.
+
+    Unlike the other ``process_*_document`` helpers, this returns a single
+    element of the loader output rather than the whole list.
+    """
+    loaded = PDFMinerLoader(document_file_name).load()
+    first_document = loaded[0]
+    return first_document
+# Page-level CSS handed to gr.Blocks: caps the main column at 700px and
+# centres it horizontally.
+css="""
+#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
+"""
+# HTML header rendered at the top of the page via gr.HTML(title).
+# NOTE(review): the text says no key is needed, while the UI defines an
+# "API key" textbox — confirm which is intended.
+title = """
+<div style="text-align: center;max-width: 700px;">
+    <h1>Chat with Data • OpenAI/HuggingFace</h1>
+    <p style="text-align: center;">Upload a file from your computer, click the "Load data to LangChain" button, <br />
+    when everything is ready, you can start asking questions about the data you uploaded ;) <br />
+    This version is just for QA retrival so it will not use chat history, and uses Hugging face as LLM,
+    so you don't need any key</p>
+</div>
+"""
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.HTML(title)
+        with gr.Column():
+            with gr.Box():
+                LLM_option = gr.Dropdown(['HuggingFace','OpenAI'],label='LLM',info='select the LLM to be used')
+                API_key = gr.Textbox(label="You OpenAI/Huggingface API key", type="password")
+            with gr.Column():
+                file_extension = gr.Dropdown(FILE_EXT, label="File Extensions", info="Select your files extensions!")
+                pdf_doc = gr.File(label="Load a File", file_types=FILE_EXT, type="file")
+                with gr.Row():
+                    langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
+                    load_pdf = gr.Button("Load file to langchain")
+        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
+        question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+openai
+tiktoken
+chromadb
+langchain
+unstructured
+unstructured[local-inference]
+transformers