Spaces:
Runtime error
Runtime error
| # import gradio as gr | |
| # from qdrant_client import models, QdrantClient | |
| # from sentence_transformers import SentenceTransformer | |
| # from PyPDF2 import PdfReader | |
| # from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| # from langchain.callbacks.manager import CallbackManager | |
| # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
| # # from langchain.llms import LlamaCpp | |
| # from langchain.vectorstores import Qdrant | |
| # from qdrant_client.http import models | |
| # # from langchain.llms import CTransformers | |
| # from ctransformers import AutoModelForCausalLM | |
| # # loading the embedding model - | |
| # encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1') | |
| # print("embedding model loaded.............................") | |
| # print("####################################################") | |
| # # loading the LLM | |
| # callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) | |
| # print("loading the LLM......................................") | |
| # # llm = LlamaCpp( | |
| # # model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf", | |
| # # n_ctx=2048, | |
| # # f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls | |
| # # callback_manager=callback_manager, | |
| # # verbose=True, | |
| # # ) | |
| # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF", | |
| # model_file="llama-2-7b-chat.Q8_0.gguf", | |
| # model_type="llama", | |
| # # config = ctransformers.hub.AutoConfig, | |
| # # hf = True | |
| # temperature = 0.2, | |
| # max_new_tokens = 1024, | |
| # stop = ['\n'] | |
| # ) | |
| # print("LLM loaded........................................") | |
| # print("################################################################") | |
| # def get_chunks(text): | |
| # text_splitter = RecursiveCharacterTextSplitter( | |
| # # seperator = "\n", | |
| # chunk_size = 500, | |
| # chunk_overlap = 100, | |
| # length_function = len, | |
| # ) | |
| # chunks = text_splitter.split_text(text) | |
| # return chunks | |
| # pdf_path = './100 Weird Facts About the Human Body.pdf' | |
| # reader = PdfReader(pdf_path) | |
| # text = "" | |
| # num_of_pages = len(reader.pages) | |
| # for page in range(num_of_pages): | |
| # current_page = reader.pages[page] | |
| # text += current_page.extract_text() | |
| # chunks = get_chunks(text) | |
| # print("Chunks are ready.....................................") | |
| # print("######################################################") | |
| # qdrant = QdrantClient(path = "./db") | |
| # print("db created................................................") | |
| # print("#####################################################################") | |
| # qdrant.recreate_collection( | |
| # collection_name="my_facts", | |
| # vectors_config=models.VectorParams( | |
| # size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model | |
| # distance=models.Distance.COSINE, | |
| # ), | |
| # ) | |
| # print("Collection created........................................") | |
| # print("#########################################################") | |
| # li = [] | |
| # for i in range(len(chunks)): | |
| # li.append(i) | |
| # dic = zip(li, chunks) | |
| # dic= dict(dic) | |
| # qdrant.upload_records( | |
| # collection_name="my_facts", | |
| # records=[ | |
| # models.Record( | |
| # id=idx, | |
| # vector=encoder.encode(dic[idx]).tolist(), | |
| # payload= {dic[idx][:5] : dic[idx]} | |
| # ) for idx in dic.keys() | |
| # ], | |
| # ) | |
| # print("Records uploaded........................................") | |
| # print("###########################################################") | |
| # def chat(question): | |
| # # question = input("ask question from pdf.....") | |
| # hits = qdrant.search( | |
| # collection_name="my_facts", | |
| # query_vector=encoder.encode(question).tolist(), | |
| # limit=3 | |
| # ) | |
| # context = [] | |
| # for hit in hits: | |
| # context.append(list(hit.payload.values())[0]) | |
| # context = context[0] + context[1] + context[2] | |
| # system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions. | |
| # Read the given context before answering questions and think step by step. If you can not answer a user question based on | |
| # the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question.""" | |
| # B_INST, E_INST = "[INST]", "[/INST]" | |
| # B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n" | |
| # SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS | |
| # instruction = f""" | |
| # Context: {context} | |
| # User: {question}""" | |
| # prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST | |
| # result = llm(prompt_template) | |
| # return result | |
| # gr.Interface( | |
| # fn = chat, | |
| # inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here π"), | |
| # outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon π"), | |
| # title="Q&N with PDF π©π»βπ»πβπ»π‘", | |
| # description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdfπ‘", | |
| # theme="soft", | |
| # examples=["Hello", "what is the speed of human nerve impulses?"], | |
| # # cache_examples=True, | |
| # ).launch() | |
| import gradio as gr | |
| from threading import Thread | |
| from queue import SimpleQueue | |
| from typing import Any, Dict, List, Union | |
| from langchain.callbacks.base import BaseCallbackHandler | |
| from langchain.schema import LLMResult | |
| from qdrant_client import models, QdrantClient | |
| from sentence_transformers import SentenceTransformer | |
| from PyPDF2 import PdfReader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from qdrant_client.models import PointStruct | |
| import os | |
| from langchain.callbacks.manager import CallbackManager | |
| from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
| # from qdrant_client import QdrantClient | |
| # from langchain import VectorDBQA - This is obsolete | |
| from langchain.chains import RetrievalQA | |
| from langchain.llms import LlamaCpp | |
| # from PyPDF2 import PdfReader | |
| from langchain.vectorstores import Qdrant | |
| # from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceBgeEmbeddings | |
| from transformers import AutoModel | |
| from qdrant_client.http import models | |
| # from sentence_transformers import SentenceTransformer | |
| from langchain.prompts import PromptTemplate | |
| from ctransformers import AutoModelForCausalLM | |
# ---------------------------------------------------------------------------
# Model setup: runs once when the script starts.
# ---------------------------------------------------------------------------

# Embedding model; `encoder` is used further down both to index the PDF
# chunks and to embed incoming questions.
encoder = SentenceTransformer("all-MiniLM-L6-v2")
print(
    "embedding model loaded.............................",
    "####################################################",
    sep="\n",
)

# Callback manager wrapping a StreamingStdOutCallbackHandler. It was only
# referenced by an earlier (now disabled) LlamaCpp configuration and is not
# passed to the ctransformers model loaded below.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
print("loading the LLM......................................")

# Llama-2 chat model loaded through ctransformers. `max_new_tokens` and
# `stop` are intentionally left at the library defaults.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q3_K_S.gguf",
    model_type="llama",
    temperature=0.2,
)
print(
    "LLM loaded........................................",
    "################################################################",
    sep="\n",
)
def get_chunks(text, *, chunk_size=500, chunk_overlap=100):
    """Split ``text`` into overlapping chunks suitable for embedding.

    Args:
        text: The full document text to split.
        chunk_size: Maximum length of a chunk, measured with ``len``.
            Defaults to 500, the value that used to be hard-coded.
        chunk_overlap: Number of characters shared between neighbouring
            chunks. Defaults to 100, the value that used to be hard-coded.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter.split_text``
        for ``text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)
# ---------------------------------------------------------------------------
# Index construction: read the PDF, chunk it, and store one embedded record
# per chunk in a local Qdrant collection. Runs once when the script starts.
# ---------------------------------------------------------------------------

pdf_path = './100 Weird Facts About the Human Body.pdf'
reader = PdfReader(pdf_path)
# Concatenate the text of every page. join() avoids repeated string
# re-allocation, and `or ""` guards against a page with no extractable text.
text = "".join((page.extract_text() or "") for page in reader.pages)
chunks = get_chunks(text)
print(chunks)
print("Chunks are ready.....................................")
print("######################################################")

# On-disk Qdrant instance stored under ./db.
qdrant = QdrantClient(path="./db")
print("db created................................................")
print("#####################################################################")

# The collection is recreated on every start, so it always reflects the
# current PDF contents.
qdrant.recreate_collection(
    collection_name="my_facts",
    vectors_config=models.VectorParams(
        # Vector size is defined by the embedding model in use.
        size=encoder.get_sentence_embedding_dimension(),
        distance=models.Distance.COSINE,
    ),
)
print("Collection created........................................")
print("#########################################################")

# One record per chunk; the chunk's position in `chunks` is its id.
# The payload is a single-entry dict keyed by the first five characters of
# the chunk, with the full chunk text as the value. chat() reads that single
# value back from each search hit.
# NOTE(review): newer qdrant-client releases may deprecate upload_records /
# Record in favour of upload_points / PointStruct; confirm against the
# installed version before migrating.
qdrant.upload_records(
    collection_name="my_facts",
    records=[
        models.Record(
            id=idx,
            vector=encoder.encode(chunk).tolist(),
            payload={chunk[:5]: chunk},
        )
        for idx, chunk in enumerate(chunks)
    ],
)
print("Records uploaded........................................")
print("###########################################################")
def chat(question):
    """Answer ``question`` using the best-matching PDF chunks as context.

    The question is embedded with ``encoder``, up to three nearest chunks are
    fetched from the ``my_facts`` collection, and a Llama-2 chat prompt
    (``[INST]`` / ``<<SYS>>`` markers) containing that context is passed to
    ``llm``.

    Args:
        question: The user's question text.

    Returns:
        Whatever ``llm(prompt)`` returns for the assembled prompt.
    """
    hits = qdrant.search(
        collection_name="my_facts",
        query_vector=encoder.encode(question).tolist(),
        limit=3,
    )

    # Each payload is a single-entry dict whose value is the chunk text.
    # Hits without a payload are skipped, and however many passages came back
    # are joined: indexing [0], [1], [2] directly raised IndexError whenever
    # the search returned fewer than three hits.
    passages = [next(iter(hit.payload.values())) for hit in hits if hit.payload]
    context = "".join(passages)

    system_prompt = (
        "You are a helpful assistant, you will use the provided context to answer user questions.\n"
        "Read the given context before answering questions and think step by step. If you can not answer a user question based on\n"
        "the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."
    )

    # Llama-2 chat prompt delimiters.
    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
    SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS

    instruction = f"\nContext: {context}\nUser: {question}"
    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST

    return llm(prompt_template)
# ---------------------------------------------------------------------------
# Gradio UI: one text box for the question, one for the answer, wired to
# chat(). launch() starts the web app.
# ---------------------------------------------------------------------------
question_box = gr.Textbox(lines=10, placeholder="Enter your question here π")
answer_box = gr.Textbox(lines=10, placeholder="Your answer will be here soon π")

demo = gr.Interface(
    fn=chat,
    inputs=question_box,
    outputs=answer_box,
    title="Q&N with PDF π©π»βπ»πβπ»π‘",
    description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdfπ‘",
    theme="soft",
    examples=["Hello", "what is the speed of human nerve impulses?"],
)
demo.launch()