chat-with-docs

Runtime error

App Files Files Community

chat-with-docs / app.py

herMaster

using complete local code and loading llm through ctransformers.

1a8b103 almost 2 years ago

raw

history blame

12 kB

	# import gradio as gr
	# from qdrant_client import models, QdrantClient
	# from sentence_transformers import SentenceTransformer
	# from PyPDF2 import PdfReader
	# from langchain.text_splitter import RecursiveCharacterTextSplitter
	# from langchain.callbacks.manager import CallbackManager
	# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
	# # from langchain.llms import LlamaCpp
	# from langchain.vectorstores import Qdrant
	# from qdrant_client.http import models
	# # from langchain.llms import CTransformers
	# from ctransformers import AutoModelForCausalLM





	# # loading the embedding model -

	# encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')

	# print("embedding model loaded.............................")
	# print("####################################################")

	# # loading the LLM

	# callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

	# print("loading the LLM......................................")

	# # llm = LlamaCpp(
	# # model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
	# # n_ctx=2048,
	# # f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls
	# # callback_manager=callback_manager,
	# # verbose=True,
	# # )

	# llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
	# model_file="llama-2-7b-chat.Q8_0.gguf",
	# model_type="llama",
	# # config = ctransformers.hub.AutoConfig,
	# # hf = True
	# temperature = 0.2,
	# max_new_tokens = 1024,
	# stop = ['\n']
	# )



	# print("LLM loaded........................................")
	# print("################################################################")

	# def get_chunks(text):
	# text_splitter = RecursiveCharacterTextSplitter(
	# # separator = "\n",
	# chunk_size = 500,
	# chunk_overlap = 100,
	# length_function = len,
	# )

	# chunks = text_splitter.split_text(text)
	# return chunks


	# pdf_path = './100 Weird Facts About the Human Body.pdf'


	# reader = PdfReader(pdf_path)
	# text = ""
	# num_of_pages = len(reader.pages)
	# for page in range(num_of_pages):
	# current_page = reader.pages[page]
	# text += current_page.extract_text()


	# chunks = get_chunks(text)

	# print("Chunks are ready.....................................")
	# print("######################################################")

	# qdrant = QdrantClient(path = "./db")
	# print("db created................................................")
	# print("#####################################################################")

	# qdrant.recreate_collection(
	# collection_name="my_facts",
	# vectors_config=models.VectorParams(
	# size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
	# distance=models.Distance.COSINE,
	# ),
	# )

	# print("Collection created........................................")
	# print("#########################################################")



	# li = []
	# for i in range(len(chunks)):
	# li.append(i)

	# dic = zip(li, chunks)
	# dic= dict(dic)

	# qdrant.upload_records(
	# collection_name="my_facts",
	# records=[
	# models.Record(
	# id=idx,
	# vector=encoder.encode(dic[idx]).tolist(),
	# payload= {dic[idx][:5] : dic[idx]}
	# ) for idx in dic.keys()
	# ],
	# )

	# print("Records uploaded........................................")
	# print("###########################################################")

	# def chat(question):
	# # question = input("ask question from pdf.....")


	# hits = qdrant.search(
	# collection_name="my_facts",
	# query_vector=encoder.encode(question).tolist(),
	# limit=3
	# )
	# context = []
	# for hit in hits:
	# context.append(list(hit.payload.values())[0])

	# context = context[0] + context[1] + context[2]

	# system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
	# Read the given context before answering questions and think step by step. If you can not answer a user question based on
	# the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""


	# B_INST, E_INST = "[INST]", "[/INST]"

	# B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

	# SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS

	# instruction = f"""
	# Context: {context}
	# User: {question}"""

	# prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST

	# result = llm(prompt_template)
	# return result

	# gr.Interface(
	# fn = chat,
	# inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here 👉"),
	# outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon 🚀"),
	# title="Q&N with PDF 👩🏻‍💻📓✍🏻💡",
	# description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
	# theme="soft",
	# examples=["Hello", "what is the speed of human nerve impulses?"],
	# # cache_examples=True,
	# ).launch()


	import gradio as gr
	from threading import Thread
	from queue import SimpleQueue
	from typing import Any, Dict, List, Union
	from langchain.callbacks.base import BaseCallbackHandler
	from langchain.schema import LLMResult
	from qdrant_client import models, QdrantClient
	from sentence_transformers import SentenceTransformer
	from PyPDF2 import PdfReader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from qdrant_client.models import PointStruct
	import os
	from langchain.callbacks.manager import CallbackManager
	from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
	# from qdrant_client import QdrantClient
	# from langchain import VectorDBQA - This is obsolete
	from langchain.chains import RetrievalQA
	from langchain.llms import LlamaCpp
	# from PyPDF2 import PdfReader
	from langchain.vectorstores import Qdrant
	# from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import HuggingFaceBgeEmbeddings
	from transformers import AutoModel
	from qdrant_client.http import models
	# from sentence_transformers import SentenceTransformer
	from langchain.prompts import PromptTemplate
	from ctransformers import AutoModelForCausalLM

	# --- Embedding model --------------------------------------------------
	# Sentence-transformer used to embed both the PDF chunks and the questions.
	encoder = SentenceTransformer("all-MiniLM-L6-v2")

	print(
	    "embedding model loaded.............................",
	    "####################################################",
	    sep="\n",
	)

	# --- LLM ---------------------------------------------------------------
	# Only the disabled LlamaCpp loader (see note below) consumed this manager;
	# it is still created here as a module-level name.
	callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

	print("loading the LLM......................................")

	# NOTE: an alternative LlamaCpp loader is left disabled. It pointed at a
	# local file ("/home/devangpagare/llm/models/llama-2-7b-chat.Q3_K_S.gguf")
	# with n_ctx=2048, f16_kv=True ("MUST set to True, otherwise you will run
	# into problem after a couple of calls"), callback_manager=callback_manager
	# and verbose=True; n_gpu_layers / n_batch were commented out there too.
	#
	# The ctransformers loader below is the active one. The options
	# `config = ctransformers.hub.AutoConfig`, `hf = True`,
	# `max_new_tokens = 1024` and `stop = ['\n']` remain switched off.
	llm = AutoModelForCausalLM.from_pretrained(
	    "TheBloke/Llama-2-7B-Chat-GGUF",
	    model_type="llama",
	    model_file="llama-2-7b-chat.Q3_K_S.gguf",
	    temperature=0.2,
	)

	print(
	    "LLM loaded........................................",
	    "################################################################",
	    sep="\n",
	)

	def get_chunks(text, chunk_size=500, chunk_overlap=100):
	    """Split ``text`` into overlapping chunks for embedding.

	    Args:
	        text: The full document text to split.
	        chunk_size: Upper bound on the length of each chunk, measured with
	            ``len`` (i.e. in characters). Defaults to 500.
	        chunk_overlap: Number of characters adjacent chunks may share.
	            Defaults to 100.

	    Returns:
	        The result of ``RecursiveCharacterTextSplitter.split_text(text)``.
	    """
	    text_splitter = RecursiveCharacterTextSplitter(
	        chunk_size=chunk_size,
	        chunk_overlap=chunk_overlap,
	        length_function=len,
	    )
	    return text_splitter.split_text(text)


	pdf_path = './100 Weird Facts About the Human Body.pdf'

	# Extract the text of every page, in page order, and join it in one pass
	# instead of growing a string with += inside an index loop.
	reader = PdfReader(pdf_path)
	text = "".join(page.extract_text() for page in reader.pages)

	chunks = get_chunks(text)
	print(chunks)
	print("Chunks are ready.....................................")
	print("######################################################")

	# Qdrant client opened with a filesystem path ("./db").
	qdrant = QdrantClient(path="./db")
	print("db created................................................")
	print("#####################################################################")

	# (Re)create the collection on every start.
	qdrant.recreate_collection(
	    collection_name="my_facts",
	    vectors_config=models.VectorParams(
	        # Vector size is defined by the embedding model in use.
	        size=encoder.get_sentence_embedding_dimension(),
	        distance=models.Distance.COSINE,
	    ),
	)

	print("Collection created........................................")
	print("#########################################################")

	# One record per chunk; the record id is the chunk's position in `chunks`.
	# The payload is a one-entry dict whose key is the first five characters of
	# the chunk and whose value is the full chunk text; chat() reads that single
	# value back when it builds the context.
	qdrant.upload_records(
	    collection_name="my_facts",
	    records=[
	        models.Record(
	            id=idx,
	            vector=encoder.encode(chunk).tolist(),
	            payload={chunk[:5]: chunk},
	        )
	        for idx, chunk in enumerate(chunks)
	    ],
	)

	print("Records uploaded........................................")
	print("###########################################################")

	def chat(question):
	    """Answer ``question`` using the most similar stored chunks as context.

	    The question is embedded with ``encoder``, up to three nearest records
	    are fetched from the "my_facts" collection, their texts are concatenated
	    into a context string, and a Llama-2 chat-style prompt
	    (``[INST] <<SYS>> ... <</SYS>> ... [/INST]``) is passed to ``llm``.

	    Args:
	        question: The user's question text.

	    Returns:
	        Whatever ``llm(prompt)`` returns for the assembled prompt.
	    """
	    hits = qdrant.search(
	        collection_name="my_facts",
	        query_vector=encoder.encode(question).tolist(),
	        limit=3,
	    )

	    # Each payload is a one-entry dict, so its first value is the chunk text.
	    # Join however many hits came back: indexing [0], [1], [2] directly
	    # raised IndexError whenever the search returned fewer than three hits.
	    context = "".join(
	        next(iter(hit.payload.values())) for hit in hits if hit.payload
	    )

	    system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
	Read the given context before answering questions and think step by step. If you can not answer a user question based on
	the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""

	    # Llama-2 chat delimiters for the instruction and system sections.
	    b_inst, e_inst = "[INST]", "[/INST]"
	    b_sys, e_sys = "<<SYS>>\n", "\n<</SYS>>\n\n"

	    instruction = f"""
	Context: {context}
	User: {question}"""

	    prompt = b_inst + b_sys + system_prompt + e_sys + instruction + e_inst
	    return llm(prompt)

	# Build the Gradio UI: one textbox for the question, one for the answer,
	# both wired to chat(). `cache_examples=True` is intentionally left off.
	question_box = gr.Textbox(lines=10, placeholder="Enter your question here 👉")
	answer_box = gr.Textbox(lines=10, placeholder="Your answer will be here soon 🚀")

	demo = gr.Interface(
	    fn=chat,
	    inputs=question_box,
	    outputs=answer_box,
	    title="Q&N with PDF 👩🏻‍💻📓✍🏻💡",
	    description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
	    theme="soft",
	    examples=["Hello", "what is the speed of human nerve impulses?"],
	)

	# Start serving the interface.
	demo.launch()