Spaces:

hohieu
/

pdf_rag_chatbot

Runtime error

App Files Files Community

pdf_rag_chatbot / src /app.py

hohieu

remove open api key

9400a7d over 1 year ago

raw

history blame contribute delete

4.3 kB

	import html
	import logging
	import os
	import time

	import pymongo
	import streamlit as st
	from langchain.prompts import ChatPromptTemplate
	from langchain_openai import ChatOpenAI
	from pyvi.ViTokenizer import tokenize

	from indexing import indexData, SHEET_ID, SHEET_NAME
	from services.generate_embedding import generate_embedding

	# Connect DB
	# NOTE(review): this fallback URI embeds a database password in source
	# control. It is kept only so existing deployments keep working; set the
	# MONGODB_URI environment variable, rotate the password, then drop the fallback.
	_DEFAULT_MONGODB_URI = (
	    "mongodb+srv://rag:p9vojYc9fafYwxE9@rag.xswi7nq.mongodb.net/?retryWrites=true&w=majority&appName=RAG"
	)


	@st.cache_resource
	def _get_mongo_client():
	    """Return the MongoDB client, created once and reused across reruns.

	    Streamlit re-executes this script on every interaction; caching the
	    client avoids constructing a new ``MongoClient`` on each rerun.

	    The connection string is read from the ``MONGODB_URI`` environment
	    variable when it is set, otherwise the built-in default is used.
	    """
	    return pymongo.MongoClient(os.environ.get("MONGODB_URI", _DEFAULT_MONGODB_URI))


	client = _get_mongo_client()
	db = client.rag
	collection = db.questionAndAnswers

	# HTML for the link to the question/answer spreadsheet shown in the
	# "Dataset" panel.
	_DATASET_LINK_HTML = """
	<div style="display:flex; gap: 16px; align-items: center">
	<a style="font-size: 14px"
	href="https://docs.google.com/spreadsheets/d/1MKB6MHgL_lrPB1I69fj2VcVrgmSAMLVNZR1EwSyTSeA/edit#gid=0">Link
	question & answers</a>
	</div>
	"""

	# Collapsible "Dataset" panel: spreadsheet link on the left, re-index
	# button on the right.
	with st.expander('Dataset'):
	    link_column, retrain_column = st.columns(2)

	    with link_column:
	        st.markdown(_DATASET_LINK_HTML, unsafe_allow_html=True)

	    with retrain_column:
	        retrain_clicked = st.button('Re-train')
	        if retrain_clicked:
	            # A single placeholder is reused so the status text is replaced
	            # in place rather than appended.
	            status = st.empty()
	            status.empty()
	            status.write('Training ...')
	            indexData(SHEET_ID, SHEET_NAME)
	            status.write('Completed')



	_logger = logging.getLogger(__name__)


	def generateAnswer(context: str, question: str):
	    """Ask the chat model to answer ``question`` from ``context``.

	    The prompt (in Vietnamese) tells the model to answer from the text inside
	    the <context> tags and to reply "Tôi không biết" when that text does not
	    contain information related to the question.

	    Args:
	        context: Retrieved text placed between the <context> tags.
	        question: The user's question, placed after "Câu hỏi:".

	    Returns:
	        The ``content`` attribute of the chat model's response.
	    """
	    template = ChatPromptTemplate.from_messages(
	        [
	            (
	                "user","""Trả lời câu hỏi của người dùng dựa vào thông tin có trong thẻ <context> </context> được cho bên dưới. Nếu context không chứa những thông tin liên quan tới câu hỏi, thì đừng trả lời và chỉ trả lời là "Tôi không biết". <context> {context} </context> Câu hỏi: {question}""",
	            ),
	        ]
	    )
	    messages = template.invoke({"context": context, "question": question})
	    # Logged at DEBUG rather than printed, so the full prompt (user question
	    # plus retrieved context) is not written to stdout on every request.
	    _logger.debug("Prompt sent to chat model: %s", messages)
	    chat = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.8)
	    response = chat.invoke(messages)
	    return response.content


	def stream_response(answer: str, *, delay: float = 0.03):
	    """Yield ``answer`` one space-separated word at a time.

	    Each chunk is a word followed by a single space, and the generator
	    pauses after every chunk.

	    Args:
	        answer: Text to stream; it is split on single space characters.
	        delay: Seconds to sleep after each chunk. Defaults to 0.03, the
	            value that used to be hard-coded; a value of 0 or less skips
	            the pause.

	    Yields:
	        Each word of ``answer`` with one trailing space appended.
	    """
	    for word in answer.split(" "):
	        yield word + " "
	        if delay > 0:
	            time.sleep(delay)


	# Initialize chat history
	if "messages" not in st.session_state:
	    st.session_state.messages = []

	# Display chat messages from history on app rerun
	for message in st.session_state.messages:
	    with st.chat_message(message["role"]):
	        # Only assistant messages are assembled as HTML by this app. User
	        # text is shown as plain markdown when first displayed, so it is
	        # replayed the same way and any HTML the user typed is not rendered.
	        st.markdown(
	            message["content"],
	            unsafe_allow_html=message["role"] == "assistant",
	        )

	# React to user input
	if prompt := st.chat_input(""):
	    # The tokenized form is only used to build the query embedding; the chat
	    # shows and stores the text exactly as the user typed it.
	    # NOTE(review): assumes generate_embedding expects tokenize() output, as
	    # in the original call — confirm against the indexing code.
	    tokenized_prompt = tokenize(prompt)

	    # Add user message to chat history
	    st.session_state.messages.append({"role": "user", "content": prompt})

	    # Display user message in chat message container
	    with st.chat_message("user"):
	        st.markdown(prompt)

	    # Retrieve the stored question/answer pairs closest to the prompt.
	    embedding = generate_embedding(tokenized_prompt)
	    results = collection.aggregate(
	        [
	            {
	                "$vectorSearch": {
	                    "queryVector": embedding,
	                    "path": "question_embedding",
	                    "numCandidates": 10,
	                    "limit": 10,
	                    "index": "vector_index",
	                }
	            }
	        ]
	    )
	    documents = list(results)

	    # Context handed to the model: one "question: answer" entry per match.
	    context = "".join(
	        f"\n\n{document['question']}: {document['answer']}"
	        for document in documents
	    )

	    # Related-question list shown under the answer. Questions are escaped
	    # because the response is rendered with unsafe_allow_html=True. The list
	    # is left out entirely when the search returned nothing.
	    related_items = "".join(
	        f"<li>{html.escape(document['question'])}</li>" for document in documents
	    )
	    related_questions = ""
	    if documents:
	        related_questions = f"""<ol> <p style="font-weight: 600">Câu hỏi liên quan: </p> {related_items}</ol>"""

	    answer = generateAnswer(context, prompt)
	    # The model output is escaped so markup in it is displayed as text
	    # instead of being injected into the page.
	    response = f"""<p>{html.escape(answer)}</p>
	{related_questions}
	"""

	    # Display assistant response in chat message container
	    with st.chat_message("assistant"):
	        st.markdown(response, unsafe_allow_html=True)

	    # Add assistant response to chat history
	    st.session_state.messages.append({"role": "assistant", "content": response})