Kushwanth Chowday Kandala committed:
promt_engineer using BART text generation integrated with retrieved-context text.
app.py CHANGED
@@ -7,6 +7,7 @@ from io import StringIO
 import PyPDF2
 from tqdm import tqdm
 import math
+from transformers import pipeline
 # import json
 
 # st.config(PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION="python")

@@ -77,6 +78,27 @@ def get_pinecone_semantic_index(pinecone):
     # st.text(f"Successfully connected to the pinecone index")
     return index
 
+def promt_engineer(text):
+    promt_template = """
+    write a concise summary of the following text delimited by triple backquotes.
+    return your response in bullet points which covers the key points of the text.
+
+    ```{text}```
+
+    BULLET POINT SUMMARY:
+    """
+    # Load the summarization pipeline with the specified model
+    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+
+    # Generate the prompt
+    prompt = promt_template.format(text=text)
+
+    # Generate the summary
+    summary = summarizer(prompt, max_length=100, min_length=50)[0]["summary_text"]
+
+    with st.sidebar:
+        st.write(summary)
+
 def chat_actions():
 
     pinecone = connect_pinecone()

@@ -90,14 +112,16 @@ def chat_actions():
     # create the query vector
     query_vector = query_embedding.tolist()
     # now query vector database
-    result = index.query(query_vector, top_k=5, include_metadata=True) #
+    result = index.query(query_vector, top_k=5, include_metadata=True) # top-k matches with scores and metadata
 
     # Create a list of lists
     data = []
+    consolidated_text = ""
     i = 0
     for res in result['matches']:
         i = i + 1
         data.append([f"{i}⭐", res['score'], res['metadata']['text']])
+        consolidated_text += f"{res['metadata']['text']}\n\n"
 
     # Create a DataFrame from the list of lists
     resdf = pd.DataFrame(data, columns=['TopRank', 'Score', 'Text'])

@@ -105,6 +129,7 @@ def chat_actions():
     with st.sidebar:
         st.markdown("*:red[semantic search results]* with **:green[Retrieval Augmented Generation]** ***(RAG)***.")
         st.dataframe(resdf)
+        promt_engineer(consolidated_text)
 
     for res in result['matches']:
         st.session_state["chat_history"].append(

@@ -169,15 +194,6 @@ def create_embeddings():
     # Display the contents of the file
     # st.write(file_contents)
 
-# def promt_engineer(text):
-#     promt_template = """
-#     write a concise summary of the following text delimited by triple backquotes.
-#     return your response in bullet points which convers the key points of the text.
-
-#     ```{text}```
-
-#     BULLET POINT SUMMARY:
-#     """
 
 with st.sidebar:
     st.markdown("""

@@ -187,6 +203,7 @@ with st.sidebar:
     - It takes a couple of minutes after uploading the PDF
     - Now chat with the model to get the summarized info
     - Generate prompted responses on the uploaded PDF
+    - Provides summarized results and Q&As using BART summarization
     """)
     uploaded_files = st.file_uploader('Choose your .pdf file', type="pdf", accept_multiple_files=True, key="uploaded_files", on_change=create_embeddings)
     # for uploaded_file in uploaded_files:

@@ -211,3 +228,4 @@ with st.sidebar:
     # print_out(pages)
     # combine_text(pages)
     # promt_engineer(text)
+
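The change promotes the previously commented-out promt_engineer stub into a working helper: chat_actions now concatenates the top-5 Pinecone matches into consolidated_text, and promt_engineer formats that text into a bullet-point summary prompt, runs it through the facebook/bart-large-cnn summarization pipeline, and writes the result to the Streamlit sidebar.

A minimal standalone sketch of the summarization step, assuming the transformers package is installed; the sample passage is invented for illustration:

from transformers import pipeline

promt_template = """
write a concise summary of the following text delimited by triple backquotes.
return your response in bullet points which covers the key points of the text.

```{text}```

BULLET POINT SUMMARY:
"""

# Invented sample passage standing in for the consolidated retrieval results
text = (
    "Pinecone is a managed vector database. Document chunks are embedded, "
    "upserted as vectors, and queried by similarity to retrieve the passages "
    "most relevant to a user question."
)

# Load the BART summarizer named in the diff and summarize the filled-in prompt
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
prompt = promt_template.format(text=text)
summary = summarizer(prompt, max_length=100, min_length=50)[0]["summary_text"]
print(summary)

One caveat on the design: bart-large-cnn is a plain summarization model rather than an instruction-following one, so the bullet-point directive in the template is condensed along with the passage instead of being obeyed; the template would only steer the output style with an instruction-tuned model.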
|
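The consolidation step depends only on the shape of Pinecone's query response, whose 'matches' entries carry a 'score' and 'metadata'. A short sketch with hypothetical matches shows the accumulation that feeds promt_engineer:

# Hypothetical response shaped like the index.query(...) result in chat_actions
result = {
    "matches": [
        {"score": 0.91, "metadata": {"text": "First retrieved passage."}},
        {"score": 0.87, "metadata": {"text": "Second retrieved passage."}},
    ]
}

# Python strings have no .append, so the loop accumulates with +=
consolidated_text = ""
for res in result["matches"]:
    consolidated_text += f"{res['metadata']['text']}\n\n"

print(consolidated_text)  # this is what gets passed to promt_engineer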