Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- dockerfile +17 -0
- requirements.txt +19 -0
- templates/analyze.html +143 -0
- webapp.py +279 -0
dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy only the dependency list first so the installed-packages layer is
# cached and pip does not re-run on every application-code change.
COPY requirements.txt /app/requirements.txt

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container at /app
COPY . /app

# Expose port 8000
EXPOSE 8000

# Run the application using uvicorn
CMD ["uvicorn", "webapp:app", "--host", "0.0.0.0", "--port", "8000"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
google-generativeai
|
| 3 |
+
langchain-google-genai
|
| 4 |
+
langchain
|
| 5 |
+
pypdf
|
| 6 |
+
langchain-community
|
| 7 |
+
unstructured
|
| 8 |
+
openpyxl
|
| 9 |
+
docx2txt
|
| 10 |
+
python-magic
|
| 11 |
+
python-pptx
|
| 12 |
+
jinja2
|
| 13 |
+
nest-asyncio
|
| 14 |
+
faiss-cpu
|
| 15 |
+
tiktoken
|
| 16 |
+
networkx
|
| 17 |
+
pandas
|
| 18 |
+
uvicorn
|
| 19 |
+
python-multipart
|
templates/analyze.html
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>File Analysis</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css" rel="stylesheet">
    <style>
        body {
            background-color: #f8f9fa;
            font-family: 'Arial', sans-serif;
        }
        .container {
            margin-top: 50px;
            margin-bottom: 50px;
            border-radius: 10px;
            background: white;
            padding: 30px;
            box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
        }
        h2, h3 {
            color: #343a40;
            margin-bottom: 20px;
        }
        .form-control, .form-select {
            margin-bottom: 15px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }
        .form-control:focus, .form-select:focus {
            border-color: #007bff;
            box-shadow: 0 0 5px rgba(0, 123, 255, 0.5);
        }
        .btn {
            border-radius: 8px;
            transition: background-color 0.3s ease, transform 0.2s ease;
        }
        .btn-primary {
            background-color: #0d6efd;
            border: none;
        }
        .btn-primary:hover {
            background-color: #0b5ed7;
            transform: translateY(-2px);
        }
        .btn-secondary {
            background-color: #6c757d;
            border: none;
        }
        .btn-secondary:hover {
            background-color: #5a6268;
            transform: translateY(-2px);
        }
        .summary {
            background-color: #ffffff;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
            margin-top: 20px;
        }
        .list-group-item {
            background-color: #ffffff;
            border: 1px solid #dee2e6;
            border-radius: 8px;
            margin-bottom: 10px;
        }
        .list-group-item:hover {
            background-color: #f1f1f1;
        }
        .conversation-history {
            margin-top: 20px;
        }
    </style>
</head>
<body>

<div class="container">
    <h2 class="text-center">File Analysis</h2>

    {% if not summary %}
    <!-- Upload form: shown only until a summary has been generated. -->
    <form action="/" method="post" enctype="multipart/form-data" class="bg-light p-4 border rounded shadow-sm">
        <h5>Upload File</h5>
        <!-- "required" matches the server-side File(...) requirement so the
             browser rejects an empty submission instead of a 422 error. -->
        <input type="file" name="file" accept=".pdf,.pptx,.csv,.xlsx,.mp3,.docx" class="form-control" required>

        <label>Select Summary Length:</label>
        <select name="summary_length" class="form-select">
            <option value="2 sentences">Short</option>
            <option value="5 sentences">Medium</option>
            <option value="10 sentences">Long</option>
        </select>
        <br>

        <label>Who are you?</label>
        <input type="text" name="iam" id="iam" class="form-control" required>

        <label>What's the document context about?</label>
        <input type="text" name="context" id="context" class="form-control" required>

        <label>Output Expectation (What you want to analyze?)</label>
        <input type="text" name="output" id="output" class="form-control" required>

        <label>Input your Google Gemini API Key</label>
        <input type="text" name="api_key" id="api_key" class="form-control">

        <input type="submit" value="Analyze" class="btn btn-primary mt-3">
    </form>
    {% endif %}

    {% if summary %}
    <div class="summary">
        <h3>Summary:</h3>
        <p>{{ summary|safe }}</p>

        {% if show_conversation %}
        <h3>Conversation</h3>
        <form action="/ask" method="post" class="mb-3">
            <input type="text" name="question" class="form-control" placeholder="Ask your question">
            <input type="submit" value="Ask" class="btn btn-secondary mt-2">
        </form>
        {% endif %}
    </div>
    {% endif %}

    {% if question_responses %}
    <br>
    <h3>Conversation History:</h3>
    <ul class="list-group conversation-history">
        {% for question, response in question_responses %}
        <li class="list-group-item">
            <strong>Question:</strong> {{ question }}<br>
            <strong>Response:</strong> {{ response|safe }}
        </li>
        {% endfor %}
    </ul>
    {% endif %}
</div>

<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
<!-- BUGFIX: stackpath.bootstrapcdn.com never hosted Bootstrap 5.x, so the
     old script tag 404'd. The jsDelivr bundle also includes Popper, so the
     separate Popper script is no longer needed. -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
|
webapp.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, UploadFile, Form, Request, HTTPException
|
| 2 |
+
from fastapi.responses import HTMLResponse
|
| 3 |
+
from fastapi.templating import Jinja2Templates
|
| 4 |
+
from typing import List, Optional
|
| 5 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 6 |
+
from langchain_community.document_loaders import PyPDFLoader, UnstructuredCSVLoader, UnstructuredExcelLoader, Docx2txtLoader, UnstructuredPowerPointLoader
|
| 7 |
+
from langchain.chains import StuffDocumentsChain
|
| 8 |
+
from langchain.chains.llm import LLMChain
|
| 9 |
+
from langchain.prompts import PromptTemplate
|
| 10 |
+
from langchain.vectorstores import FAISS
|
| 11 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
| 12 |
+
import json
|
| 13 |
+
import os
|
| 14 |
+
import google.generativeai as genai
|
| 15 |
+
import re
|
| 16 |
+
import nest_asyncio
|
| 17 |
+
import nltk
|
| 18 |
+
from langchain.text_splitter import CharacterTextSplitter
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
app = FastAPI()
templates = Jinja2Templates(directory="templates")

# nest_asyncio allows re-entering an already-running event loop; applied only
# when running in development mode (e.g. inside a notebook or reloader).
if os.getenv("FASTAPI_ENV") == "development":
    nest_asyncio.apply()


# POS-tagger data used by the "unstructured" document loaders.
# NOTE(review): this downloads at import time on every startup — assumes
# network access; consider baking the data into the container image.
nltk.download('averaged_perceptron_tagger_eng')
from nltk.tokenize import word_tokenize

# Initialize your model and other variables.
# Module-level mutable state shared across requests — fine for a single-user
# demo, not safe for concurrent users.
uploaded_file_path = None    # path of the most recently uploaded file
document_analyzed = False    # True once a summary has been produced
summary = None               # latest formatted summary (HTML string)
question_responses = []      # list of (question, response) tuples
api = None                   # Gemini API key supplied via the upload form
llm = None                   # ChatGoogleGenerativeAI instance

# Disable all Gemini content filters so document analysis is not blocked.
safety_settings = [
    {"category": "HARM_CATEGORY_DANGEROUS", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
]
|
| 46 |
+
|
| 47 |
+
def format_text(text: str) -> str:
    """Convert a small subset of Markdown to inline HTML.

    ``**bold**`` spans become ``<b>...</b>``; every remaining single ``*``
    (Gemini's bullet marker) is replaced with a ``<br>`` line break.
    """
    bolded = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
    return bolded.replace('*', '<br>')
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# Route for main page
|
| 55 |
+
# Route for main page
@app.get("/", response_class=HTMLResponse)
async def read_main(request: Request):
    """Render the main page, carrying over any previously computed state."""
    page_state = {
        "request": request,
        "summary": summary,
        "show_conversation": document_analyzed,
        "question_responses": question_responses,
    }
    return templates.TemplateResponse("analyze.html", page_state)
|
| 63 |
+
|
| 64 |
+
# Route for analyzing documents
|
| 65 |
+
# Route for analyzing documents
@app.post("/", response_class=HTMLResponse)
async def analyze_document(
    request: Request,
    api_key: str = Form(...),
    iam: str = Form(...),
    context: str = Form(...),
    output: str = Form(...),
    summary_length: str = Form(...),
    file: UploadFile = File(...)
):
    """Summarize an uploaded document (or MP3) with Google Gemini.

    Saves the upload to disk, picks a loader by file extension, runs a
    "stuff" summarization chain (audio is sent straight to the Gemini
    model instead), stores the formatted summary in module state, and
    re-renders the page.

    Raises:
        HTTPException: 400 for an unsupported file type, 500 for any other
            failure during analysis.
    """
    global uploaded_file_path, document_analyzed, summary, question_responses, api, llm
    loader = None

    try:
        # Initialize or update API key and models.
        api = api_key
        genai.configure(api_key=api)
        llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key=api)

        # Save the uploaded file under a fixed name, preserving the extension.
        # NOTE(review): a fixed filename means concurrent users overwrite each
        # other's uploads — acceptable only for a single-user demo.
        uploaded_file_path = "uploaded_file" + os.path.splitext(file.filename)[1]
        with open(uploaded_file_path, "wb") as f:
            f.write(file.file.read())

        # Determine the file type and load accordingly.
        file_extension = os.path.splitext(uploaded_file_path)[1].lower()
        print(f"File extension: {file_extension}")  # Debugging statement

        if file_extension == ".pdf":
            loader = PyPDFLoader(uploaded_file_path)
        elif file_extension == ".csv":
            loader = UnstructuredCSVLoader(uploaded_file_path, mode="elements", encoding="utf8")
        elif file_extension == ".xlsx":
            loader = UnstructuredExcelLoader(uploaded_file_path, mode="elements")
        elif file_extension == ".docx":
            loader = Docx2txtLoader(uploaded_file_path)
        elif file_extension == ".pptx":
            loader = UnstructuredPowerPointLoader(uploaded_file_path)
        elif file_extension == ".mp3":
            # Audio has no langchain loader: upload it to Gemini and let the
            # multimodal model summarize it directly.
            audio_file = genai.upload_file(path=uploaded_file_path)
            model = genai.GenerativeModel(model_name="gemini-1.5-flash")
            prompt = f"I am an {iam}. This file is about {context}. Answer the question based on this file: {output}. Write a {summary_length} concise summary."
            response = model.generate_content([prompt, audio_file], safety_settings=safety_settings)
            summary = format_text(response.text)
            document_analyzed = True
            outputs = {"summary": summary}
            with open("output_summary.json", "w") as outfile:
                json.dump(outputs, outfile)
            return templates.TemplateResponse("analyze.html", {
                "request": request,
                "summary": summary,
                "show_conversation": document_analyzed,
                "question_responses": question_responses
            })

        # If no loader is set, reject the upload with a client error.
        if loader is None:
            raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_extension}")

        # Summarize all loaded documents in a single "stuff" pass.
        docs = loader.load()
        prompt_template = PromptTemplate.from_template(
            f"I am an {iam}. This file is about {context}. Answer the question based on this file: {output}. Write a {summary_length} concise summary of the following text: {{text}}"
        )
        llm_chain = LLMChain(llm=llm, prompt=prompt_template)
        stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")
        response = stuff_chain.invoke(docs)
        summary = format_text(response["output_text"])
        document_analyzed = True
        outputs = {"summary": summary}
        with open("output.json", "w") as outfile:
            json.dump(outputs, outfile)
        return templates.TemplateResponse("analyze.html", {
            "request": request,
            "summary": summary,
            "show_conversation": document_analyzed,
            "question_responses": question_responses
        })

    except HTTPException:
        # BUGFIX: the blanket handler below previously caught the 400 raised
        # for unsupported file types and re-wrapped it as a 500. HTTP errors
        # raised deliberately must propagate unchanged.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
|
| 146 |
+
|
| 147 |
+
# Route for asking questions
|
| 148 |
+
from langchain.text_splitter import CharacterTextSplitter # Ensure this is imported
|
| 149 |
+
|
| 150 |
+
@app.post("/ask", response_class=HTMLResponse)
async def ask_question(request: Request, question: str = Form(...)):
    """Answer a follow-up question about the most recently uploaded file.

    Reloads the stored upload, asks the LLM the question (per text chunk for
    documents, directly against the audio for MP3s), runs a FAISS similarity
    search over the generated answer, appends the Q/A pair to the in-memory
    history, persists everything to JSON, and re-renders the page with the
    latest response stored in a cookie for conversational context.

    Raises:
        HTTPException: 400 when no file has been uploaded yet or the stored
            file has an unsupported extension.
    """
    global uploaded_file_path, question_responses, llm, api

    loader = None

    if uploaded_file_path:
        # Determine the file type and load accordingly.
        file_extension = os.path.splitext(uploaded_file_path)[1].lower()
        if file_extension == ".pdf":
            loader = PyPDFLoader(uploaded_file_path)
        elif file_extension == ".csv":
            loader = UnstructuredCSVLoader(uploaded_file_path, mode="elements")
        elif file_extension == ".xlsx":
            loader = UnstructuredExcelLoader(uploaded_file_path, mode="elements")
        elif file_extension == ".docx":
            loader = Docx2txtLoader(uploaded_file_path)
        elif file_extension == ".pptx":
            loader = UnstructuredPowerPointLoader(uploaded_file_path)
        elif file_extension == ".mp3":
            # Ask the multimodal model directly against the uploaded audio,
            # including the previous exchange (from the cookie) as context.
            audio_file = genai.upload_file(path=uploaded_file_path)
            model = genai.GenerativeModel(model_name="gemini-1.5-flash")
            latest_conversation = request.cookies.get("latest_question_response", "")
            prompt = "Answer the question based on the speech: " + question + (f" Latest conversation: {latest_conversation}" if latest_conversation else "")
            response = model.generate_content([prompt, audio_file], safety_settings=safety_settings)
            current_response = response.text
            current_question = f"You asked: {question}"

            # Embed the answer and retrieve the closest passage for it.
            text = current_response
            os.environ["GOOGLE_API_KEY"] = api
            embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
            document_search = FAISS.from_texts([text], embeddings)

            if document_search:
                query_embedding = embeddings.embed_query(question)
                results = document_search.similarity_search_by_vector(query_embedding, k=1)

                if results:
                    current_response = results[0].page_content
                else:
                    current_response = "No matching document found in the database."
            else:
                current_response = "Vector database not initialized."

            # BUGFIX: the Q/A pair is appended exactly once. Previously it was
            # appended both before and after the FAISS lookup, so every MP3
            # question produced two conversation-history entries.
            question_responses.append((current_question, current_response))

            # Save all results including FAISS response to output.json.
            save_to_json(summary, question_responses)

            # Persist the latest exchange in a cookie for follow-up context.
            response = templates.TemplateResponse("analyze.html", {"request": request, "summary": summary, "show_conversation": document_analyzed, "question_responses": question_responses})
            response.set_cookie(key="latest_question_response", value=current_response)
            return response

        # If no loader is set, the stored file type is unsupported.
        if loader is None:
            raise HTTPException(status_code=400, detail=f"Unsupported file type: {file_extension}")

        docs = loader.load()
        text = "\n".join([doc.page_content for doc in docs])
        os.environ["GOOGLE_API_KEY"] = api

        # Split the text into chunks so each fits within the model context.
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = text_splitter.split_text(text)

        # Build the question prompt, carrying forward the latest exchange.
        latest_conversation = request.cookies.get("latest_question_response", "")
        template1 = question + """ answer the question based on the following:
"{text}"
:""" + (f" Answer the Question with no more than 3 sentences. Latest conversation: {latest_conversation}" if latest_conversation else "")

        current_response = ""
        for chunk in chunks:
            # NOTE(review): template1.format(text=chunk) substitutes the chunk
            # before from_template parses it — chunks containing braces will
            # break the template, and the later invoke re-passes "text".
            # Confirm intended behavior before changing.
            prompt1 = PromptTemplate.from_template(template1.format(text=chunk))
            # Initialize the LLMChain with the prompt.
            llm_chain1 = LLMChain(llm=llm, prompt=prompt1)
            response1 = llm_chain1.invoke({"text": chunk})
            current_response += response1["text"] + "\n"

        # Generate embeddings for the combined responses.
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        document_search = FAISS.from_texts([current_response], embeddings)

        # Perform a search on the FAISS vector database if it's initialized.
        if document_search:
            query_embedding = embeddings.embed_query(question)
            results = document_search.similarity_search_by_vector(query_embedding, k=1)

            if results:
                current_response = format_text(results[0].page_content)
            else:
                current_response = "No matching document found in the database."
        else:
            current_response = "Vector database not initialized."

        # Append the question and response from FAISS search.
        current_question = f"You asked: {question}"
        question_responses.append((current_question, current_response))

        # Save all results to output.json.
        save_to_json(summary, question_responses)

        # Persist the latest exchange in a cookie for follow-up context.
        response = templates.TemplateResponse("analyze.html", {"request": request, "summary": summary, "show_conversation": document_analyzed, "question_responses": question_responses})
        response.set_cookie(key="latest_question_response", value=current_response)
        return response
    else:
        raise HTTPException(status_code=400, detail="No file has been uploaded yet.")
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def save_to_json(summary, question_responses):
    """Persist the summary and Q/A history to ``output_summary.json``."""
    payload = {
        "summary": summary,
        "question_responses": question_responses,
    }
    with open("output_summary.json", "w") as fp:
        json.dump(payload, fp)
|
| 276 |
+
|
| 277 |
+
# Allow running the app directly (``python webapp.py``) without invoking
# uvicorn on the command line; binds to localhost only (the Docker image
# instead runs uvicorn on 0.0.0.0 via its CMD).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000)
|