|
|
import base64
import os
import re
import tempfile
from io import BytesIO

import pdfplumber
import requests
import streamlit as st
from docx import Document
from gtts import gTTS
from streamlit_lottie import st_lottie
|
|
|
|
|
|
|
|
def load_lottieurl(url, timeout=10):
    """Download a Lottie animation and return its decoded JSON.

    Args:
        url: Address of the Lottie JSON document.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON payload, or None when the request fails, the server
        answers with a status other than 200, or the body is not valid JSON.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # The animation is decorative: a network problem must not keep the
        # rest of the app from loading.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # Raised by requests when the response body is not JSON.
        return None
|
|
|
|
|
# Animation shown in the page header. load_lottieurl returns None when the
# server does not answer with HTTP 200.
_ASTRONAUT_ANIMATION_URL = (
    "https://lottie.host/b86c724d-556d-4a7a-a9b2-277f8099687b/J5c91vW5qS.json"
)
lottie_astronaut = load_lottieurl(_ASTRONAUT_ANIMATION_URL)
|
|
|
|
|
|
|
|
def read_docx(file):
    """Extract the paragraph text of a DOCX document.

    Args:
        file: Whatever ``docx.Document`` accepts; the app passes the
            Streamlit upload object.

    Returns:
        The text of every paragraph joined with newlines, or an empty string
        when the file cannot be read. Failures are reported with ``st.error``
        rather than returned, so an error message is never mistaken for
        document content by the caller.
    """
    try:
        document = Document(file)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)
    except Exception as e:
        # Broad on purpose: this is the UI boundary and any parsing failure
        # should surface as a message, not as a crashed page.
        st.error(f"Error reading DOCX file: {e}")
        return ""
|
|
|
|
|
def read_pdf(file):
    """Extract the text of every page of a PDF document.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the app passes the
            Streamlit upload object.

    Returns:
        The text of each page followed by a newline, concatenated in page
        order, or an empty string when the file cannot be read. Failures are
        reported with ``st.error`` rather than returned, so an error message
        is never mistaken for document content by the caller.
    """
    try:
        with pdfplumber.open(file) as pdf:
            # extract_text() may return None for a page with no extractable
            # text (seen with older pdfplumber releases); treat it as empty
            # instead of failing the whole document on `None + "\n"`.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
    except Exception as e:
        # Broad on purpose: this is the UI boundary and any parsing failure
        # should surface as a message, not as a crashed page.
        st.error(f"Error reading PDF file: {e}")
        return ""

    return "".join(f"{page_text}\n" for page_text in page_texts)
|
|
|
|
|
def analyze_text(text):
    """Return basic statistics about *text* as a three-line summary.

    Args:
        text: The document text to measure.

    Returns:
        A string of the form
        ``"Word Count: W\\nCharacter Count: C\\nSentence Count: S"`` where
        W counts whitespace-separated tokens, C counts every character and
        S counts the non-blank fragments between sentence terminators.

    The sentence count is a heuristic: any run of terminators ends a
    sentence, so abbreviations and decimal numbers are over-counted.
    """
    word_count = len(text.split())
    char_count = len(text)

    # Besides ".", "!" and "?", accept the ideographic full stop, the
    # full-width "!" and "?", and the Devanagari danda, because the app
    # offers Japanese, Chinese and Hindi voices.
    fragments = re.split(r"[.!?\u3002\uff01\uff1f\u0964]+", text)
    sentence_count = sum(1 for fragment in fragments if fragment.strip())

    return (
        f"Word Count: {word_count}\n"
        f"Character Count: {char_count}\n"
        f"Sentence Count: {sentence_count}"
    )
|
|
|
|
|
def text_to_speech(text, language='en'):
    """Synthesize *text* with gTTS and save it as an MP3 file.

    Args:
        text: The text to speak.
        language: Language code handed to gTTS.

    Returns:
        The path of the generated MP3 file, or None when synthesis fails
        (the failure is reported with ``st.error``). The caller owns the
        file and is expected to delete it once it has been played.
    """
    audio_path = None
    try:
        tts = gTTS(text=text, lang=language, slow=False)

        # A unique file per call: a fixed name in the working directory would
        # be overwritten or deleted by another session running concurrently.
        handle, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(handle)  # gTTS opens the path itself when saving

        tts.save(audio_path)
        return audio_path
    except Exception as e:
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                # Best-effort cleanup; the synthesis error is what the user
                # needs to see.
                pass
        st.error(f"Error generating speech: {e}")
        return None
|
|
|
|
|
def play_audio(audio_file):
    """Embed *audio_file* in the page as an auto-playing HTML audio player.

    The file is read in binary mode, base64-encoded and inlined as a data
    URI, then rendered through ``st.markdown`` with raw HTML enabled.

    Args:
        audio_file: Path of the MP3 file to play.
    """
    with open(audio_file, "rb") as stream:
        encoded = base64.b64encode(stream.read()).decode()

    player_html = f"""
<audio controls autoplay="true">
<source src="data:audio/mp3;base64,{encoded}" type="audio/mp3">
</audio>
"""
    st.markdown(player_html, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Page configuration comes first, ahead of every other Streamlit call below.
st.set_page_config(page_title="AI Document Reader & Analyzer", page_icon=":book:")

# The ":book:" shortcode replaces a mis-encoded emoji and matches page_icon.
st.subheader(":book: AI Document Reader & Analyzer")

# The animation is None when its download failed; skip it rather than hand
# st_lottie an empty payload.
if lottie_astronaut is not None:
    st_lottie(lottie_astronaut, height=150)

uploaded_file = st.file_uploader("Upload a DOCX or PDF file", type=["docx", "pdf"])
|
|
|
|
|
# Main flow: extract the text of the upload, show it with its statistics, and
# offer to read it aloud.
if uploaded_file is not None:
    file_extension = uploaded_file.name.split(".")[-1].lower()
    document_text = ""

    with st.spinner(f"Reading and processing your {file_extension.upper()} file..."):
        if file_extension == "docx":
            document_text = read_docx(uploaded_file)
        elif file_extension == "pdf":
            document_text = read_pdf(uploaded_file)

    # Whitespace-only output (for example a PDF whose pages yield no text)
    # leaves nothing to analyse or speak, so treat it like an empty result.
    if document_text and document_text.strip():
        st.subheader("Document Content:")
        st.text_area("Text from the document", document_text, height=300)

        st.subheader("Document Analysis:")
        analysis = analyze_text(document_text)
        # st.text keeps the line breaks of the summary; st.write renders the
        # string as markdown, which folds the three lines into one.
        st.text(analysis)

        st.subheader("Virtual Voice Reader:")
        language_choice = st.selectbox(
            "Select language for voice:",
            ["en", "hi", "es", "fr", "de", "ja", "ko", "pt", "ru", "zh-cn"],
        )
        if st.button("Read with Virtual Voice"):
            with st.spinner("Generating and playing audio..."):
                audio_file = text_to_speech(document_text, language=language_choice)
                if audio_file:
                    try:
                        play_audio(audio_file)
                    finally:
                        # The audio is inlined into the page, so the file is
                        # no longer needed; remove it even if playback failed.
                        os.remove(audio_file)
    else:
        st.error("Could not extract text from the uploaded file.")
|
|
|
|
|
# Footer: a horizontal rule followed by a short description of the app.
st.markdown("---")
st.info(
    "This AI Space can read DOCX and PDF files, analyze basic statistics, "
    "and read the content using a virtual voice. "
    "You can expand the analysis capabilities with more advanced "
    "Natural Language Processing (NLP) techniques."
)