"""Streamlit app: read a DOCX/PDF upload, show basic statistics, and read it aloud.

The script runs top to bottom on every Streamlit rerun. Helper functions are
defined first; the UI is built by the module-level statements at the bottom.
"""

import base64
import os
import re
import tempfile
from io import BytesIO

import pdfplumber
import requests
import streamlit as st
from docx import Document
from gtts import gTTS
from streamlit_lottie import st_lottie

# Seconds to wait for the animation download before giving up; without a
# timeout a stalled connection would hang the whole app at startup.
LOTTIE_TIMEOUT_SECONDS = 10


# --- Load Assets ---
def load_lottieurl(url):
    """Download a Lottie animation and return it as parsed JSON.

    Args:
        url: Address of the Lottie JSON document.

    Returns:
        The decoded JSON object, or ``None`` when the request fails, the
        server does not answer with HTTP 200, or the body is not valid JSON.
    """
    try:
        r = requests.get(url, timeout=LOTTIE_TIMEOUT_SECONDS)
    except requests.RequestException:
        # The animation is decorative; a network problem must not crash the app.
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        return None


lottie_astronaut = load_lottieurl("https://lottie.host/b86c724d-556d-4a7a-a9b2-277f8099687b/J5c91vW5qS.json")


# --- Functions ---
def read_docx(file):
    """Return the text of a DOCX file, one paragraph per line.

    Args:
        file: Path or file-like object accepted by ``docx.Document``.

    Returns:
        The paragraph texts joined with newlines, or ``""`` if the file could
        not be read (the error is reported to the user via ``st.error``).
    """
    try:
        document = Document(file)
        return "\n".join(para.text for para in document.paragraphs)
    except Exception as e:  # UI boundary: report any parser failure, don't crash
        # Returning the message as if it were document text would make the
        # app display, analyze and read the error aloud, so report it instead.
        st.error(f"Error reading DOCX file: {e}")
        return ""


def read_pdf(file):
    """Return the text of a PDF file, each page followed by a newline.

    Args:
        file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        The concatenated page texts, or ``""`` if the file could not be read
        (the error is reported to the user via ``st.error``).
    """
    try:
        with pdfplumber.open(file) as pdf:
            # extract_text() may yield None for a page without a text layer
            # (e.g. a scanned image); treat that as an empty page instead of
            # failing on None + str.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
    except Exception as e:  # UI boundary: report any parser failure, don't crash
        st.error(f"Error reading PDF file: {e}")
        return ""
    return "".join(f"{page_text}\n" for page_text in page_texts)


def analyze_text(text):
    """Compute basic statistics for ``text``.

    Words are whitespace-separated tokens, characters are counted with
    ``len``, and sentences are the non-blank pieces between runs of
    ``.``, ``!`` or ``?``.

    Args:
        text: The text to analyze.

    Returns:
        A three-line string with the word, character and sentence counts.
    """
    # Basic analysis - you can expand this with more sophisticated NLP techniques
    word_count = len(text.split())
    char_count = len(text)
    sentence_count = sum(
        1 for piece in re.split(r"[.!?]+", text) if piece.strip()
    )
    return f"Word Count: {word_count}\nCharacter Count: {char_count}\nSentence Count: {sentence_count}"


def text_to_speech(text, language='en'):
    """Synthesize ``text`` to an MP3 file with gTTS.

    Args:
        text: The text to speak.
        language: gTTS language code.

    Returns:
        Path of the generated MP3 file (the caller is responsible for
        deleting it), or ``None`` if synthesis failed; the failure is
        reported to the user via ``st.error``.
    """
    audio_file = None
    try:
        tts = gTTS(text=text, lang=language, slow=False)
        # A unique name per call: a fixed filename would be shared by every
        # concurrent session, letting one user overwrite or delete another's
        # audio. The handle is closed before gTTS writes to the path.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_file = tmp.name
        tts.save(audio_file)
        return audio_file
    except Exception as e:  # UI boundary: network/gTTS failures are reported
        if audio_file is not None and os.path.exists(audio_file):
            os.remove(audio_file)  # don't leak a partial file
        st.error(f"Error generating speech: {e}")
        return None


def play_audio(audio_file):
    """Embed ``audio_file`` in the page as an auto-playing HTML audio element.

    Args:
        audio_file: Path of an MP3 file to play.
    """
    with open(audio_file, "rb") as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    # The audio is inlined as a base64 data URI so the temporary file can be
    # deleted as soon as this function returns.
    md = f"""
        <audio controls autoplay="true">
        <source src="data:audio/mp3;base64,{b64}" type="audio/mp3">
        </audio>
        """
    st.markdown(md, unsafe_allow_html=True)


# --- Streamlit App ---
st.set_page_config(page_title="AI Document Reader & Analyzer", page_icon=":book:")

st.subheader("🚀 AI Document Reader & Analyzer")
# Skip the decoration when the animation could not be downloaded.
if lottie_astronaut is not None:
    st_lottie(lottie_astronaut, height=150)

uploaded_file = st.file_uploader("Upload a DOCX or PDF file", type=["docx", "pdf"])

if uploaded_file is not None:
    file_extension = uploaded_file.name.split(".")[-1].lower()
    document_text = ""

    with st.spinner(f"Reading and processing your {file_extension.upper()} file..."):
        if file_extension == "docx":
            document_text = read_docx(uploaded_file)
        elif file_extension == "pdf":
            document_text = read_pdf(uploaded_file)

    # Whitespace-only output (e.g. a scanned PDF yielding only newlines)
    # counts as "no text extracted".
    if document_text.strip():
        st.subheader("Document Content:")
        st.text_area("Text from the document", document_text, height=300)

        st.subheader("Document Analysis:")
        analysis = analyze_text(document_text)
        # st.text keeps the line breaks; Markdown rendering would merge the
        # three statistics into a single line.
        st.text(analysis)

        st.subheader("Virtual Voice Reader:")
        language_choice = st.selectbox("Select language for voice:", ["en", "hi", "es", "fr", "de", "ja", "ko", "pt", "ru", "zh-cn"])
        if st.button("Read with Virtual Voice"):
            with st.spinner("Generating and playing audio..."):
                audio_file = text_to_speech(document_text, language=language_choice)
                if audio_file:
                    try:
                        play_audio(audio_file)
                    finally:
                        # Clean up the temporary audio file even if playback
                        # setup fails.
                        os.remove(audio_file)
    else:
        st.error("Could not extract text from the uploaded file.")

st.markdown("---")
st.info("This AI Space can read DOCX and PDF files, analyze basic statistics, and read the content using a virtual voice. You can expand the analysis capabilities with more advanced Natural Language Processing (NLP) techniques.")