Tejha committed on
Commit
f7b8004
·
verified ·
1 Parent(s): 297bcca

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_lottie import st_lottie
3
+ import requests
4
+ from io import BytesIO
5
+ from docx import Document
6
+ import pdfplumber
7
+ from gtts import gTTS
8
+ import os
9
+ import base64
10
+
11
+ # --- Load Assets ---
12
def load_lottieurl(url, timeout=10):
    """Fetch a Lottie animation and return it as parsed JSON.

    Args:
        url: Address of the Lottie JSON document.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or None when the request fails, the
        server answers with a non-200 status, or the body is not valid JSON.
    """
    try:
        # Without a timeout a stalled host would block the script indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # A malformed JSON body surfaces as a ValueError subclass.
        return None
17
+
18
# Header animation data; load_lottieurl returns None on a non-200 response,
# so this value may be None.
lottie_astronaut = load_lottieurl("https://lottie.host/b86c724d-556d-4a7a-a9b2-277f8099687b/J5c91vW5qS.json")
19
+
20
+ # --- Functions ---
21
def read_docx(file):
    """Return the text of a DOCX file, one paragraph per line.

    Any failure while opening or parsing the document is reported as an
    "Error reading DOCX file: ..." string rather than raised.
    """
    try:
        paragraphs = Document(file).paragraphs
        return "\n".join(paragraph.text for paragraph in paragraphs)
    except Exception as exc:
        return f"Error reading DOCX file: {exc}"
30
+
31
def read_pdf(file):
    """Return the extractable text of a PDF, each page followed by a newline.

    Pages that yield no text contribute only their trailing newline.
    Any failure while opening or parsing the document is reported as an
    "Error reading PDF file: ..." string rather than raised.
    """
    try:
        with pdfplumber.open(file) as pdf:
            # extract_text() may return None for a page without a text layer
            # (e.g. a scanned image); treat that as empty text so one such
            # page does not turn the whole document into an error.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        return "".join(page_text + "\n" for page_text in page_texts)
    except Exception as e:
        return f"Error reading PDF file: {e}"
40
+
41
def analyze_text(text):
    """Return basic statistics about `text` as a three-line report.

    The report lists the whitespace-separated word count, the total
    character count, and the number of non-empty sentences, where a
    sentence ends at a run of '.', '!' or '?'.
    """
    import re  # local import: the module header does not import re

    # Basic analysis - you can expand this with more sophisticated NLP techniques
    word_count = len(text.split())
    char_count = len(text)
    # Splitting on '.' alone would miss questions and exclamations.
    fragments = re.split(r"[.!?]+", text)
    sentence_count = sum(1 for fragment in fragments if fragment.strip())
    return f"Word Count: {word_count}\nCharacter Count: {char_count}\nSentence Count: {sentence_count}"
48
+
49
def text_to_speech(text, language='en'):
    """Synthesize `text` to an MP3 file with gTTS and return the file's path.

    Args:
        text: The text to read aloud.
        language: Language code passed to gTTS.

    Returns:
        Path of the generated MP3 file (the caller is responsible for
        deleting it), or None when generation fails; in that case the
        error is shown with st.error.
    """
    import tempfile  # local import: the module header does not import tempfile

    audio_file = None
    try:
        # Use a unique path per call: a fixed file name would let concurrent
        # sessions overwrite or delete each other's audio.
        fd, audio_file = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # gTTS opens the path itself when saving
        tts = gTTS(text=text, lang=language, slow=False)
        tts.save(audio_file)
        return audio_file
    except Exception as e:
        # Do not leave an empty or partial file behind on failure.
        if audio_file and os.path.exists(audio_file):
            os.remove(audio_file)
        st.error(f"Error generating speech: {e}")
        return None
58
+
59
def play_audio(audio_file):
    """Render an autoplaying audio player for the file at `audio_file`.

    The file's bytes are base64-encoded into a data URI inside an HTML
    <audio> element, which is emitted through st.markdown.
    """
    with open(audio_file, "rb") as stream:
        encoded = base64.b64encode(stream.read()).decode()
    player_html = (
        "\n"
        '<audio controls autoplay="true">\n'
        f'<source src="data:audio/mp3;base64,{encoded}" type="audio/mp3">\n'
        "</audio>\n"
    )
    st.markdown(player_html, unsafe_allow_html=True)
69
+
70
+ # --- Streamlit App ---
71
st.set_page_config(page_title="AI Document Reader & Analyzer", page_icon=":book:")

st.subheader("🚀 AI Document Reader & Analyzer")
# load_lottieurl returns None when the animation cannot be fetched; skip the
# animation in that case instead of handing None to st_lottie.
if lottie_astronaut is not None:
    st_lottie(lottie_astronaut, height=150)
75
+
76
uploaded_file = st.file_uploader("Upload a DOCX or PDF file", type=["docx", "pdf"])

if uploaded_file is not None:
    file_extension = uploaded_file.name.split(".")[-1].lower()
    document_text = ""

    with st.spinner(f"Reading and processing your {file_extension.upper()} file..."):
        if file_extension == "docx":
            document_text = read_docx(uploaded_file)
        elif file_extension == "pdf":
            document_text = read_pdf(uploaded_file)

    # read_docx / read_pdf report failures as strings with these prefixes;
    # show them as errors instead of analysing them as document content.
    reader_failed = document_text.startswith(
        ("Error reading DOCX file:", "Error reading PDF file:")
    )

    if reader_failed:
        st.error(document_text)
    elif document_text.strip():
        # .strip() so that whitespace-only extraction counts as "no text".
        st.subheader("Document Content:")
        st.text_area("Text from the document", document_text, height=300)

        st.subheader("Document Analysis:")
        analysis = analyze_text(document_text)
        st.write(analysis)

        st.subheader("Virtual Voice Reader:")
        language_choice = st.selectbox("Select language for voice:", ["en", "hi", "es", "fr", "de", "ja", "ko", "pt", "ru", "zh-cn"])
        if st.button("Read with Virtual Voice"):
            with st.spinner("Generating and playing audio..."):
                audio_file = text_to_speech(document_text, language=language_choice)
                if audio_file:
                    try:
                        play_audio(audio_file)
                    finally:
                        # Clean up the temporary audio file even if
                        # rendering the player fails.
                        os.remove(audio_file)
    else:
        st.error("Could not extract text from the uploaded file.")

st.markdown("---")
st.info("This AI Space can read DOCX and PDF files, analyze basic statistics, and read the content using a virtual voice. You can expand the analysis capabilities with more advanced Natural Language Processing (NLP) techniques.")