shukdevdattaEX commited on
Commit
3a9e21d
Β·
verified Β·
1 Parent(s): 0b8a82b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +209 -0
app.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import openai
3
+ import base64
4
+ from PIL import Image
5
+ import io
6
+ import fitz # PyMuPDF for PDF handling
7
+
8
+
9
# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the full text of a PDF document.

    Args:
        pdf_file: A filesystem path to the PDF, or an object with a
            ``.name`` attribute (e.g. a Gradio file upload wrapper).

    Returns:
        The concatenated text of every page, or an error message string
        when the file cannot be opened or read.
    """
    # Gradio's File component may hand us a tempfile-like wrapper; PyMuPDF
    # wants a path, so fall back to the object's .name when present.
    path = getattr(pdf_file, "name", pdf_file)
    try:
        # Context manager guarantees the document is closed even if a page
        # raises mid-iteration (the original leaked the handle on error).
        with fitz.open(path) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
21
+
22
+
23
# Generate MCQ quiz from PDF
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
    """Build a multiple-choice quiz from extracted document text.

    Returns the model's quiz text, or an error message string when no API
    key is supplied or the API call fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."
    openai.api_key = openai_api_key

    # Truncate so the prompt stays within the model's context window.
    excerpt = pdf_content[:8000]
    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
For each question:
1. Write a clear question
2. Give 4 options (A, B, C, D)
3. Indicate the correct answer
4. Briefly explain why the answer is correct

Document:
{excerpt}
"""
    try:
        completion = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}],
        )
    except Exception as e:
        return f"Error generating quiz: {str(e)}"
    return completion.choices[0].message.content
47
+
48
+
49
# Convert image to base64
def get_base64_string_from_image(pil_image):
    """Encode a PIL image as a base64 PNG string (no data-URL prefix)."""
    png_buffer = io.BytesIO()
    pil_image.save(png_buffer, format="PNG")
    raw_png = png_buffer.getvalue()
    return base64.b64encode(raw_png).decode("utf-8")
54
+
55
+
56
# Transcribe audio
def transcribe_audio(audio, openai_api_key):
    """Run Whisper speech-to-text on an audio file path.

    Returns the transcription text, or an error message string when no API
    key is supplied or the request fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."
    openai.api_key = openai_api_key
    try:
        with open(audio, 'rb') as audio_file:
            payload = io.BytesIO(audio_file.read())
        # The API infers the audio format from the file-like object's name.
        payload.name = 'audio.wav'
        result = openai.Audio.transcribe(file=payload, model="whisper-1")
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
    return result.text
70
+
71
+
72
# Generate response for text/image/pdf
def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort, model_choice):
    """Answer a text, image, or PDF-grounded question via the chat API.

    Args:
        input_text: The user's question (may be empty for image-only turns).
        image: Optional PIL image to include in the request.
        pdf_content: Previously extracted document text, prepended as context.
        openai_api_key: OpenAI API key; missing key returns an error string.
        reasoning_effort: Currently unused here — NOTE(review): consider
            forwarding to the API for o-series models; confirm SDK support.
        model_choice: Chat model name.

    Returns:
        The model's reply text, or an error message string on failure.
    """
    if not openai_api_key:
        return "Error: No API key provided."
    openai.api_key = openai_api_key

    if pdf_content and input_text:
        input_text = f"Based on the document below, answer the question:\n\n{input_text}\n\nDocument:\n{pdf_content}"

    if image is not None:
        # Bug fix: previously the base64 data URL was pasted into the plain
        # text content (which the model cannot decode as an image), and any
        # typed question was discarded. Use the chat vision content format
        # so the image and the question are both sent.
        image_b64 = get_base64_string_from_image(image)
        content = [
            {"type": "text", "text": input_text or "Describe this image."},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ]
    else:
        content = input_text

    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": content}],
            max_completion_tokens=2000
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"
93
+
94
+
95
# Chatbot logic
def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
    """Handle one submission: route text/image/voice/PDF input to the model,
    or generate a quiz when quiz mode is on, and append the turn to history.

    Returns a 6-tuple (input_text, image, audio, pdf_file, pdf_content,
    history) with the input widgets cleared and the PDF text carried
    forward as Gradio state.
    """
    history = [] if history is None else history

    # Voice input is transcribed first and then treated as text.
    if audio:
        input_text = transcribe_audio(audio, openai_api_key)

    # A freshly uploaded PDF replaces any previously stored document text.
    new_pdf_content = extract_text_from_pdf(pdf_file) if pdf_file else pdf_content

    if pdf_quiz_mode:
        if new_pdf_content:
            quiz = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
            history.append((f"📘 Generated {num_quiz_questions} quiz questions", quiz))
        else:
            history.append(("No PDF detected", "Please upload a PDF file first."))
    else:
        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
        # Pick a user-facing label for the turn based on what was provided.
        if input_text:
            label = input_text
        elif image:
            label = "🖼️ [Image Uploaded]"
        elif pdf_file:
            label = "📄 [PDF Uploaded]"
        else:
            label, response = "No input", "Please provide input."
        history.append((label, response))

    return "", None, None, None, new_pdf_content, history
125
+
126
+
127
# Reset all fields
def clear_history():
    """Reset every input widget, the stored PDF text, and the chat log."""
    cleared_text, cleared_widget = "", None
    return cleared_text, cleared_widget, cleared_widget, cleared_widget, "", []
130
+
131
+
132
# Extract text when PDF uploaded
def process_pdf(pdf_file):
    """Return the extracted text of an uploaded PDF, or "" when absent."""
    return "" if pdf_file is None else extract_text_from_pdf(pdf_file)
137
+
138
+
139
# Switch between input modes
def update_input_type(choice):
    """Toggle widget visibility for the selected input mode.

    Returns a 6-tuple of gr.update objects for (text box, image, audio,
    PDF upload, quiz slider, quiz checkbox); the checkbox value is forced
    on only for the PDF(QUIZ) mode. Unknown choices yield None, matching
    the original fall-through behavior.
    """
    # Visibility flags per mode: (text, image, audio, pdf, quiz slider).
    visibility = {
        "Text":      (True,  False, False, False, False),
        "Image":     (True,  True,  False, False, False),
        "Voice":     (False, False, True,  False, False),
        "PDF":       (True,  False, False, True,  False),
        "PDF(QUIZ)": (False, False, False, True,  True),
    }
    if choice not in visibility:
        return None
    widget_updates = tuple(gr.update(visible=flag) for flag in visibility[choice])
    return widget_updates + (gr.update(value=(choice == "PDF(QUIZ)")),)
151
+
152
+
153
# Build Gradio interface
def create_interface():
    """Assemble and return the Gradio Blocks UI for the multimodal chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("## 🧠 Multimodal Chatbot — Text | Image | Voice | PDF | Quiz")

        # Extracted PDF text, carried across turns as hidden state.
        pdf_content = gr.State("")

        openai_api_key = gr.Textbox(label="🔑 OpenAI API Key", type="password", placeholder="sk-...")

        input_type = gr.Radio(
            ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
            label="Choose Input Type",
            value="Text"
        )

        # One widget per input mode; visibility is driven by update_input_type.
        input_text = gr.Textbox(label="Enter your question or text", lines=2, visible=True)
        image_input = gr.Image(label="Upload Image", type="pil", visible=False)
        audio_input = gr.Audio(label="Upload/Record Audio", type="filepath", visible=False)
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], visible=False)
        quiz_questions_slider = gr.Slider(1, 20, value=5, step=1, label="Number of Quiz Questions", visible=False)
        quiz_mode = gr.Checkbox(label="Quiz Mode", visible=False, value=False)

        with gr.Row():
            reasoning_effort = gr.Dropdown(["low", "medium", "high"], value="medium", label="Reasoning Effort")
            model_choice = gr.Dropdown(["o1", "o3-mini"], value="o1", label="Model")

        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear Chat")

        chat_history = gr.Chatbot(label="Chat History")

        # Show/hide widgets when the input mode changes.
        input_type.change(
            fn=update_input_type,
            inputs=[input_type],
            outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
        )

        # Extract document text as soon as a PDF is uploaded.
        pdf_input.change(fn=process_pdf, inputs=[pdf_input], outputs=[pdf_content])

        # Main submission: run the chatbot turn and clear the inputs.
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content, quiz_questions_slider, quiz_mode, chat_history],
            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
        )

        # Wipe inputs, the stored PDF text, and the chat log.
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history])

    return demo
205
+
206
+
207
if __name__ == "__main__":
    # Build the UI and start the local Gradio server.
    create_interface().launch()