import gradio as gr
from openai import OpenAI  # openai>=1.0 client-based SDK
import base64
from PIL import Image
import io
import fitz  # PyMuPDF for PDF handling
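# Assumed dependencies (a sketch, exact version pins untested):
#   pip install gradio "openai>=1.0" pymupdf pillow
# The o1 / o3-mini models and the reasoning_effort parameter require the
# 1.x SDK; the legacy openai.ChatCompletion interface was removed in 1.0.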
# Extract text from an uploaded PDF. Depending on the Gradio version,
# gr.File passes either a filepath string or a tempfile wrapper with .name.
def extract_text_from_pdf(pdf_file):
    try:
        path = getattr(pdf_file, "name", pdf_file)
        text = ""
        pdf_document = fitz.open(path)
        for page in pdf_document:
            text += page.get_text()
        pdf_document.close()
        return text
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
# Generate an MCQ quiz from extracted PDF text
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
    if not openai_api_key:
        return "Error: No API key provided."
    client = OpenAI(api_key=openai_api_key)
    # Truncate so the prompt stays well inside the model's context window
    limited_content = pdf_content[:8000]
    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
For each question:
1. Write a clear question
2. Give 4 options (A, B, C, D)
3. Indicate the correct answer
4. Briefly explain why the answer is correct
Document:
{limited_content}
"""
    try:
        response = client.chat.completions.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating quiz: {str(e)}"
# Encode a PIL image as a base64 PNG string
def get_base64_string_from_image(pil_image):
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
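# Usage sketch (builds a throwaway 8x8 image, then the data URL the chat
# API expects for inline images):
#   img = Image.new("RGB", (8, 8))
#   url = f"data:image/png;base64,{get_base64_string_from_image(img)}"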
# Transcribe audio with Whisper. gr.Audio(type="filepath") hands us a
# filesystem path, so the file can be opened and passed to the SDK directly.
def transcribe_audio(audio, openai_api_key):
    if not openai_api_key:
        return "Error: No API key provided."
    client = OpenAI(api_key=openai_api_key)
    try:
        with open(audio, "rb") as f:
            transcription = client.audio.transcriptions.create(model="whisper-1", file=f)
        return transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
# Generate a response for text, image, or PDF input
def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort, model_choice):
    if not openai_api_key:
        return "Error: No API key provided."
    client = OpenAI(api_key=openai_api_key)
    if pdf_content and input_text:
        input_text = f"Based on the document below, answer the question:\n\n{input_text}\n\nDocument:\n{pdf_content}"
    if image:
        # Images go in the multimodal content format as a data URL
        # (requires a vision-capable model such as o1; o3-mini is text-only)
        image_b64 = get_base64_string_from_image(image)
        content = [
            {"type": "text", "text": input_text or "Describe this image."},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ]
    else:
        content = input_text
    try:
        response = client.chat.completions.create(
            model=model_choice,
            messages=[{"role": "user", "content": content}],
            reasoning_effort=reasoning_effort,  # accepted by o-series reasoning models
            max_completion_tokens=2000
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"
# Route each submission to the right handler and append to chat history
def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
    if history is None:
        history = []
    if audio:
        input_text = transcribe_audio(audio, openai_api_key)
    new_pdf_content = pdf_content
    if pdf_file:
        new_pdf_content = extract_text_from_pdf(pdf_file)
    if pdf_quiz_mode:
        if new_pdf_content:
            quiz = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
            history.append((f"📄 Generated {num_quiz_questions} quiz questions", quiz))
        else:
            history.append(("No PDF detected", "Please upload a PDF file first."))
    else:
        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
        if input_text:
            history.append((input_text, response))
        elif image:
            history.append(("🖼️ [Image Uploaded]", response))
        elif pdf_file:
            history.append(("📄 [PDF Uploaded]", response))
        else:
            history.append(("No input", "Please provide input."))
    # Clear the inputs, carry the extracted PDF text and history forward
    return "", None, None, None, new_pdf_content, history
# Reset all fields
def clear_history():
    return "", None, None, None, "", []
# Extract text as soon as a PDF is uploaded
def process_pdf(pdf_file):
    if pdf_file is None:
        return ""
    return extract_text_from_pdf(pdf_file)
# Toggle component visibility per input mode. Each row gives the
# (text box, image, audio, PDF, quiz slider) visibilities plus the
# quiz-mode checkbox value.
def update_input_type(choice):
    modes = {
        "Text":      (True,  False, False, False, False, False),
        "Image":     (True,  True,  False, False, False, False),
        "Voice":     (False, False, True,  False, False, False),
        "PDF":       (True,  False, False, True,  False, False),
        "PDF(QUIZ)": (False, False, False, True,  True,  True),
    }
    text, image, audio, pdf, slider, quiz = modes[choice]
    return (
        gr.update(visible=text),
        gr.update(visible=image),
        gr.update(visible=audio),
        gr.update(visible=pdf),
        gr.update(visible=slider),
        gr.update(value=quiz),
    )
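# The returned 6-tuple maps onto the outputs list of input_type.change in
# create_interface; keep both in the same order when adding components.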
# Build the Gradio interface
def create_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## 🧠 Multimodal Chatbot – Text | Image | Voice | PDF | Quiz")
        pdf_content = gr.State("")
        openai_api_key = gr.Textbox(label="🔑 OpenAI API Key", type="password", placeholder="sk-...")
        input_type = gr.Radio(
            ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
            label="Choose Input Type",
            value="Text"
        )
        input_text = gr.Textbox(label="Enter your question or text", lines=2, visible=True)
        image_input = gr.Image(label="Upload Image", type="pil", visible=False)
        audio_input = gr.Audio(label="Upload/Record Audio", type="filepath", visible=False)
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], visible=False)
        quiz_questions_slider = gr.Slider(1, 20, value=5, step=1, label="Number of Quiz Questions", visible=False)
        quiz_mode = gr.Checkbox(label="Quiz Mode", visible=False, value=False)
        with gr.Row():
            reasoning_effort = gr.Dropdown(["low", "medium", "high"], value="medium", label="Reasoning Effort")
            model_choice = gr.Dropdown(["o1", "o3-mini"], value="o1", label="Model")
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear Chat")
        chat_history = gr.Chatbot(label="Chat History")
        # Input type handling
        input_type.change(
            fn=update_input_type,
            inputs=[input_type],
            outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
        )
        # Extract PDF text as soon as a file is uploaded
        pdf_input.change(fn=process_pdf, inputs=[pdf_input], outputs=[pdf_content])
        # Submit
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content, quiz_questions_slider, quiz_mode, chat_history],
            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
        )
        # Clear
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history])
    return demo
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
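# To run locally (assuming the dependencies noted at the top are installed):
#   python app.py
# Gradio serves on http://127.0.0.1:7860 by default.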