import gradio as gr
import openai
import base64
from PIL import Image
import io
import fitz # PyMuPDF for PDF handling
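
# NOTE: the calls below (openai.ChatCompletion, openai.Audio) target the
# legacy pre-1.0 OpenAI Python SDK. A sketch of the assumed environment,
# e.g. in requirements.txt (exact versions are assumptions, not pinned here):
#   openai==0.28.1
#   gradio
#   pymupdf
#   pillow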
# Extract text from PDF
def extract_text_from_pdf(pdf_file):
    try:
        text = ""
        pdf_document = fitz.open(pdf_file)
        for page_num in range(len(pdf_document)):
            page = pdf_document[page_num]
            text += page.get_text()
        pdf_document.close()
        return text
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
# Generate MCQ quiz from PDF
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
    if not openai_api_key:
        return "Error: No API key provided."
    openai.api_key = openai_api_key
    # Truncate the document so the prompt stays comfortably within context
    limited_content = pdf_content[:8000]
    prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
For each question:
1. Write a clear question
2. Give 4 options (A, B, C, D)
3. Indicate the correct answer
4. Briefly explain why the answer is correct
Document:
{limited_content}
"""
    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating quiz: {str(e)}"
# Convert image to base64
def get_base64_string_from_image(pil_image):
    buffered = io.BytesIO()
    pil_image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
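
# A minimal sketch of the multimodal user message this helper feeds into
# generate_response below (assuming the Chat Completions "image_url"
# content-part format; the example question is illustrative):
#
#   messages=[{"role": "user", "content": [
#       {"type": "text", "text": "What is shown in this image?"},
#       {"type": "image_url",
#        "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
#   ]}]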
# Transcribe audio
def transcribe_audio(audio, openai_api_key):
    if not openai_api_key:
        return "Error: No API key provided."
    openai.api_key = openai_api_key
    try:
        with open(audio, 'rb') as f:
            audio_bytes = f.read()
        file_obj = io.BytesIO(audio_bytes)
        file_obj.name = 'audio.wav'
        transcription = openai.Audio.transcribe(file=file_obj, model="whisper-1")
        return transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
# Generate response for text/image/pdf
def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort, model_choice):
    if not openai_api_key:
        return "Error: No API key provided."
    openai.api_key = openai_api_key
    # Build the user message: PDF-grounded text, text + image, or plain text
    if pdf_content and input_text:
        content = f"Based on the document below, answer the question:\n\n{input_text}\n\nDocument:\n{pdf_content}"
    elif image:
        # Send the image as an image_url content part rather than as raw
        # base64 text, so the model actually receives it as an image
        image_b64 = get_base64_string_from_image(image)
        content = [
            {"type": "text", "text": input_text or "Describe this image."},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ]
    else:
        content = input_text
    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": content}],
            # Forwarded verbatim to the API; assumption: the listed o-series
            # models accept reasoning_effort ("low"/"medium"/"high")
            reasoning_effort=reasoning_effort,
            max_completion_tokens=2000
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"
# Chatbot logic
def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
    if history is None:
        history = []
    # Voice input: transcribe the audio and treat the transcript as the question
    if audio:
        input_text = transcribe_audio(audio, openai_api_key)
    # A freshly uploaded PDF replaces any previously extracted text
    new_pdf_content = pdf_content
    if pdf_file:
        new_pdf_content = extract_text_from_pdf(pdf_file)
    if pdf_quiz_mode:
        if new_pdf_content:
            quiz = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
            history.append((f"📝 Generated {num_quiz_questions} quiz questions", quiz))
        else:
            history.append(("No PDF detected", "Please upload a PDF file first."))
    else:
        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
        if input_text:
            history.append((input_text, response))
        elif image:
            history.append(("🖼️ [Image Uploaded]", response))
        elif pdf_file:
            history.append(("📄 [PDF Uploaded]", response))
        else:
            history.append(("No input", "Please provide input."))
    # Clear the input widgets; keep the extracted PDF text and chat history
    return "", None, None, None, new_pdf_content, history
# Reset all fields
def clear_history():
    return "", None, None, None, "", []

# Extract text when PDF uploaded
def process_pdf(pdf_file):
    if pdf_file is None:
        return ""
    return extract_text_from_pdf(pdf_file)
# Switch between input modes; the outputs map, in order, to:
# (input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode)
def update_input_type(choice):
    if choice == "Text":
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
    elif choice == "Image":
        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
    elif choice == "Voice":
        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
    elif choice == "PDF":
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value=False)
    elif choice == "PDF(QUIZ)":
        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(value=True)
# Build Gradio interface
def create_interface():
    with gr.Blocks() as demo:
        gr.Markdown("## 🧠 Multimodal Chatbot – Text | Image | Voice | PDF | Quiz")
        # Session state holding the text extracted from the uploaded PDF
        pdf_content = gr.State("")
        openai_api_key = gr.Textbox(label="🔑 OpenAI API Key", type="password", placeholder="sk-...")
        input_type = gr.Radio(
            ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
            label="Choose Input Type",
            value="Text"
        )
        input_text = gr.Textbox(label="Enter your question or text", lines=2, visible=True)
        image_input = gr.Image(label="Upload Image", type="pil", visible=False)
        audio_input = gr.Audio(label="Upload/Record Audio", type="filepath", visible=False)
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], visible=False)
        quiz_questions_slider = gr.Slider(1, 20, value=5, step=1, label="Number of Quiz Questions", visible=False)
        quiz_mode = gr.Checkbox(label="Quiz Mode", visible=False, value=False)
        with gr.Row():
            reasoning_effort = gr.Dropdown(["low", "medium", "high"], value="medium", label="Reasoning Effort")
            model_choice = gr.Dropdown(["o1", "o3-mini"], value="o1", label="Model")
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear Chat")
        chat_history = gr.Chatbot(label="Chat History")
        # Input type handling
        input_type.change(
            fn=update_input_type,
            inputs=[input_type],
            outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
        )
        # PDF upload processing
        pdf_input.change(fn=process_pdf, inputs=[pdf_input], outputs=[pdf_content])
        # Submit
        submit_btn.click(
            fn=chatbot,
            inputs=[input_text, image_input, audio_input, pdf_input, openai_api_key, reasoning_effort, model_choice, pdf_content, quiz_questions_slider, quiz_mode, chat_history],
            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
        )
        # Clear
        clear_btn.click(fn=clear_history, inputs=[], outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history])
    return demo
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
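
# Typical local run (dependency pins above are assumptions):
#   $ pip install openai==0.28.1 gradio pymupdf pillow
#   $ python app.py
# then open the local URL Gradio prints in a browser.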