Spaces:
Build error
Build error
| import gradio as gr | |
| import os | |
| import fitz # PyMuPDF | |
| from PIL import Image | |
| import io | |
| import base64 | |
| from docx import Document | |
| import textwrap | |
| import re | |
| from groq import Groq | |
| import google.generativeai as genai | |
| import tempfile | |
# API credentials are read from the environment (for example, secrets
# configured on the hosting platform) so that no key is stored in the source.
# NOTE(review): keys that were previously committed in this file should be
# treated as leaked and revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GOOGLE_API_KEY or not GROQ_API_KEY:
    # Fail fast with a clear message instead of a late authentication error.
    raise RuntimeError(
        "Set the GOOGLE_API_KEY and GROQ_API_KEY environment variables "
        "before starting the app."
    )

# Gemini model that writes the question paper from the OCR text.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')

# Groq client used for the per-page OCR requests.
client = Groq(api_key=GROQ_API_KEY)
# Serialize an image as PNG and return it as base64 text
def encode_image(image):
    """Return the PNG serialization of *image* as a base64 string.

    Args:
        image: Any object exposing ``save(fp, format=...)``; the callers in
            this file pass the image produced by ``pdf_page_to_image``.

    Returns:
        The base64 encoding of the PNG bytes, decoded to a UTF-8 ``str``.
    """
    buffer = io.BytesIO()
    try:
        image.save(buffer, format='PNG')
        png_bytes = buffer.getvalue()
    finally:
        # Release the in-memory buffer whether or not saving succeeded.
        buffer.close()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode('utf-8')
# Convert a PDF page to an image
def pdf_page_to_image(pdf_path, page_num=0):
    """Render one page of a PDF file as a PIL image.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render (default: first page).

    Returns:
        A PIL ``Image`` decoded from the PNG rendering of the page.
    """
    # The context manager closes the document even when loading or rendering
    # raises; previously the handle was never closed.
    with fitz.open(pdf_path) as pdf_document:
        page = pdf_document.load_page(page_num)
        pix = page.get_pixmap()
        # Materialize the PNG bytes while the document is still open.
        png_bytes = pix.tobytes("png")
    return Image.open(io.BytesIO(png_bytes))
# Process PDFs and generate a question paper
def process_pdfs_and_generate_question_paper(uploaded_files, name, number_of_sections, sections, difficulty_level, calculated_marks):
    """OCR every page of the uploaded PDFs and ask the model for a question paper.

    Args:
        uploaded_files: Iterable of uploaded files. Each item is either a path
            string or an object whose ``name`` attribute is the path
            (NOTE(review): which one depends on the Gradio version -- confirm).
            ``None`` is treated as "no files".
        name: Exam name (not used by this function; kept for the call signature).
        number_of_sections: Number of sections (not used by this function).
        sections: List of dicts with the keys ``section_name``,
            ``num_questions`` and ``marks_per_question``.
        difficulty_level: Difficulty on a scale of 5, inserted into the prompt.
        calculated_marks: Total marks of the paper, inserted into the prompt.

    Returns:
        The response object returned by ``model.generate_content``.
    """
    ocr_parts = []

    # TemporaryDirectory removes the working copies when the block exits,
    # so repeated requests no longer accumulate files on disk.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Save uploaded files locally
        for file in uploaded_files or []:
            source_path = getattr(file, "name", file)
            file_path = os.path.join(temp_dir, os.path.basename(source_path))  # Use only the filename
            with open(source_path, "rb") as input_file, open(file_path, "wb") as f:
                f.write(input_file.read())

        # Extract text from PDFs (sorted so the page order is deterministic)
        pdf_files = sorted(
            os.path.join(temp_dir, f)
            for f in os.listdir(temp_dir)
            if f.lower().endswith('.pdf')
        )
        for pdf_file in pdf_files:
            with fitz.open(pdf_file) as pdf_document:
                total_pages = pdf_document.page_count

            for page_num in range(total_pages):
                image = pdf_page_to_image(pdf_file, page_num)
                base64_image = encode_image(image)

                # Send the image to the Groq API for OCR
                chat_completion = client.chat.completions.create(
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": "You are a professional OCR system. Extract the text from the images with highest accuracy. Don't write anything extra."},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        # encode_image produces PNG data, so the
                                        # declared media type must be image/png.
                                        "url": f"data:image/png;base64,{base64_image}",
                                    },
                                },
                            ],
                        }
                    ],
                    model="meta-llama/llama-4-scout-17b-16e-instruct",  # Use the appropriate vision model
                )

                # The API may return no content for a page; treat it as empty text.
                page_text = chat_completion.choices[0].message.content or ""

                # Append the OCR result for this page to the full result
                ocr_parts.append(f"--- OCR result for Page {page_num + 1} of {pdf_file} ---\n")
                ocr_parts.append(page_text + "\n")
                ocr_parts.append("-" * 50 + "\n")

    full_ocr_text = "".join(ocr_parts)

    # Generate question distribution
    question_distribution = "".join(
        f"- {section['num_questions']} {section['section_name']} questions, each carrying {section['marks_per_question']} marks.\n"
        for section in sections
    )

    prompt = f"""
Based on the content provided below, please generate a well-structured {calculated_marks}-mark question paper in English. The paper should consist of the following:
{question_distribution}
The total mark distribution should be strictly adhered to, with no question exceeding the specified marks.
- The difficulty level of the paper should be {difficulty_level} on a scale of 5, meaning the questions should be tricky and require in-depth understanding.
- Ensure no repetition of questions or concepts throughout the paper.
- Include critical mathematical problems wherever relevant, and feel free to create challenging new problems of a similar type to the ones presented in the content.
- Do not include answers to the questions in the paper.
- Each question will be versatile in concept.
- Some of the questions will be application based, which the student need to think critically before answering.
- Some questions will be conceptual to test students concept depth.
The questions should not be overly simple, and they should test the understanding and application of concepts, not just recall. Maintain a high standard throughout the paper.
Content:
\n\n{full_ocr_text}
"""
    response = model.generate_content(prompt)
    return response
def save_markdown_to_word(markdown_text, name, file_name):
    """Write lightly formatted markdown text into a Word document.

    Each input line becomes one paragraph. Only the ``**bold**`` and
    ``*italic*`` spans inside a line are emphasized; the rest of the line stays
    regular text. Previously a single marker made the whole line bold or
    italic, and bullet lines containing a lone ``*`` were italicized.

    Args:
        markdown_text: Text to write; ``None`` is treated as empty text.
        name: Title placed at the top of the document.
        file_name: Path the ``.docx`` file is saved to.

    Returns:
        ``file_name``, unchanged.
    """
    # Create a new Document
    doc = Document()

    # Add a title for the markdown content
    doc.add_heading(f"{name}", 0)

    # Replace bullet points with a plain-text marker
    text = (markdown_text or "").replace('•', ' *')

    for line in text.split('\n'):
        paragraph = doc.add_paragraph()
        for segment, is_bold, is_italic in _split_emphasis(line):
            run = paragraph.add_run(segment)
            # Only set the flags that apply so regular runs keep inheriting
            # the paragraph style.
            if is_bold:
                run.bold = True
            if is_italic:
                run.italic = True

    # Save the document
    doc.save(file_name)
    print(f"Markdown content saved to {file_name}")
    return file_name


# Matches **bold** or *italic* spans whose content does not start or end with
# whitespace, so a bullet marker such as "* item" is not taken for emphasis.
_EMPHASIS_RE = re.compile(
    r'\*\*(?!\s)(?P<bold>.+?)(?<!\s)\*\*'
    r'|\*(?!\s)(?P<italic>.+?)(?<!\s)\*'
)


def _split_emphasis(line):
    """Split one line into ``(text, is_bold, is_italic)`` segments, markers removed."""
    segments = []
    cursor = 0
    for match in _EMPHASIS_RE.finditer(line):
        if match.start() > cursor:
            segments.append((line[cursor:match.start()], False, False))
        if match.group('bold') is not None:
            segments.append((match.group('bold'), True, False))
        else:
            segments.append((match.group('italic'), False, True))
        cursor = match.end()
    if cursor < len(line):
        segments.append((line[cursor:], False, False))
    return segments
def main_interface(uploaded_files, name, number_of_sections, difficulty_level, *args):
    """Gradio click handler: build the section list, generate the paper, save it.

    Args:
        uploaded_files: Uploaded PDF files, passed through to the generator.
        name: Exam name; used as the document title and the output file name.
        number_of_sections: How many sections the user asked for. ``None`` is
            treated as zero and the value is capped at the number of section
            triples supplied in ``args``.
        difficulty_level: Difficulty on a scale of 5.
        *args: Flattened section fields, three per section in the order
            (section name, number of questions, marks per question).

    Returns:
        A tuple ``(paper_text, word_file_path)``.
    """
    # Never read past the section fields that were actually supplied.
    available_sections = len(args) // 3
    section_count = max(0, min(int(number_of_sections or 0), available_sections))

    sections = []
    for i in range(section_count):
        section_name, num_questions, marks_per_question = args[i * 3:i * 3 + 3]
        sections.append({
            "section_name": section_name,
            "num_questions": int(num_questions),
            "marks_per_question": float(marks_per_question),
        })

    calculated_marks = sum(section['num_questions'] * section['marks_per_question'] for section in sections)
    response = process_pdfs_and_generate_question_paper(uploaded_files, name, section_count, sections, difficulty_level, calculated_marks)
    paper_text = response.text

    # The exam name is free text from the UI: strip characters that are invalid
    # in file names (or would escape the working directory) and fall back to a
    # default when nothing usable remains.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', name or '').strip() or 'question_paper'
    word_file = save_markdown_to_word(paper_text, name, f"{safe_name}.docx")
    return paper_text, word_file
def generate_interface():
    """Build the Gradio Blocks UI for the question paper generator.

    The layout has a file upload, the exam name, the number of sections, a
    difficulty slider, up to ten rows of section fields (only the first is
    visible initially), a generate button, and the text and file outputs.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    max_sections = 10  # Allow up to 10 sections

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown(
                """
# **Question Paper Generator**
### *Effortless paper creation with just a click!*
"""
            )
        with gr.Row():
            uploaded_files = gr.File(label="Upload PDF Files", file_types=['.pdf'], file_count="multiple")
        with gr.Row():
            name = gr.Textbox(label="Exam Name")
            number_of_sections = gr.Number(label="Number of Sections", precision=0, value=1)
        with gr.Row():
            difficulty_level = gr.Slider(label="Difficulty Level", minimum=1, maximum=5, step=0.1)

        # Section Inputs
        section_inputs = []
        section_containers = []  # Containers to update dynamically
        for i in range(max_sections):
            with gr.Row(visible=(i == 0)) as section_row:
                section_name = gr.Textbox(label=f"Section {i + 1} Name", placeholder="Enter section name")
                num_questions = gr.Number(label=f"Section {i + 1} Number of Questions", precision=0, value=1)
                marks_per_question = gr.Number(label=f"Section {i + 1} Marks per Question", value=1)
                section_inputs.extend([section_name, num_questions, marks_per_question])
                section_containers.append(section_row)

        with gr.Row():
            generate_button = gr.Button("Generate Question Paper")
        with gr.Row():
            output_text = gr.Textbox(label="Generated Question Paper", lines=10, interactive=False, scale=1)
            download_button = gr.File(label="Download as Word Document", scale=1)

        # Update the visibility of section inputs based on `number_of_sections`
        def update_section_inputs(number_of_sections):
            # A cleared Number field arrives as None; show no rows in that case.
            visible_count = int(number_of_sections or 0)
            # gr.update() is the version-independent way to return property
            # changes; the per-component .update() method is not available in
            # newer Gradio releases.
            return [gr.update(visible=(i < visible_count)) for i in range(max_sections)]

        number_of_sections.change(
            update_section_inputs,
            inputs=[number_of_sections],
            outputs=section_containers,
        )
        generate_button.click(
            main_interface,
            inputs=[uploaded_files, name, number_of_sections, difficulty_level] + section_inputs,
            outputs=[output_text, download_button],
        )
    return demo
# Build the UI at import time so that tools which look up the module-level
# `demo` object still find it.
demo = generate_interface()

# Only start the server when the file is executed as a script, so importing
# the module has no network side effects.
if __name__ == "__main__":
    demo.launch(share = True)