Spaces:
Build error
Build error
File size: 10,323 Bytes
6e6ac0e 2269f28 6e6ac0e bfc7307 19525fd 6e6ac0e 118809a 6e6ac0e 118809a 6e6ac0e 118809a 6e6ac0e 43b1e53 6e6ac0e bfc7307 6e6ac0e 1df81a4 aa9258b 1df81a4 6e6ac0e 1df81a4 6e6ac0e 1df81a4 6e6ac0e 1df81a4 6e6ac0e c7ff1d2 6e6ac0e aa9258b 6e6ac0e f2b0bf1 33fd0cd a539ed3 f2b0bf1 6e6ac0e 2e1b8af 6e6ac0e bfc7307 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
import gradio as gr
import os
import fitz # PyMuPDF
from PIL import Image
import io
import base64
from docx import Document
import textwrap
import re
from groq import Groq
import google.generativeai as genai
import tempfile
# Credentials come from the environment (e.g. Space/repository secrets) so
# they never live in source control.
# NOTE(review): the keys that were previously committed in this file must be
# treated as compromised and revoked with both providers.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GOOGLE_API_KEY or not GROQ_API_KEY:
    # Fail at start-up with an actionable message instead of at the first
    # API call deep inside a request handler.
    raise RuntimeError(
        "Set the GOOGLE_API_KEY and GROQ_API_KEY environment variables "
        "before starting the application."
    )

# Gemini model used to write the question paper.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')

# Groq client used for the per-page OCR requests.
client = Groq(api_key=GROQ_API_KEY)
def encode_image(image, image_format='PNG'):
    """Serialize an image and return it as a base64-encoded text string.

    Args:
        image: Object exposing ``save(file_obj, format=...)``; the callers in
            this file pass a PIL image.
        image_format: Format name handed to ``image.save``. Defaults to
            ``'PNG'``, which is what this function always produced before
            the parameter existed.

    Returns:
        The saved image bytes, base64-encoded and decoded as UTF-8 text.
    """
    with io.BytesIO() as image_bytes:
        image.save(image_bytes, format=image_format)
        return base64.b64encode(image_bytes.getvalue()).decode('utf-8')
def pdf_page_to_image(pdf_path, page_num=0):
    """Render one page of a PDF and return it as a PIL image.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render.

    Returns:
        A PIL ``Image`` decoded from the PNG rendering of the page.
    """
    # The context manager closes the document even if rendering fails; the
    # PNG bytes are copied out first, so the image does not depend on the
    # document staying open.
    with fitz.open(pdf_path) as pdf_document:
        pixmap = pdf_document.load_page(page_num).get_pixmap()
        png_bytes = pixmap.tobytes("png")
    return Image.open(io.BytesIO(png_bytes))
def process_pdfs_and_generate_question_paper(uploaded_files, name, number_of_sections, sections, difficulty_level, calculated_marks):
    """OCR the uploaded PDFs and ask the Gemini model for a question paper.

    Every uploaded file is copied into a scratch directory; each page of each
    PDF found there is rendered to a PNG, sent to the Groq vision model for
    text extraction, and the combined text is embedded in the prompt given to
    ``model.generate_content``.

    Args:
        uploaded_files: Uploaded files, either as path strings or as objects
            carrying the path in a ``name`` attribute (presumably what Gradio
            passes, depending on version -- confirm against the deployed
            Gradio). ``None`` is treated as an empty list.
        name: Exam name. Not used by this function.
        number_of_sections: Number of sections. Not used by this function;
            ``sections`` is iterated directly.
        sections: Iterable of dicts with the keys ``section_name``,
            ``num_questions`` and ``marks_per_question``.
        difficulty_level: Value inserted into the prompt as the difficulty
            "on a scale of 5".
        calculated_marks: Total marks inserted into the prompt.

    Returns:
        The object returned by ``model.generate_content``; the caller in this
        file reads its ``.text`` attribute.

    Raises:
        ValueError: If none of the uploaded files is a PDF, since there would
            be no source content to base the paper on.
    """
    ocr_chunks = []

    # TemporaryDirectory removes the copied PDFs when the block exits, so
    # repeated requests no longer accumulate scratch directories.
    with tempfile.TemporaryDirectory() as temp_dir:
        for file in uploaded_files or []:
            # Accept both plain paths and wrappers exposing `.name`.
            source_path = getattr(file, "name", file)
            target_path = os.path.join(temp_dir, os.path.basename(source_path))
            with open(source_path, "rb") as src, open(target_path, "wb") as dst:
                dst.write(src.read())

        # Sorted so the page order in the prompt is deterministic; the
        # extension check ignores case so "NOTES.PDF" is not skipped.
        pdf_files = sorted(
            os.path.join(temp_dir, entry)
            for entry in os.listdir(temp_dir)
            if entry.lower().endswith('.pdf')
        )
        if not pdf_files:
            raise ValueError("Upload at least one PDF file to generate a question paper.")

        for pdf_file in pdf_files:
            with fitz.open(pdf_file) as pdf_document:
                total_pages = pdf_document.page_count

            for page_num in range(total_pages):
                image = pdf_page_to_image(pdf_file, page_num)
                base64_image = encode_image(image)

                # Send the page image to the Groq vision model for OCR.
                chat_completion = client.chat.completions.create(
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": "You are a professional OCR system. Extract the text from the images with highest accuracy. Don't write anything extra."},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        # encode_image() produces PNG data, so
                                        # the declared media type must match.
                                        "url": f"data:image/png;base64,{base64_image}",
                                    },
                                },
                            ],
                        }
                    ],
                    model="meta-llama/llama-4-scout-17b-16e-instruct",
                )

                # A missing message body is treated as an empty page.
                page_text = chat_completion.choices[0].message.content or ""
                ocr_chunks.append(f"--- OCR result for Page {page_num + 1} of {pdf_file} ---\n")
                ocr_chunks.append(page_text + "\n")
                ocr_chunks.append("-" * 50 + "\n")

    full_ocr_text = "".join(ocr_chunks)

    # One bullet per section describing how many questions it holds and what
    # each question is worth.
    question_distribution = "".join(
        f"- {section['num_questions']} {section['section_name']} questions, each carrying {section['marks_per_question']} marks.\n"
        for section in sections
    )

    prompt = f"""
Based on the content provided below, please generate a well-structured {calculated_marks}-mark question paper in English. The paper should consist of the following:
{question_distribution}
The total mark distribution should be strictly adhered to, with no question exceeding the specified marks.
- The difficulty level of the paper should be {difficulty_level} on a scale of 5, meaning the questions should be tricky and require in-depth understanding.
- Ensure no repetition of questions or concepts throughout the paper.
- Include critical mathematical problems wherever relevant, and feel free to create challenging new problems of a similar type to the ones presented in the content.
- Do not include answers to the questions in the paper.
- Each question will be versatile in concept.
- Some of the questions will be application based, which the student need to think critically before answering.
- Some questions will be conceptual to test students concept depth.
The questions should not be overly simple, and they should test the understanding and application of concepts, not just recall. Maintain a high standard throughout the paper.
Content:
\n\n{full_ocr_text}
"""
    return model.generate_content(prompt)
# Matches **bold** or *italic* spans. The opening marker must be followed by,
# and the closing marker preceded by, a non-space character, so list bullets
# ("* item") and spaced arithmetic ("2 * 3 * 4") are left as literal text.
_EMPHASIS_RE = re.compile(
    r'\*\*(?=\S)(?P<bold>.+?)(?<=\S)\*\*'
    r'|(?<!\*)\*(?=[^\s*])(?P<italic>.+?)(?<=[^\s*])\*(?!\*)'
)


def _add_markdown_runs(paragraph, line):
    """Append *line* to *paragraph*, styling only its emphasised spans."""
    cursor = 0
    for match in _EMPHASIS_RE.finditer(line):
        if match.start() > cursor:
            paragraph.add_run(line[cursor:match.start()])
        bold_text = match.group('bold')
        if bold_text is not None:
            paragraph.add_run(bold_text).bold = True
        else:
            paragraph.add_run(match.group('italic')).italic = True
        cursor = match.end()
    if cursor < len(line):
        paragraph.add_run(line[cursor:])


def save_markdown_to_word(markdown_text, name, file_name):
    """Write lightly formatted Markdown text to a Word document.

    Each input line becomes one paragraph. ``**text**`` spans are written in
    bold and ``*text*`` spans in italics with the markers removed; everything
    else on the line keeps normal formatting.

    Args:
        markdown_text: The text to convert.
        name: Used as the document's title heading.
        file_name: Path the ``.docx`` file is saved to.

    Returns:
        ``file_name``, unchanged.
    """
    doc = Document()
    doc.add_heading(f"{name}", 0)

    # Bullet glyphs are rewritten to an asterisk marker, as before.
    text = markdown_text.replace('•', ' *')

    for line in text.split('\n'):
        _add_markdown_runs(doc.add_paragraph(), line)

    doc.save(file_name)
    print(f"Markdown content saved to {file_name}")
    return file_name
def main_interface(uploaded_files, name, number_of_sections, difficulty_level, *args):
    """Collect the form values, generate the paper and export it to Word.

    Args:
        uploaded_files: Uploaded PDF files, forwarded to
            ``process_pdfs_and_generate_question_paper``.
        name: Exam name; used as the document heading and, after
            sanitising, as the base of the ``.docx`` file name.
        number_of_sections: How many of the section triples in ``args`` to
            use. ``None`` counts as zero and the value is capped at the
            number of triples actually supplied.
        difficulty_level: Forwarded to the generator.
        *args: Flat sequence of ``(section name, number of questions,
            marks per question)`` triples, one triple per section.

    Returns:
        A ``(text, path)`` tuple: the generated paper's text and the path of
        the saved Word document.
    """
    fields_per_section = 3
    available_sections = len(args) // fields_per_section
    # Clamp so an empty, negative or oversized count cannot index past args.
    section_count = max(0, min(int(number_of_sections or 0), available_sections))

    sections = []
    for index in range(section_count):
        start = index * fields_per_section
        section_name, num_questions, marks_per_question = args[start:start + fields_per_section]
        sections.append({
            "section_name": section_name,
            # A cleared numeric field is treated as zero rather than
            # raising on int(None) / float(None).
            "num_questions": int(num_questions or 0),
            "marks_per_question": float(marks_per_question or 0),
        })

    calculated_marks = sum(section['num_questions'] * section['marks_per_question'] for section in sections)
    response = process_pdfs_and_generate_question_paper(uploaded_files, name, number_of_sections, sections, difficulty_level, calculated_marks)

    # The exam name is user input: strip path separators and other special
    # characters before using it as a file name, and fall back to a fixed
    # name when nothing usable remains.
    safe_name = re.sub(r'[^\w\- ]+', '_', str(name or '')).strip(' _') or 'question_paper'
    word_file = save_markdown_to_word(response.text, name, f"{safe_name}.docx")
    return response.text, word_file
def generate_interface():
    """Build the Gradio Blocks UI for the question paper generator.

    The form holds a PDF upload, the exam name, a section count, a difficulty
    slider and up to ten section rows (name, number of questions, marks per
    question). Only as many rows as the section count are visible. The
    generate button calls ``main_interface`` and fills the text output and
    the Word-document download.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    max_sections = 10  # Upper bound on the number of section rows created.

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown(
                """
# **Question Paper Generator**
### *Effortless paper creation with just a click!*
"""
            )
        with gr.Row():
            uploaded_files = gr.File(label="Upload PDF Files", file_types=['.pdf'], file_count="multiple")
        with gr.Row():
            name = gr.Textbox(label="Exam Name")
            number_of_sections = gr.Number(label="Number of Sections", precision=0, value=1)
        with gr.Row():
            difficulty_level = gr.Slider(label="Difficulty Level", minimum=1, maximum=5, step=0.1)

        # Flat list of (name, questions, marks) widgets for main_interface,
        # plus the rows that contain them so visibility can be toggled.
        section_inputs = []
        section_containers = []
        for i in range(max_sections):
            with gr.Row(visible=(i == 0)) as section_row:
                section_name = gr.Textbox(label=f"Section {i + 1} Name", placeholder="Enter section name")
                num_questions = gr.Number(label=f"Section {i + 1} Number of Questions", precision=0, value=1)
                marks_per_question = gr.Number(label=f"Section {i + 1} Marks per Question", value=1)
            section_inputs.extend([section_name, num_questions, marks_per_question])
            section_containers.append(section_row)

        with gr.Row():
            generate_button = gr.Button("Generate Question Paper")
        with gr.Row():
            output_text = gr.Textbox(label="Generated Question Paper", lines=10, interactive=False, scale=1)
            download_button = gr.File(label="Download as Word Document", scale=1)

        def update_section_inputs(number_of_sections):
            """Show the first ``number_of_sections`` section rows, hide the rest."""
            # A cleared number field is treated as zero visible rows.
            visible_count = int(number_of_sections or 0)
            # gr.update() is used instead of the per-component .update()
            # method, which newer Gradio releases no longer provide.
            return [gr.update(visible=(i < visible_count)) for i in range(max_sections)]

        number_of_sections.change(
            update_section_inputs,
            inputs=[number_of_sections],
            outputs=section_containers,
        )
        generate_button.click(
            main_interface,
            inputs=[uploaded_files, name, number_of_sections, difficulty_level] + section_inputs,
            outputs=[output_text, download_button],
        )
    return demo
# Built at import time so tooling that looks for a module-level `demo`
# object can find it.
demo = generate_interface()

# Only start the server when the file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(share=True)