Spaces:

Snigdhapaul2003
/

AI_Question_Paper_Setter

Build error

App Files Files Community

AI_Question_Paper_Setter / app.py

Snigdhapaul2003

Update app.py

bfc7307 verified 7 months ago

raw

history blame contribute delete

10.3 kB

	import gradio as gr
	import os
	import fitz # PyMuPDF
	from PIL import Image
	import io
	import base64
	from docx import Document
	import textwrap
	import re
	from groq import Groq
	import google.generativeai as genai
	import tempfile


	# API credentials come from the environment (on a hosted deployment, store
	# them as secrets) so that they never live in source control.
	# NOTE(review): the keys that used to be hard-coded here were published with
	# the file and should be treated as compromised -- revoke and rotate them.
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

	if not GOOGLE_API_KEY or not GROQ_API_KEY:
	    # Fail early with an actionable message instead of an opaque
	    # authentication error on the first request.
	    raise RuntimeError(
	        "Set the GOOGLE_API_KEY and GROQ_API_KEY environment variables "
	        "before starting the app."
	    )

	# Gemini model that writes the question paper.
	genai.configure(api_key=GOOGLE_API_KEY)
	model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')

	# Groq client used for the vision-model OCR of each PDF page.
	client = Groq(api_key=GROQ_API_KEY)

	# Helper function to encode images to base64
	def encode_image(image, image_format='PNG'):
	    """Serialize an image and return it as base64 text.

	    Args:
	        image: Object exposing ``save(fp, format=...)``; the callers in this
	            file pass PIL images.
	        image_format: Format name forwarded to ``image.save``. Defaults to
	            ``'PNG'``, the format this helper has always produced.

	    Returns:
	        The serialized image bytes, base64-encoded and decoded to ``str``.
	    """
	    with io.BytesIO() as buffer:
	        image.save(buffer, format=image_format)
	        # getvalue() must be read before the buffer is closed by the
	        # ``with`` block, hence the return inside it.
	        return base64.b64encode(buffer.getvalue()).decode('utf-8')

	# Convert a PDF page to an image
	def pdf_page_to_image(pdf_path, page_num=0):
	    """Render one page of a PDF and return it as a PIL image.

	    Args:
	        pdf_path: Path of the PDF file to open.
	        page_num: Zero-based index of the page to render (default: first
	            page).

	    Returns:
	        A PIL ``Image`` opened from the PNG rendering of the page.
	    """
	    # The context manager closes the document even when rendering raises;
	    # previously every call left an open document behind.
	    with fitz.open(pdf_path) as pdf_document:
	        page = pdf_document.load_page(page_num)
	        # Copy the rendering out as PNG bytes so nothing depends on the
	        # document once it has been closed.
	        png_bytes = page.get_pixmap().tobytes("png")
	    return Image.open(io.BytesIO(png_bytes))

	# Run OCR on a single rendered page
	def _ocr_page_image(base64_image):
	    """Send one base64-encoded PNG page to the Groq vision model and return its text."""
	    chat_completion = client.chat.completions.create(
	        messages=[
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": "You are a professional OCR system. Extract the text from the images with highest accuracy. Don't write anything extra."},
	                    {
	                        "type": "image_url",
	                        "image_url": {
	                            # encode_image() emits PNG data, so the data URL
	                            # must say PNG (it used to claim JPEG).
	                            "url": f"data:image/png;base64,{base64_image}",
	                        },
	                    },
	                ],
	            }
	        ],
	        model="meta-llama/llama-4-scout-17b-16e-instruct",  # Use the appropriate vision model
	    )
	    # Guard against an empty reply so the caller can always concatenate text.
	    return chat_completion.choices[0].message.content or ""


	# Process PDFs and generate a question paper
	def process_pdfs_and_generate_question_paper(uploaded_files, name, number_of_sections, sections, difficulty_level, calculated_marks):
	    """OCR the uploaded PDFs and ask the Gemini model for a question paper.

	    Every page of every PDF is rendered with ``pdf_page_to_image``, encoded
	    with ``encode_image`` and transcribed by the Groq vision model. The
	    combined text is embedded in a prompt passed to
	    ``model.generate_content``.

	    Args:
	        uploaded_files: Uploaded PDFs, either path strings or objects that
	            expose their path as ``.name``. ``None`` means "no files".
	        name: Exam name. Not used here; kept so existing callers keep working.
	        number_of_sections: Not used here; kept for the same reason.
	        sections: Iterable of dicts with the keys ``section_name``,
	            ``num_questions`` and ``marks_per_question``.
	        difficulty_level: Value quoted in the prompt as "on a scale of 5".
	        calculated_marks: Total marks quoted in the prompt.

	    Returns:
	        Whatever ``model.generate_content(prompt)`` returns; the caller in this
	        file reads its ``.text`` attribute.

	    Raises:
	        ValueError: If no PDF file was supplied.
	    """
	    # Work on the uploaded files where they already are, in upload order.
	    # The earlier copy into a scratch directory was never cleaned up, let
	    # same-named uploads overwrite each other, and made the page order depend
	    # on os.listdir().
	    # NOTE(review): whether gr.File delivers path strings or temp-file
	    # wrappers depends on the Gradio version -- both are accepted here.
	    pdf_files = []
	    for file in uploaded_files or []:
	        path = getattr(file, "name", file)
	        if str(path).lower().endswith('.pdf'):  # also accept ".PDF"
	            pdf_files.append(path)

	    if not pdf_files:
	        raise ValueError("Please upload at least one PDF file.")

	    # Extract text from PDFs
	    ocr_parts = []
	    for pdf_file in pdf_files:
	        with fitz.open(pdf_file) as pdf_document:
	            total_pages = pdf_document.page_count

	        # Only the file name goes into the prompt; the directory part is a
	        # temporary upload location that carries no meaning for the model.
	        source_label = os.path.basename(pdf_file)

	        for page_num in range(total_pages):
	            image = pdf_page_to_image(pdf_file, page_num)
	            page_text = _ocr_page_image(encode_image(image))

	            # Append the OCR result for this page to the full result
	            ocr_parts.append(f"--- OCR result for Page {page_num + 1} of {source_label} ---\n")
	            ocr_parts.append(page_text + "\n")
	            ocr_parts.append("-" * 50 + "\n")

	    full_ocr_text = "".join(ocr_parts)

	    # Generate question distribution
	    question_distribution = "".join(
	        f"- {section['num_questions']} {section['section_name']} questions, each carrying {section['marks_per_question']} marks.\n"
	        for section in sections
	    )

	    prompt = f"""
	Based on the content provided below, please generate a well-structured {calculated_marks}-mark question paper in English. The paper should consist of the following:
	{question_distribution}
	The total mark distribution should be strictly adhered to, with no question exceeding the specified marks.
	- The difficulty level of the paper should be {difficulty_level} on a scale of 5, meaning the questions should be tricky and require in-depth understanding.
	- Ensure no repetition of questions or concepts throughout the paper.
	- Include critical mathematical problems wherever relevant, and feel free to create challenging new problems of a similar type to the ones presented in the content.
	- Do not include answers to the questions in the paper.
	- Each question will be versatile in concept.
	- Some of the questions will be application based, which the student need to think critically before answering.
	- Some questions will be conceptual to test students concept depth.
	The questions should not be overly simple, and they should test the understanding and application of concepts, not just recall. Maintain a high standard throughout the paper.
	Content:
	\n\n{full_ocr_text}
	"""

	    return model.generate_content(prompt)


	# Inline markdown emphasis: **bold** or *italic*. The italic branch refuses
	# a "*" that is followed by whitespace or another "*", so bullet markers
	# such as " * item" and unmatched "**" are left as plain text.
	_INLINE_MARKUP = re.compile(
	    r'\*\*(?P<bold>.+?)\*\*'
	    r'|(?<!\*)\*(?![\s*])(?P<italic>[^*]+?)(?<!\s)\*(?!\*)'
	)


	def _split_inline_markdown(line):
	    """Split a line into ``(text, is_bold, is_italic)`` segments, markers removed."""
	    segments = []
	    cursor = 0
	    for match in _INLINE_MARKUP.finditer(line):
	        if match.start() > cursor:
	            segments.append((line[cursor:match.start()], False, False))
	        if match.group('bold') is not None:
	            segments.append((match.group('bold'), True, False))
	        else:
	            segments.append((match.group('italic'), False, True))
	        cursor = match.end()
	    if cursor < len(line):
	        segments.append((line[cursor:], False, False))
	    return segments


	def save_markdown_to_word(markdown_text, name, file_name):
	    """Write markdown-flavoured text to a Word document.

	    The document starts with ``name`` as its title, followed by one
	    paragraph per input line. ``**bold**`` and ``*italic*`` spans become
	    bold/italic runs with the asterisks removed; all other text is copied
	    verbatim. (Previously the marker patterns did not match asterisks at
	    all, and a single marker formatted the entire line.)

	    Args:
	        markdown_text: Text to convert.
	        name: Title placed at the top of the document.
	        file_name: Path the ``.docx`` file is saved to.

	    Returns:
	        ``file_name``, unchanged.
	    """
	    # Create a new Document
	    doc = Document()

	    # Add a title for the markdown content
	    doc.add_heading(f"{name}", 0)

	    # Replace bullet points
	    text = markdown_text.replace('•', ' *')

	    for line in text.split('\n'):
	        paragraph = doc.add_paragraph()
	        for segment, is_bold, is_italic in _split_inline_markdown(line):
	            run = paragraph.add_run(segment)
	            # Only switch formatting on; leaving the attributes untouched
	            # otherwise keeps plain runs identical to ordinary paragraphs.
	            if is_bold:
	                run.bold = True
	            if is_italic:
	                run.italic = True

	    # Save the document
	    doc.save(file_name)
	    print(f"Markdown content saved to {file_name}")

	    return file_name

	def main_interface(uploaded_files, name, number_of_sections, difficulty_level, *args):
	    """Gradio click handler: build the section list, generate the paper, export it.

	    Args:
	        uploaded_files: Uploaded PDF files, forwarded unchanged to
	            ``process_pdfs_and_generate_question_paper``.
	        name: Exam name; used as the document title and, after sanitizing, as
	            the base of the download file name.
	        number_of_sections: How many section triples in ``args`` to use.
	            ``None`` counts as 0 and larger values are capped at the number
	            of triples actually supplied.
	        difficulty_level: Forwarded unchanged to the generator.
	        *args: Flat sequence of ``(section name, number of questions, marks
	            per question)`` triples, one triple per section.

	    Returns:
	        Tuple ``(generated text, path of the saved .docx file)``.

	    Raises:
	        ValueError: If a used section lacks its question count or marks.
	    """
	    # Never read past the triples that were supplied, and tolerate a cleared
	    # (None) or fractional section count.
	    available_sections = len(args) // 3
	    section_count = max(0, min(int(number_of_sections or 0), available_sections))

	    sections = []
	    for i in range(section_count):
	        section_name, num_questions, marks_per_question = args[i * 3:i * 3 + 3]
	        if num_questions is None or marks_per_question is None:
	            raise ValueError(
	                f"Section {i + 1}: the number of questions and the marks per question are required."
	            )
	        marks = float(marks_per_question)
	        # Whole marks are kept as ints so the prompt reads "5 marks" rather
	        # than "5.0 marks".
	        if marks.is_integer():
	            marks = int(marks)
	        sections.append({"section_name": section_name, "num_questions": int(num_questions), "marks_per_question": marks})

	    calculated_marks = sum(section['num_questions'] * section['marks_per_question'] for section in sections)

	    response = process_pdfs_and_generate_question_paper(uploaded_files, name, section_count, sections, difficulty_level, calculated_marks)
	    generated_text = response.text

	    # The exam name is user input: strip anything that could act as a path
	    # separator or produce an empty/hidden file name, and write into a fresh
	    # temporary directory so concurrent requests cannot overwrite each other.
	    # The directory is intentionally left in place because the file must
	    # still exist after this handler returns, when it is offered for download.
	    safe_name = re.sub(r'[^\w\-. ]+', '_', name or '').strip(' .') or "question_paper"
	    output_path = os.path.join(tempfile.mkdtemp(), f"{safe_name}.docx")

	    word_file = save_markdown_to_word(generated_text, name, output_path)
	    return generated_text, word_file


	def generate_interface():
	    """Build the Gradio Blocks UI and wire its events.

	    The layout holds a PDF upload field, exam name, section count, difficulty
	    slider, up to ``max_sections`` rows of per-section inputs (only the first
	    row visible initially), a generate button, and the text/file outputs.

	    Returns:
	        The constructed ``gr.Blocks`` instance (not yet launched).
	    """
	    max_sections = 10  # Allow up to 10 sections

	    with gr.Blocks() as demo:
	        with gr.Row():
	            gr.Markdown(
	                """
	# Question Paper Generator
	### Effortless paper creation with just a click!
	"""
	            )

	        with gr.Row():
	            uploaded_files = gr.File(label="Upload PDF Files", file_types=['.pdf'], file_count="multiple")

	        with gr.Row():
	            name = gr.Textbox(label="Exam Name")
	            number_of_sections = gr.Number(label="Number of Sections", precision=0, value=1)

	        with gr.Row():
	            difficulty_level = gr.Slider(label="Difficulty Level", minimum=1, maximum=5, step=0.1)

	        # Section Inputs
	        section_inputs = []  # Flat list: name, question count, marks -- per section
	        section_containers = []  # Containers to update dynamically
	        for i in range(max_sections):
	            with gr.Row(visible=(i == 0)) as section_row:
	                section_name = gr.Textbox(label=f"Section {i + 1} Name", placeholder="Enter section name")
	                num_questions = gr.Number(label=f"Section {i + 1} Number of Questions", precision=0, value=1)
	                marks_per_question = gr.Number(label=f"Section {i + 1} Marks per Question", value=1)
	                section_inputs.extend([section_name, num_questions, marks_per_question])
	                section_containers.append(section_row)

	        with gr.Row():
	            generate_button = gr.Button("Generate Question Paper")

	        with gr.Row():
	            output_text = gr.Textbox(label="Generated Question Paper", lines=10, interactive=False, scale=1)
	            download_button = gr.File(label="Download as Word Document", scale=1)

	        # Update the visibility of section inputs based on `number_of_sections`
	        def update_section_inputs(number_of_sections):
	            # A cleared number field arrives as None; treat it as zero rows.
	            visible_count = int(number_of_sections or 0)
	            # gr.update(...) is the version-independent way to change a
	            # component property; calling .update() on a component instance
	            # is not supported by current Gradio releases.
	            return [gr.update(visible=(i < visible_count)) for i in range(max_sections)]

	        number_of_sections.change(
	            update_section_inputs,
	            inputs=[number_of_sections],
	            outputs=section_containers,
	        )

	        generate_button.click(
	            main_interface,
	            inputs=[uploaded_files, name, number_of_sections, difficulty_level] + section_inputs,
	            outputs=[output_text, download_button],
	        )

	    return demo

	# Kept at module level so tooling that imports this file can find `demo`.
	demo = generate_interface()

	# Start the server only when the file is executed as a script, so importing
	# the module (tests, reload tooling) does not launch a public share link.
	if __name__ == "__main__":
	    demo.launch(share=True)