File size: 10,323 Bytes
6e6ac0e
 
 
 
 
 
 
 
 
 
 
2269f28
6e6ac0e
 
bfc7307
 
19525fd
6e6ac0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118809a
 
 
 
 
6e6ac0e
 
 
 
 
118809a
6e6ac0e
 
 
 
 
 
 
118809a
6e6ac0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43b1e53
6e6ac0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfc7307
6e6ac0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1df81a4
aa9258b
1df81a4
6e6ac0e
1df81a4
 
 
 
 
 
 
 
 
 
 
6e6ac0e
1df81a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e6ac0e
1df81a4
6e6ac0e
c7ff1d2
 
 
 
 
 
 
6e6ac0e
 
 
 
 
 
 
 
 
 
 
 
 
aa9258b
6e6ac0e
 
 
 
 
 
f2b0bf1
 
33fd0cd
 
a539ed3
f2b0bf1
6e6ac0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e1b8af
 
6e6ac0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfc7307
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
import gradio as gr
import os
import fitz  # PyMuPDF
from PIL import Image
import io
import base64
from docx import Document
import textwrap
import re
from groq import Groq
import google.generativeai as genai
import tempfile


# API credentials are read from the environment so that secrets never live in
# source control. Set GOOGLE_API_KEY and GROQ_API_KEY before starting the app
# (for example as deployment secrets).
# NOTE(review): keys that were previously committed in this file are exposed in
# the repository history and should be revoked and re-issued.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

if not GOOGLE_API_KEY or not GROQ_API_KEY:
    # Fail early with an actionable message instead of an opaque auth error
    # on the first API call.
    raise RuntimeError(
        "GOOGLE_API_KEY and GROQ_API_KEY must be set in the environment."
    )

# Gemini model that turns the OCR text into a question paper.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp-01-21')

# Groq client used to run the per-page OCR request.
client = Groq(api_key=GROQ_API_KEY)

# Helper function to encode images to base64
def encode_image(image):
    """Return *image* serialised as PNG and encoded as a base64 text string.

    ``image`` only needs to provide ``save(fp, format=...)``; the PNG bytes it
    writes are base64-encoded and returned as a UTF-8 ``str``.
    """
    buffer = io.BytesIO()
    try:
        image.save(buffer, format='PNG')
        png_bytes = buffer.getvalue()
    finally:
        buffer.close()

    encoded = base64.b64encode(png_bytes)
    return encoded.decode('utf-8')

# Convert a PDF page to an image
def pdf_page_to_image(pdf_path, page_num=0):
    """Render a single page of a PDF and return it as a PIL image.

    Args:
        pdf_path: Path of the PDF file to open.
        page_num: Zero-based index of the page to render (default: first page).

    Returns:
        A ``PIL.Image.Image`` decoded from the PNG rendering of the page.
    """
    # Open the document in a context manager so the file handle is released
    # even if loading or rendering the page raises.
    with fitz.open(pdf_path) as pdf_document:
        page = pdf_document.load_page(page_num)
        png_bytes = page.get_pixmap().tobytes("png")

    # The PNG bytes are fully in memory, so the image does not depend on the
    # (now closed) document.
    return Image.open(io.BytesIO(png_bytes))

# Process PDFs and generate a question paper
def process_pdfs_and_generate_question_paper(uploaded_files, name, number_of_sections, sections, difficulty_level, calculated_marks):
    """OCR the uploaded PDFs page by page and ask the model for a question paper.

    Args:
        uploaded_files: Paths of the uploaded files; only names ending in
            ``.pdf`` (case-insensitive) are processed.
        name: Exam name (not used here; kept for interface compatibility).
        number_of_sections: Section count (not used here; kept for interface
            compatibility).
        sections: Iterable of dicts with the keys ``section_name``,
            ``num_questions`` and ``marks_per_question``.
        difficulty_level: Value inserted into the prompt as "N on a scale of 5".
        calculated_marks: Total marks inserted into the prompt.

    Returns:
        The object returned by ``model.generate_content(prompt)``.

    Raises:
        ValueError: If no files were uploaded.
    """
    import shutil  # local import: only needed for the temporary working copy

    if not uploaded_files:
        raise ValueError("No PDF files were uploaded.")

    ocr_chunks = []
    temp_dir = tempfile.mkdtemp()
    try:
        # Copy the uploads into a private working directory. The index prefix
        # keeps two uploads with the same base name from overwriting each
        # other and preserves the upload order.
        pdf_files = []
        for index, file in enumerate(uploaded_files):
            file_path = os.path.join(temp_dir, f"{index:03d}_{os.path.basename(file)}")
            shutil.copyfile(file, file_path)
            if file_path.lower().endswith('.pdf'):
                pdf_files.append(file_path)

        for pdf_file in pdf_files:
            # Only the page count is needed here; close the document right away.
            with fitz.open(pdf_file) as pdf_document:
                total_pages = pdf_document.page_count

            for page_num in range(total_pages):
                image = pdf_page_to_image(pdf_file, page_num)
                base64_image = encode_image(image)

                # Send the page image to the Groq API for OCR.
                chat_completion = client.chat.completions.create(
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": "You are a professional OCR system. Extract the text from the images with highest accuracy. Don't write anything extra."},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        # encode_image produces PNG data, so the
                                        # media type must say PNG as well.
                                        "url": f"data:image/png;base64,{base64_image}",
                                    },
                                },
                            ],
                        }
                    ],
                    model="meta-llama/llama-4-scout-17b-16e-instruct",  # Use the appropriate vision model
                )

                # The API may return no content for a page; treat it as empty
                # text instead of failing on string concatenation.
                page_text = chat_completion.choices[0].message.content or ""
                ocr_chunks.append(
                    f"--- OCR result for Page {page_num + 1} of {pdf_file} ---\n"
                    + page_text + "\n"
                    + "-" * 50 + "\n"
                )
    finally:
        # Best-effort cleanup so working copies do not pile up between requests.
        shutil.rmtree(temp_dir, ignore_errors=True)

    full_ocr_text = "".join(ocr_chunks)

    # One bullet line per section describing the mark distribution.
    question_distribution = "".join(
        f"- {section['num_questions']} {section['section_name']} questions, each carrying {section['marks_per_question']} marks.\n"
        for section in sections
    )

    prompt = f"""
          Based on the content provided below, please generate a well-structured {calculated_marks}-mark question paper in English. The paper should consist of the following:
          {question_distribution}
          The total mark distribution should be strictly adhered to, with no question exceeding the specified marks.
          - The difficulty level of the paper should be {difficulty_level} on a scale of 5, meaning the questions should be tricky and require in-depth understanding.
          - Ensure no repetition of questions or concepts throughout the paper.
          - Include critical mathematical problems wherever relevant, and feel free to create challenging new problems of a similar type to the ones presented in the content.
          - Do not include answers to the questions in the paper.
          - Each question will be versatile in concept.
          - Some of the questions will be application based, which the student need to think critically before answering.
          - Some questions will be conceptual to test students concept depth.
          The questions should not be overly simple, and they should test the understanding and application of concepts, not just recall. Maintain a high standard throughout the paper.
          Content:
          \n\n{full_ocr_text}
          """

    response = model.generate_content(prompt)

    return response


def save_markdown_to_word(markdown_text, name, file_name):
    """Write lightly formatted Markdown text to a Word document.

    Each input line becomes one paragraph. Within a line, ``**text**`` spans
    are written as bold runs and ``*text*`` spans as italic runs; everything
    else (including a lone ``*`` such as a bullet marker) is written as plain
    text.

    Args:
        markdown_text: The Markdown source to convert.
        name: Title placed at the top of the document.
        file_name: Path the ``.docx`` file is saved to.

    Returns:
        ``file_name``, unchanged.
    """
    doc = Document()

    # Title of the document.
    doc.add_heading(f"{name}", 0)

    # Normalise bullet characters to the Markdown-style marker.
    text = (markdown_text or "").replace('•', '  *')

    # Bold is listed first so "**x**" is not read as two italic markers. The
    # italic form must not start with whitespace, which keeps "* item" bullets
    # and stray asterisks as plain text.
    emphasis = re.compile(r'(\*\*.+?\*\*|\*[^\s*][^*]*\*)')

    for line in text.split('\n'):
        paragraph = doc.add_paragraph()
        # re.split with one capturing group alternates plain text (even
        # positions) and matched emphasis tokens (odd positions).
        for position, piece in enumerate(emphasis.split(line)):
            if not piece:
                continue
            if position % 2 == 0:
                paragraph.add_run(piece)
            elif piece.startswith('**'):
                paragraph.add_run(piece[2:-2]).bold = True
            else:
                paragraph.add_run(piece[1:-1]).italic = True

    # Save the document.
    doc.save(file_name)
    print(f"Markdown content saved to {file_name}")

    return file_name

def main_interface(uploaded_files, name, number_of_sections, difficulty_level, *args):
    """Gradio callback: build the section list, generate the paper, save it.

    Args:
        uploaded_files: Uploaded PDF paths, forwarded to the generator.
        name: Exam name; used as the document title and (sanitised) file name.
        number_of_sections: How many of the section input groups to read.
        difficulty_level: Difficulty value forwarded to the generator.
        *args: Flat sequence of section inputs in groups of three:
            section name, number of questions, marks per question.

    Returns:
        A ``(text, path)`` tuple: the generated paper text and the path of the
        saved Word document.
    """
    fields_per_section = 3

    # Never read more groups than were actually passed in, and tolerate a
    # cleared (None) or float-valued number field.
    available_sections = len(args) // fields_per_section
    section_count = max(0, min(int(number_of_sections or 0), available_sections))

    sections = []
    for i in range(section_count):
        start = i * fields_per_section
        section_name, num_questions, marks_per_question = args[start:start + fields_per_section]
        sections.append({
            "section_name": section_name,
            "num_questions": int(num_questions),
            "marks_per_question": float(marks_per_question),
        })

    calculated_marks = sum(section['num_questions'] * section['marks_per_question'] for section in sections)

    response = process_pdfs_and_generate_question_paper(uploaded_files, name, section_count, sections, difficulty_level, calculated_marks)

    # The exam name is free text: strip characters that are invalid in file
    # names (or that would change the target directory) and fall back to a
    # fixed name when nothing usable is left.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', name or '').strip(' .') or 'question_paper'

    word_file = save_markdown_to_word(response.text, name, f"{safe_name}.docx")
    return response.text, word_file


def generate_interface():
    """Build and return the Gradio Blocks UI for the question paper generator.

    The layout contains a file upload, the exam name, the number of sections,
    a difficulty slider, up to ten section rows (name, number of questions,
    marks per question), a generate button, and the two outputs (paper text
    and downloadable Word file). Only as many section rows as requested are
    visible.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    max_sections = 10  # Allow up to 10 sections

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown(
                """
                # **Question Paper Generator**  
                ### *Effortless paper creation with just a click!*
                """
            )

        with gr.Row():
            uploaded_files = gr.File(label="Upload PDF Files", file_types=['.pdf'], file_count="multiple")

        with gr.Row():
            name = gr.Textbox(label="Exam Name")
            number_of_sections = gr.Number(label="Number of Sections", precision=0, value=1)

        with gr.Row():
            difficulty_level = gr.Slider(label="Difficulty Level", minimum=1, maximum=5, step=0.1)

        # Section inputs: a flat list (three components per section) for the
        # click handler, plus the row containers whose visibility is toggled.
        section_inputs = []
        section_containers = []
        for i in range(max_sections):
            with gr.Row(visible=(i == 0)) as section_row:
                section_name = gr.Textbox(label=f"Section {i + 1} Name", placeholder="Enter section name")
                num_questions = gr.Number(label=f"Section {i + 1} Number of Questions", precision=0, value=1)
                marks_per_question = gr.Number(label=f"Section {i + 1} Marks per Question", value=1)
                section_inputs.extend([section_name, num_questions, marks_per_question])
            section_containers.append(section_row)

        with gr.Row():
            generate_button = gr.Button("Generate Question Paper")

        with gr.Row():
            output_text = gr.Textbox(label="Generated Question Paper", lines=10, interactive=False, scale=1)
            download_button = gr.File(label="Download as Word Document", scale=1)

        # Update the visibility of section inputs based on `number_of_sections`
        def update_section_inputs(number_of_sections):
            # A cleared number field arrives as None; show no rows in that case.
            visible_count = int(number_of_sections or 0)
            # gr.update() is used instead of the per-component .update()
            # method, which is not available in newer Gradio releases.
            return [gr.update(visible=(i < visible_count)) for i in range(max_sections)]

        number_of_sections.change(
            update_section_inputs,
            inputs=[number_of_sections],
            outputs=section_containers,
        )

        generate_button.click(
            main_interface,
            inputs=[uploaded_files, name, number_of_sections, difficulty_level] + section_inputs,
            outputs=[output_text, download_button],
        )

    return demo

# Keep `demo` at module level so it stays importable, but only start the
# server when this file is executed as a script.
demo = generate_interface()

if __name__ == "__main__":
    demo.launch(share=True)