# Hugging Face Space — coding quiz app (scraped page status header: "Spaces: Sleeping")
| import os | |
| from datetime import datetime | |
| import random | |
| from typing import List | |
| import gradio as gr | |
| from datasets import load_dataset, Dataset, DatasetDict | |
| from huggingface_hub import whoami, InferenceClient | |
| import black # Add black import | |
# Initialize the inference client used by check_code for LLM grading.
client = InferenceClient(
    api_key=os.getenv("HF_API_KEY"),  # Make sure to set this environment variable
)

# Quiz configuration, overridable via environment variables.
# Load questions from Hugging Face dataset
EXAM_MAX_QUESTIONS = int(
    os.getenv("EXAM_MAX_QUESTIONS", 5)
)  # Limit quiz to max questions
EXAM_PASSING_SCORE = float(os.getenv("EXAM_PASSING_SCORE", 0.8))  # fraction of correct answers required
EXAM_DATASET_ID = "burtenshaw/dummy-code-quiz"

# prep the dataset for the quiz: always re-download, shuffle the question
# order, then truncate to at most EXAM_MAX_QUESTIONS entries.
ds = load_dataset(EXAM_DATASET_ID, split="train", download_mode="force_redownload")
quiz_data = list(ds)  # Convert dataset to list instead of using to_list()
random.shuffle(quiz_data)
if EXAM_MAX_QUESTIONS:
    quiz_data = quiz_data[:EXAM_MAX_QUESTIONS]
def format_python_code(code: str) -> str:
    """Run *code* through black and return the formatted text.

    On any formatting failure a UI warning is shown and the input is
    returned unchanged, so callers never have to handle the error.
    """
    try:
        formatted = black.format_str(code, mode=black.Mode())
    except Exception as e:
        gr.Warning(f"Code formatting failed: {str(e)}")
        return code
    return formatted
def check_code(
    user_code: str, solution: str, challenge: str, assessment_criteria: List[str]
):
    """
    Use LLM to evaluate if the user's code solution is correct.
    Returns True if the solution is correct, False otherwise.
    """
    # Normalize both submissions through black so the comparison is fair.
    formatted_user_code = format_python_code(user_code)
    formatted_solution = format_python_code(solution)

    # Render the criteria as a numbered list for the prompt.
    assessment_criteria_str = "\n".join(
        f"{num + 1}. {criterion}"
        for num, criterion in enumerate(assessment_criteria)
    )

    prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.
Challenge:
{challenge}
Reference Solution:
{formatted_solution}
Student's Solution:
{formatted_user_code}
Assessment Criteria:
{assessment_criteria_str}
Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?
4. Does it meet the assessment criteria?
Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""

    messages = [{"role": "user", "content": prompt}]
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-Coder-32B-Instruct",
            messages=messages,
            max_tokens=500,
        )
        response = completion.choices[0].message.content.strip()
        # The model is instructed to lead with the verdict; everything after
        # the first newline (if any) is treated as the explanation.
        is_correct = response.upper().startswith("CORRECT")
        explanation = response.split("\n", 1)[1] if "\n" in response else ""
        status = "β Correct!" if is_correct else "β Incorrect!"
        gr.Info(f"{status}\n\n{explanation}")
        return is_correct
    except Exception as e:
        gr.Warning(f"Error checking code: {str(e)}")
        # Fall back to simple string comparison if LLM fails
        is_correct = formatted_user_code.strip() == formatted_solution.strip()
        status = "β Correct!" if is_correct else "β Incorrect!"
        gr.Info(f"{status} (Fallback comparison)")
        return is_correct
def on_user_logged_in(token: gr.OAuthToken | None):
    """
    Handle user login state.
    On a valid token, hide the login button and reveal the Start button while
    keeping Next and Submit hidden. Also, clear the question text, code input,
    status, and image.
    """
    # The two cases differ only in which of login/start is visible, so a
    # single tuple keyed on the login state covers both.
    logged_in = token is not None
    return (
        gr.update(visible=not logged_in),  # login_btn
        gr.update(visible=logged_in),  # start_btn
        gr.update(visible=False),  # next_btn hidden
        gr.update(visible=False),  # submit_btn hidden
        "",  # Clear question_text
        gr.update(value="", visible=False),  # Clear code_input
        "",  # Clear status_text
        gr.update(value="", visible=False),  # Clear question_image
    )
def push_results_to_hub(
    user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
    """Push results to Hugging Face Hub.

    Validates that there are answers and a login token, computes the grade,
    and — if the passing threshold is met — pushes the answers as a private
    dataset split named after the user. Returns a status message string in
    every case.
    """
    # NOTE(review): debug print left in — consider removing or using logging.
    print(f"signed_in_message: {signed_in_message}")
    if not user_answers:  # Check if there are any answers to submit
        gr.Warning("No answers to submit!")
        return "No answers to submit!"
    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return "Please log in to Hugging Face before pushing!"
    # Calculate grade as the fraction of answers marked correct.
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    total_questions = len(user_answers)
    grade = correct_count / total_questions if total_questions > 0 else 0
    # Below the passing score: report the failure and do NOT push anything.
    if grade < float(EXAM_PASSING_SCORE):
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {float(EXAM_PASSING_SCORE):.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {float(EXAM_PASSING_SCORE):.1%}"
    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)
    user_info = whoami(token=token.token)
    username = user_info["name"]
    # Responses live in a sibling repo named "<quiz dataset>_responses".
    repo_id = f"{EXAM_DATASET_ID}_responses"
    # NOTE(review): naive local time, not UTC — confirm this is intended.
    submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Create a dataset with the user's answers and metadata
    submission_data = [
        {
            "username": username,
            "datetime": submission_time,
            "grade": grade,
            **answer,  # Include all answer data
        }
        for answer in user_answers
    ]
    try:
        # Try to load existing dataset
        existing_ds = load_dataset(repo_id)
        # Convert to DatasetDict if it isn't already
        if not isinstance(existing_ds, dict):
            existing_ds = DatasetDict({"default": existing_ds})
    except Exception:
        # If dataset doesn't exist, create empty DatasetDict
        existing_ds = DatasetDict()
    # Create new dataset from submission
    new_ds = Dataset.from_list(submission_data)
    # Add or update the split for this user (one split per username, so a
    # resubmission overwrites the user's previous answers).
    existing_ds[username] = new_ds
    # Push the updated dataset to the Hub
    existing_ds.push_to_hub(
        repo_id,
        private=True,  # Make it private by default since it contains student submissions
    )
    return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
def handle_quiz(question_idx, user_answers, submitted_code, is_start):
    """Handle quiz state and progression.

    Drives both the 'Start' and 'Next' buttons. On Start, reset to the first
    question without grading. On Next, grade the just-submitted code (if any),
    append the result to ``user_answers``, and advance. Past the last
    question, render the final results instead of a new question.

    Args:
        question_idx: Current question index (Gradio state).
        user_answers: List of answer dicts accumulated so far (Gradio state).
        submitted_code: Code currently in the editor.
        is_start: True when invoked from the Start button.

    Returns:
        A 10-tuple of updates for (question_text, code_input, status_text,
        question_idx, user_answers, start_btn, next_btn, submit_btn,
        final_markdown, question_image) — matching the click() outputs.
    """
    # If this is the first time (start=True), begin at question_idx=0.
    # (Fixed: the original computed an unused `start_btn_update` local here.)
    if is_start:
        question_idx = 0
    else:
        # Grade the previous question only when the user actually typed code.
        if question_idx < len(quiz_data) and submitted_code.strip():
            current_q = quiz_data[question_idx]
            # Format the submitted code before checking
            formatted_code = format_python_code(submitted_code)
            is_correct = check_code(
                formatted_code,
                current_q["solution"],
                current_q["challenge"],
                current_q["assessment_criteria"],
            )
            user_answers.append(
                {
                    "challenge": current_q["challenge"],
                    "submitted_code": formatted_code,  # Store formatted code
                    "correct_solution": current_q["solution"],
                    "assessment_criteria": current_q["assessment_criteria"],
                    "is_correct": is_correct,
                }
            )
        question_idx += 1

    # If we've reached the end, show final results
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        # Guard against ZeroDivisionError when no answers were recorded
        # (e.g. the quiz dataset is empty).
        grade = correct_count / len(user_answers) if user_answers else 0.0
        results_text = (
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
            f"Your answers:\n\n"
        )
        for idx, answer in enumerate(user_answers):
            results_text += (
                f"Question {idx + 1}: {'β ' if answer['is_correct'] else 'β'}\n"
            )
        return (
            "",  # question_text cleared
            gr.update(value="", visible=False),  # hide code_input
            f"{'β Passed!' if grade >= EXAM_PASSING_SCORE else 'β Did not pass'}",  # status_text
            question_idx,  # updated question index
            user_answers,  # accumulated answers
            gr.update(visible=False),  # start_btn hidden for quiz-in-progress
            gr.update(visible=False),  # next_btn hidden on completion
            gr.update(visible=True),  # submit_btn shown
            gr.update(value=results_text, visible=True),  # final_markdown with results
            gr.update(visible=False),  # question_image hidden on completion
        )
    else:
        # Show the next question
        q = quiz_data[question_idx]
        challenge_text = f"## Question {question_idx + 1} \n### {q['challenge']}"
        return (
            challenge_text,  # question_text
            gr.update(value=q["placeholder"], visible=True),  # code_input
            "Submit your code solution and click 'Next' to continue.",  # status_text
            question_idx,  # updated question_idx
            user_answers,  # user_answers
            gr.update(visible=False),  # start_btn hidden
            gr.update(visible=True),  # next_btn visible
            gr.update(visible=False),  # submit_btn hidden
            gr.update(visible=False),  # final_markdown hidden
            # Show the image component only when this question provides one.
            gr.update(value=q["image"], visible=bool(q["image"])),  # question_image
        )
# Build the Gradio UI: question/image on the left, code editor on the right,
# action buttons below, and a results panel at the bottom.
with gr.Blocks() as demo:
    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"

    # State variables shared across the button callbacks.
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])

    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
            "and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )
    with gr.Row(variant="panel"):
        with gr.Column():
            question_text = gr.Markdown("")
            question_image = gr.Image(
                label="Question Image", visible=False, type="pil"
            )  # Add image component
        with gr.Column():
            code_input = gr.Code(language="python", label="Your Solution", visible=False)
    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")
    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton()
        start_btn = gr.Button("Start")
        next_btn = gr.Button("Next βοΈ", visible=False)
        submit_btn = gr.Button("Submit β ", visible=False)
    with gr.Row(variant="compact"):
        final_markdown = gr.Markdown("", visible=False)

    # The gr.OAuthToken parameter of on_user_logged_in is injected by Gradio
    # from its type annotation, so `inputs` stays None.
    login_btn.click(
        fn=on_user_logged_in,
        inputs=None,
        outputs=[
            login_btn,
            start_btn,
            next_btn,
            submit_btn,
            question_text,
            code_input,
            status_text,
            question_image,
        ],
    )
    # Start and Next share handle_quiz; the gr.State(True/False) input tells
    # it whether to reset or to grade-and-advance. The outputs list order
    # must match handle_quiz's 10-tuple exactly.
    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(True)],
        outputs=[
            question_text,  # Markdown with question text
            code_input,  # Code input field
            status_text,  # Status text (instructions/status messages)
            question_idx,  # Updated question index (state)
            user_answers,  # Updated user answers (state)
            start_btn,  # Update for start button (will be hidden)
            next_btn,  # Update for next button (shown for in-progress quiz)
            submit_btn,  # Update for submit button (hidden until end)
            final_markdown,  # Final results markdown (hidden until quiz ends)
            question_image,  # Image update for the quiz question
        ],
    )
    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(False)],
        outputs=[
            question_text,
            code_input,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            next_btn,
            submit_btn,
            final_markdown,
            question_image,
        ],
    )
    # The token parameter of push_results_to_hub is injected from its
    # annotation; login_btn's value maps to signed_in_message.
    submit_btn.click(
        fn=push_results_to_hub,
        inputs=[user_answers, login_btn],
        outputs=status_text,
    )

if __name__ == "__main__":
    demo.launch()