Push.

Files changed:
- README.md (+60 −5)
- app.py (+570 −0)
- flake.lock (+27 −0)
- flake.nix (+41 −0)
- get_popular_eval_datasets.py (+100 −0)
- requirements.txt (+5 −0)
README.md CHANGED
@@ -1,12 +1,67 @@

Front-matter changes:

```diff
 ---
-title:
+title: Evaluation Dataset Quiz
-emoji:
+emoji: 🧠
-colorFrom:
+colorFrom: blue
 colorTo: green
 sdk: gradio
-sdk_version:
+sdk_version: 4.19.2
 app_file: app.py
 pinned: false
+license: mit
 ---
```

New README body:

# HuggingFace Evaluation Dataset Quiz

Test your knowledge with questions from popular evaluation datasets!

## Features

- 🎯 Interactive quiz interface built with Gradio
- 📊 8 popular evaluation datasets including:
  - GSM8K (Grade School Math)
  - MMLU (Massive Multitask Language Understanding)
  - AI2 ARC (Science Questions)
  - HellaSwag (Commonsense NLI)
  - WinoGrande (Winograd Schema)
  - BoolQ (Boolean Questions)
  - SQuAD (Reading Comprehension)
  - PIQA (Physical Reasoning)
- 🎲 Random question selection
- ✅ Immediate feedback on answers
- 📈 Score tracking
- 🔄 Support for multiple question formats:
  - Multiple choice
  - True/False
  - Text input for QA tasks
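Each dataset listed above is pulled directly from the Hugging Face Hub via the `datasets` library. A minimal sketch of what that looks like (dataset ID, config, and field names taken from the `app.py` configuration further down; other datasets expose different fields):

```python
from datasets import load_dataset

# Load one of the supported benchmarks (same ID/config/split as configured in app.py).
gsm8k = load_dataset("openai/gsm8k", "main", split="train")

# Each example is a plain dict; GSM8K exposes "question" and "answer" fields.
example = gsm8k[0]
print(example["question"])
print(example["answer"])
```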
## How to Use

1. **Select a Dataset**: Choose from the available evaluation datasets
2. **Choose Number of Questions**: Select how many questions you want (5-20)
3. **Start Quiz**: Click "Start Quiz" to begin
4. **Answer Questions**: Select or type your answer and click "Submit Answer"
5. **Get Feedback**: See if you got it right and learn the correct answer
6. **Continue**: Click "Next Question" to proceed
7. **View Score**: See your final score at the end

## Local Development

```bash
# Clone the repository
git clone <your-repo-url>
cd eval_quiz_app

# Install dependencies
pip install -r requirements.txt

# Run the app
python app.py
```

## Deployment

This app is designed to run on HuggingFace Spaces. Simply push to your Space repository and it will deploy automatically.
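If you prefer to push programmatically instead of via git, the `huggingface_hub` client (already in `requirements.txt`) can upload the project folder to an existing Space. A minimal sketch, assuming the hypothetical Space `your-username/eval-quiz` already exists and you are logged in with `huggingface-cli login`:

```python
from huggingface_hub import HfApi

api = HfApi()  # picks up the token cached by `huggingface-cli login`
api.upload_folder(
    folder_path=".",                    # local project directory
    repo_id="your-username/eval-quiz",  # hypothetical Space name
    repo_type="space",
    commit_message="Push.",
)
```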
## Contributing

Feel free to add more datasets or improve the quiz functionality!
app.py ADDED
@@ -0,0 +1,570 @@
```python
import gradio as gr
from datasets import load_dataset, get_dataset_config_names
import random
from typing import List, Tuple
import logging

# Set up logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Popular evaluation datasets with their configurations
EVAL_DATASETS = {
    "openai/gsm8k": {
        "name": "GSM8K - Grade School Math",
        "type": "qa",
        "config": "main",
        "question_field": "question",
        "answer_field": "answer",
        "split": "train",
    },
    "cais/mmlu": {
        "name": "MMLU - Massive Multitask Language Understanding",
        "type": "multiple_choice",
        "config": "all",
        "question_field": "question",
        "choices_field": "choices",
        "answer_field": "answer",
        "split": "test",
    },
    "allenai/ai2_arc": {
        "name": "AI2 ARC - Science Questions",
        "type": "multiple_choice",
        "config": "ARC-Challenge",
        "question_field": "question",
        "choices_field": "choices",
        "answer_field": "answerKey",
        "split": "train",
    },
    "Rowan/hellaswag": {
        "name": "HellaSwag - Commonsense NLI",
        "type": "multiple_choice",
        "question_field": "ctx",
        "choices_field": "endings",
        "answer_field": "label",
        "split": "train",
    },
    "allenai/winogrande": {
        "name": "WinoGrande - Winograd Schema",
        "type": "binary_choice",
        "config": "winogrande_xl",
        "question_field": "sentence",
        "option1_field": "option1",
        "option2_field": "option2",
        "answer_field": "answer",
        "split": "train",
    },
    "google/boolq": {
        "name": "BoolQ - Boolean Questions",
        "type": "true_false",
        "question_field": "question",
        "context_field": "passage",
        "answer_field": "answer",
        "split": "train",
    },
    "rajpurkar/squad": {
        "name": "SQuAD - Reading Comprehension",
        "type": "extractive_qa",
        "question_field": "question",
        "context_field": "context",
        "answer_field": "answers",
        "split": "train",
    },
    "allenai/piqa": {
        "name": "PIQA - Physical Reasoning",
        "type": "binary_choice",
        "question_field": "goal",
        "option1_field": "sol1",
        "option2_field": "sol2",
        "answer_field": "label",
        "split": "train",
    },
}


class QuizApp:
    def __init__(self):
        self.current_dataset = None
        self.current_dataset_name = None
        self.questions = []
        self.current_question_idx = 0
        self.score = 0
        self.total_questions = 0

    def load_dataset_questions(self, dataset_name: str, num_questions: int = 10):
        """Load random questions from the selected dataset"""
        try:
            config = EVAL_DATASETS[dataset_name]

            # Try to load dataset with config if specified
            try:
                if "config" in config:
                    dataset = load_dataset(
                        dataset_name, config["config"], split=config["split"]
                    )
                else:
                    dataset = load_dataset(dataset_name, split=config["split"])
            except ValueError as e:
                # If config is missing, try to get available configs
                if "Config name is missing" in str(e):
                    configs = get_dataset_config_names(dataset_name)
                    # Use first config or "all" if available
                    if "all" in configs:
                        selected_config = "all"
                    else:
                        selected_config = configs[0]
                    print(
                        f"Auto-selected config '{selected_config}' for {dataset_name}"
                    )
                    dataset = load_dataset(
                        dataset_name, selected_config, split=config["split"]
                    )
                else:
                    raise e

            # Sample random questions
            total_examples = len(dataset)
            num_questions = min(num_questions, total_examples)
            indices = random.sample(range(total_examples), num_questions)

            self.questions = []
            for idx in indices:
                example = dataset[idx]
                self.questions.append(example)

            self.current_dataset = config
            self.current_dataset_name = dataset_name
            self.current_question_idx = 0
            self.score = 0
            self.total_questions = len(self.questions)

            return True, f"Loaded {num_questions} questions from {config['name']}"

        except Exception as e:
            return False, f"Error loading dataset: {str(e)}"

    def get_current_question(self) -> Tuple[str, List[str], str]:
        """Get the current question formatted for display"""
        if not self.questions or self.current_question_idx >= len(self.questions):
            return "", [], ""

        question_data = self.questions[self.current_question_idx]
        config = self.current_dataset

        logging.info(f"\n{'=' * 60}")
        logging.info(f"Dataset: {self.current_dataset_name}")
        logging.info(f"Question {self.current_question_idx + 1}/{self.total_questions}")
        logging.info(f"Raw question data: {repr(question_data)}")
        logging.info(f"{'=' * 60}\n")

        # Format question based on dataset type
        question_type = config["type"]

        if question_type == "multiple_choice":
            question = question_data[config["question_field"]]
            choices = question_data[config["choices_field"]]
            if config["answer_field"] in question_data:
                answer = question_data[config["answer_field"]]
            else:
                answer = ""

            # Format choices with letters
            formatted_choices = [
                f"{chr(65 + i)}. {choice}" for i, choice in enumerate(choices)
            ]
            return question, formatted_choices, question_type

        elif question_type == "true_false":
            question = question_data[config["question_field"]]
            if "context_field" in config:
                context = question_data[config["context_field"]]
                question = f"Context: {context}\n\nQuestion: {question}"
            return question, ["True", "False"], question_type

        elif question_type == "binary_choice":
            question = question_data[config["question_field"]]
            option1 = question_data[config["option1_field"]]
            option2 = question_data[config["option2_field"]]
            return question, [f"A. {option1}", f"B. {option2}"], question_type

        elif question_type == "qa" or question_type == "extractive_qa":
            question = question_data[config["question_field"]]
            if "context_field" in config and config["context_field"] in question_data:
                context = question_data[config["context_field"]]
                question = f"Context: {context[:500]}...\n\nQuestion: {question}"
            return question, [], question_type

        return "", [], ""

    def format_answer(self, answer: str, dataset_name: str) -> str:
        """Format answer based on dataset type for better readability"""
        if dataset_name == "openai/gsm8k":
            # GSM8K has specific formatting with equations and final answer
            # Replace <<...>> with proper math formatting
            import re

            # Convert <<equation>> to LaTeX
            answer = re.sub(r"<<([^>]+)>>", r"$\1$", answer)
            # Format the final answer line
            answer = answer.replace("####", "\n\n**Final Answer:**")
            # Ensure proper line breaks
            answer = answer.replace(". ", ".\n")
            return answer
        elif dataset_name == "cais/mmlu":
            # MMLU answers are usually single letters or short phrases
            return answer
        elif dataset_name == "rajpurkar/squad":
            # SQuAD answers might need context
            return answer
        else:
            # Default formatting for other datasets
            return answer

    def check_answer(self, user_answer: str) -> Tuple[bool, str]:
        """Check if the user's answer is correct"""
        if not self.questions or self.current_question_idx >= len(self.questions):
            return False, "No question available"

        question_data = self.questions[self.current_question_idx]
        config = self.current_dataset
        question_type = config["type"]

        if question_type == "multiple_choice":
            correct_answer_idx = question_data[config["answer_field"]]
            # Handle both numeric and letter answers
            if isinstance(correct_answer_idx, int):
                correct_letter = chr(65 + correct_answer_idx)
            else:
                correct_letter = str(correct_answer_idx)

            user_letter = user_answer.strip().upper()[0] if user_answer else ""
            is_correct = user_letter == correct_letter

            if is_correct:
                return True, "✅ **Correct!**"
            else:
                choices = question_data[config["choices_field"]]
                correct_choice = (
                    choices[correct_answer_idx]
                    if isinstance(correct_answer_idx, int)
                    else correct_answer_idx
                )
                logging.info(f"Raw answer (multiple choice): {repr(correct_choice)}")
                formatted_answer = self.format_answer(
                    correct_choice, self.current_dataset_name
                )
                return (
                    False,
                    f"❌ **Incorrect**\n\nThe correct answer was **{correct_letter}**:\n\n{formatted_answer}",
                )

        elif question_type == "true_false":
            correct_answer = question_data[config["answer_field"]]
            user_bool = user_answer.lower().strip() == "true"
            is_correct = user_bool == correct_answer

            if is_correct:
                return True, "✅ **Correct!**"
            else:
                return (
                    False,
                    f"❌ **Incorrect**\n\nThe correct answer was **{correct_answer}**",
                )

        elif question_type == "binary_choice":
            correct_answer_idx = question_data[config["answer_field"]]
            user_idx = 0 if user_answer.strip().upper().startswith("A") else 1
            is_correct = user_idx == correct_answer_idx

            if is_correct:
                return True, "✅ **Correct!**"
            else:
                correct_letter = "A" if correct_answer_idx == 0 else "B"
                option_field = (
                    config["option1_field"]
                    if correct_answer_idx == 0
                    else config["option2_field"]
                )
                correct_option = question_data[option_field]
                logging.info(f"Raw answer (binary choice): {repr(correct_option)}")
                formatted_answer = self.format_answer(
                    correct_option, self.current_dataset_name
                )
                return (
                    False,
                    f"❌ **Incorrect**\n\nThe correct answer was **{correct_letter}**:\n\n{formatted_answer}",
                )

        elif question_type in ["qa", "extractive_qa"]:
            # For QA, we'll do a simple check - in real app, you'd want more sophisticated matching
            correct_answer = question_data[config["answer_field"]]
            if isinstance(correct_answer, dict) and "text" in correct_answer:
                correct_answer = (
                    correct_answer["text"][0] if correct_answer["text"] else ""
                )
            elif isinstance(correct_answer, list) and len(correct_answer) > 0:
                correct_answer = (
                    correct_answer[0]["text"]
                    if isinstance(correct_answer[0], dict)
                    else str(correct_answer[0])
                )
            else:
                correct_answer = str(correct_answer)

            # Extract final answer for GSM8K and similar datasets
            import re

            # For GSM8K, extract the final answer after ####
            if "####" in correct_answer:
                final_answer_match = re.search(r"####\s*(.+)", correct_answer)
                if final_answer_match:
                    final_answer = final_answer_match.group(1).strip()
                else:
                    final_answer = correct_answer
            else:
                final_answer = correct_answer

            # Extract numbers from both answers for comparison
            correct_numbers = re.findall(r"-?\d+\.?\d*", final_answer)
            user_numbers = re.findall(r"-?\d+\.?\d*", user_answer)

            # Check if answers match
            is_correct = False

            # If both have numbers, compare the numbers
            if correct_numbers and user_numbers:
                # Convert to float for comparison to handle decimals
                try:
                    correct_num = float(
                        correct_numbers[-1]
                    )  # Take the last number as final answer
                    user_num = float(user_numbers[-1])  # Take the last number from user
                    is_correct = (
                        abs(correct_num - user_num) < 0.0001
                    )  # Small tolerance for float comparison
                except ValueError:
                    # Fall back to string comparison
                    is_correct = correct_numbers[-1] == user_numbers[-1]
            else:
                # Fall back to substring matching for non-numeric answers
                is_correct = (
                    user_answer.lower().strip() in correct_answer.lower()
                    or correct_answer.lower() in user_answer.lower().strip()
                )

            if is_correct:
                return True, "✅ **Correct!**"
            else:
                logging.info(f"Raw answer (QA): {repr(correct_answer)}")
                logging.info(f"Extracted final answer: {repr(final_answer)}")
                logging.info(
                    f"Correct numbers: {correct_numbers}, User numbers: {user_numbers}"
                )
                formatted_answer = self.format_answer(
                    correct_answer, self.current_dataset_name
                )
                return (
                    False,
                    f"❌ **Incorrect**\n\n**The correct answer was:**\n\n{formatted_answer}",
                )

        return False, "Unknown question type"


# Create global quiz app instance
quiz_app = QuizApp()


def create_dataset_display():
    """Create the dataset listing display"""
    dataset_info = []
    for dataset_id, config in EVAL_DATASETS.items():
        dataset_info.append(
            f"**{config['name']}**\n- Dataset: {dataset_id}\n- Type: {config['type']}"
        )

    return "\n\n".join(dataset_info)


def start_quiz(dataset_choice: str, num_questions: int):
    """Start a new quiz with the selected dataset"""
    # Extract dataset ID from the choice
    dataset_id = None
    for did, config in EVAL_DATASETS.items():
        if config["name"] in dataset_choice:
            dataset_id = did
            break

    if not dataset_id:
        return (
            "Please select a dataset",
            "",
            "",
            gr.update(visible=False),
            gr.update(visible=False),
            "0/0",
        )

    success, message = quiz_app.load_dataset_questions(dataset_id, num_questions)

    if success:
        question, choices, q_type = quiz_app.get_current_question()

        if q_type in ["multiple_choice", "true_false", "binary_choice"]:
            return (
                message,
                question,
                gr.update(choices=choices, visible=True, value=None),
                gr.update(visible=False),
                gr.update(visible=True),
                f"Question 1/{quiz_app.total_questions}",
            )
        else:
            return (
                message,
                question,
                gr.update(visible=False),
                gr.update(visible=True, value=""),
                gr.update(visible=True),
                f"Question 1/{quiz_app.total_questions}",
            )
    else:
        return (
            message,
            "",
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            "0/0",
        )


def submit_answer(answer_choice, answer_text):
    """Submit answer and show feedback"""
    # Determine which answer to use
    if answer_choice:
        answer = answer_choice
    else:
        answer = answer_text

    is_correct, feedback = quiz_app.check_answer(answer)

    if is_correct:
        quiz_app.score += 1

    return gr.update(value=feedback, visible=True), gr.update(visible=True)


def next_question():
    """Move to the next question"""
    quiz_app.current_question_idx += 1

    if quiz_app.current_question_idx >= quiz_app.total_questions:
        # Quiz complete
        final_score = f"## 🎉 Quiz Complete!\n\n**Your score:** {quiz_app.score}/{quiz_app.total_questions} ({quiz_app.score / quiz_app.total_questions * 100:.1f}%)"
        return (
            gr.update(value=final_score, visible=True),
            "",
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            "Quiz Complete",
        )

    question, choices, q_type = quiz_app.get_current_question()

    if q_type in ["multiple_choice", "true_false", "binary_choice"]:
        return (
            gr.update(value="", visible=False),  # Clear feedback
            question,
            gr.update(choices=choices, visible=True, value=None),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=False),
            f"Question {quiz_app.current_question_idx + 1}/{quiz_app.total_questions}",
        )
    else:
        return (
            gr.update(value="", visible=False),  # Clear feedback
            question,
            gr.update(visible=False),
            gr.update(visible=True, value=""),
            gr.update(visible=True),
            gr.update(visible=False),
            f"Question {quiz_app.current_question_idx + 1}/{quiz_app.total_questions}",
        )


# Create Gradio interface
with gr.Blocks(title="HuggingFace Evaluation Dataset Quiz") as demo:
    gr.Markdown("# 🤗 Evaluation Dataset Quiz")
    gr.Markdown(
        "Test yourself with questions from popular HuggingFace evaluation datasets!"
    )

    with gr.Tabs():
        with gr.Tab("Dataset Selection"):
            with gr.Row():
                dataset_dropdown = gr.Dropdown(
                    choices=[config["name"] for config in EVAL_DATASETS.values()],
                    label="Select Dataset",
                    value=list(EVAL_DATASETS.values())[0]["name"],
                )
                num_questions_slider = gr.Slider(
                    minimum=5, maximum=20, value=10, step=1, label="Number of Questions"
                )

            start_button = gr.Button("Start Quiz", variant="primary")
            status_message = gr.Textbox(label="Status", interactive=False)

        with gr.Tab("Quiz"):
            progress_text = gr.Textbox(label="Progress", value="0/0", interactive=False)
            question_display = gr.Textbox(label="Question", lines=5, interactive=False)

            # Answer inputs (one will be visible at a time)
            answer_radio = gr.Radio(label="Select your answer", visible=False)
            answer_textbox = gr.Textbox(label="Type your answer", visible=False)

            submit_button = gr.Button("Submit Answer", variant="primary", visible=False)

            feedback_display = gr.Markdown(label="Feedback", visible=True)
            next_button = gr.Button("Next Question", visible=False)

    # Connect events
    start_button.click(
        start_quiz,
        inputs=[dataset_dropdown, num_questions_slider],
        outputs=[
            status_message,
            question_display,
            answer_radio,
            answer_textbox,
            submit_button,
            progress_text,
        ],
    )

    submit_button.click(
        submit_answer,
        inputs=[answer_radio, answer_textbox],
        outputs=[feedback_display, next_button],
    )

    next_button.click(
        next_question,
        outputs=[
            feedback_display,
            question_display,
            answer_radio,
            answer_textbox,
            submit_button,
            next_button,
            progress_text,
        ],
    )

if __name__ == "__main__":
    demo.launch()
```
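For reference, the quiz logic above can be exercised without launching the Gradio UI, which is handy when debugging a dataset configuration. A minimal sketch using the objects defined in `app.py` (the dataset choice is arbitrary and downloading it requires network access):

```python
from app import quiz_app

# Load a few questions and walk through one answer check headlessly.
ok, msg = quiz_app.load_dataset_questions("openai/gsm8k", num_questions=3)
print(msg)

if ok:
    question, choices, q_type = quiz_app.get_current_question()
    print(q_type, question)
    # QA-type answers are compared numerically when both sides contain numbers.
    is_correct, feedback = quiz_app.check_answer("42")
    print(is_correct, feedback)
```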
flake.lock ADDED
@@ -0,0 +1,27 @@
```json
{
  "nodes": {
    "nixpkgs": {
      "locked": {
        "lastModified": 1730531603,
        "narHash": "sha256-Dqg6si5CqIzm87sp57j5nTaeBbWhHFaVyG7V6L8k3lY=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "7ffd9ae656aec493492b44d0ddfb28e79a1ea25d",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
}
```
flake.nix ADDED
@@ -0,0 +1,41 @@
```nix
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
  };

  outputs =
    { nixpkgs, ... }:
    let
      forAllSystems = nixpkgs.lib.genAttrs [
        "aarch64-linux"
        "x86_64-linux"
        "aarch64-darwin"
      ];
    in
    {
      devShells = forAllSystems (
        system:
        let
          pkgs = nixpkgs.legacyPackages.${system};
        in
        {
          default = pkgs.mkShell {
            buildInputs = with pkgs; [
              rustup
              python3Packages.python
              python3Packages.venvShellHook
            ];
            venvDir = "./.venv";
            postVenvCreation = ''
              unset SOURCE_DATE_EPOCH
            '';
            postShellHook = ''
              unset SOURCE_DATE_EPOCH
            '';
            LD_LIBRARY_PATH = "$LD_LIBRARY_PATH:${pkgs.stdenv.cc.cc.lib}/lib:${pkgs.zlib}/lib:/run/opengl-driver/lib";
          };

        }
      );
    };
}
```
get_popular_eval_datasets.py ADDED
@@ -0,0 +1,100 @@
```python
#!/usr/bin/env python3
"""
Script to fetch the 10 most used evaluation datasets from Hugging Face.
"""

import requests
from typing import List, Dict

def get_popular_eval_datasets(limit: int = 10) -> List[Dict]:
    """
    Fetch popular evaluation datasets from Hugging Face Hub API.

    Args:
        limit: Number of datasets to return

    Returns:
        List of dataset information dictionaries
    """
    # Common evaluation dataset tags and keywords
    eval_keywords = [
        "evaluation", "benchmark", "eval", "test-set", "validation",
        "leaderboard", "assessment", "metric"
    ]

    # Search for datasets with evaluation-related tags
    base_url = "https://huggingface.co/api/datasets"
    params = {
        "sort": "downloads",  # Sort by most downloaded
        "direction": "-1",  # Descending order
        "limit": 100,  # Get more to filter
        "full": "true"
    }

    response = requests.get(base_url, params=params)
    response.raise_for_status()

    datasets = response.json()

    # Filter for evaluation datasets
    eval_datasets = []
    for dataset in datasets:
        # Check if dataset has evaluation-related tags or is commonly used for eval
        tags = dataset.get("tags", [])
        dataset_id = dataset.get("id", "").lower()

        # Check for eval keywords in tags or dataset name
        is_eval = any(
            any(keyword in str(tag).lower() for keyword in eval_keywords)
            for tag in tags
        ) or any(keyword in dataset_id for keyword in eval_keywords)

        # Also include well-known evaluation datasets
        known_eval_datasets = [
            "glue", "superglue", "squad", "xnli", "hellaswag", "winogrande",
            "arc", "mmlu", "gsm8k", "humaneval", "mbpp", "truthfulqa",
            "bigbench", "c4", "piqa", "siqa", "boolq", "copa", "multirc",
            "record", "rte", "wic", "wsc", "cb", "axb", "axg", "swag",
            "race", "qnli", "wnli", "sst", "cola", "stsb", "mrpc", "qqp"
        ]

        if any(known in dataset_id for known in known_eval_datasets):
            is_eval = True

        if is_eval:
            eval_datasets.append({
                "name": dataset.get("id", ""),
                "downloads": dataset.get("downloads", 0),
                "likes": dataset.get("likes", 0),
                "tags": [tag for tag in tags if isinstance(tag, str)][:5],  # First 5 tags
                "description": dataset.get("description", "")[:200]  # First 200 chars
            })

    # Sort by downloads and return top N
    eval_datasets.sort(key=lambda x: x["downloads"], reverse=True)
    return eval_datasets[:limit]

def main():
    """Main function to fetch and display popular evaluation datasets."""
    print("Fetching the 10 most used evaluation datasets from Hugging Face...\n")

    try:
        datasets = get_popular_eval_datasets(10)

        for i, dataset in enumerate(datasets, 1):
            print(f"{i}. {dataset['name']}")
            print(f" Downloads: {dataset['downloads']:,}")
            print(f" Likes: {dataset['likes']}")
            if dataset['tags']:
                print(f" Tags: {', '.join(dataset['tags'])}")
            if dataset['description']:
                print(f" Description: {dataset['description']}...")
            print()

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from Hugging Face: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()
```
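The script above queries the Hub's raw HTTP API with `requests`; the same listing can also be obtained through the `huggingface_hub` client that is already a project dependency. A minimal sketch, assuming a reasonably recent `huggingface_hub` (the `sort`/`direction`/`limit` parameters mirror the HTTP query parameters used above; the `downloads` attribute may be absent depending on the listing detail level):

```python
from huggingface_hub import list_datasets

# Most-downloaded datasets first; `limit` keeps the listing small.
for info in list_datasets(sort="downloads", direction=-1, limit=10):
    print(info.id, getattr(info, "downloads", None))
```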
requirements.txt ADDED
@@ -0,0 +1,5 @@
```text
gradio
datasets
transformers
requests
huggingface-hub
```