| """ | |
| Test script for Dynamic Highscores application. | |
| This script tests the key functionality of the Dynamic Highscores application | |
| to ensure everything works as expected before deployment. | |
| """ | |
import os
import unittest
import tempfile
import sqlite3
from unittest.mock import MagicMock, patch

# Import components to test
from database_schema import DynamicHighscoresDB
from auth import HuggingFaceAuth
from benchmark_selection import BenchmarkSelector
from evaluation_queue import EvaluationQueue
from leaderboard import Leaderboard

class TestDynamicHighscores(unittest.TestCase):
    """Test cases for Dynamic Highscores application."""

    def setUp(self):
        """Set up test environment."""
        # Create temporary database backing the application schema
        self.db_fd, self.db_path = tempfile.mkstemp()
        self.db = DynamicHighscoresDB(self.db_path)

        # Auth manager (real instance wired to the temporary database)
        self.auth_manager = HuggingFaceAuth(self.db)

        # Components under test
        self.benchmark_selector = BenchmarkSelector(self.db, self.auth_manager)
        self.evaluation_queue = EvaluationQueue(self.db, self.auth_manager)
        self.leaderboard = Leaderboard(self.db)

    def tearDown(self):
        """Clean up test environment."""
        os.close(self.db_fd)
        os.unlink(self.db_path)
    def test_database_schema(self):
        """Test database schema creation."""
        # Check if tables were created
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get list of tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        table_names = [table[0] for table in tables]

        # Check if all expected tables exist
        expected_tables = ['users', 'benchmarks', 'models', 'evaluations', 'queue']
        for table in expected_tables:
            self.assertIn(table, table_names)

        conn.close()
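        # A minimal follow-up sketch (relies only on the 'users' table asserted
        # above): reopen the database and check that the table defines at least
        # one column via PRAGMA table_info. Exact column names are left to
        # DynamicHighscoresDB and are deliberately not assumed here.
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute("PRAGMA table_info(users);")
        user_columns = [row[1] for row in cursor.fetchall()]
        self.assertGreater(len(user_columns), 0)
        conn.close()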
    def test_user_management(self):
        """Test user management functionality."""
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)
        self.assertIsNotNone(user_id)

        # Add an admin user
        admin_id = self.db.add_user("admin_user", "admin_hf_id", True)
        self.assertIsNotNone(admin_id)

        # Test submission limits
        self.assertTrue(self.db.can_submit_today(user_id))
        self.db.update_submission_date(user_id)
        self.assertFalse(self.db.can_submit_today(user_id))

        # Admin should always be able to submit
        self.assertTrue(self.db.can_submit_today(admin_id))
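        # Sketch of a stricter check (assumes update_submission_date accepts an
        # admin id without affecting the admin exemption): even after recording
        # a submission, the admin should remain able to submit.
        self.db.update_submission_date(admin_id)
        self.assertTrue(self.db.can_submit_today(admin_id))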
    def test_benchmark_management(self):
        """Test benchmark management functionality."""
        # Add a test benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset",
            description="Test description",
            metrics={"accuracy": 1.0}
        )
        self.assertIsNotNone(benchmark_id)

        # Get benchmarks
        benchmarks = self.db.get_benchmarks()
        self.assertEqual(len(benchmarks), 1)
        self.assertEqual(benchmarks[0]["name"], "Test Benchmark")

    def test_model_management(self):
        """Test model management functionality."""
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add a test model
        model_id = self.db.add_model(
            name="Test Model",
            hf_model_id="test/model",
            user_id=user_id,
            tag="Reasoning",
            parameters="7B",
            description="Test model description"
        )
        self.assertIsNotNone(model_id)

        # Get models
        models = self.db.get_models()
        self.assertEqual(len(models), 1)
        self.assertEqual(models[0]["name"], "Test Model")

        # Get models by tag
        models = self.db.get_models(tag="Reasoning")
        self.assertEqual(len(models), 1)
        self.assertEqual(models[0]["tag"], "Reasoning")
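        # Sketch of a negative filter (assumes get_models(tag=...) filters
        # rather than raising on an unknown tag): a tag with no registered
        # models should yield an empty result.
        self.assertEqual(len(self.db.get_models(tag="Vision")), 0)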
    def test_evaluation_management(self):
        """Test evaluation management functionality."""
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add a test model
        model_id = self.db.add_model(
            name="Test Model",
            hf_model_id="test/model",
            user_id=user_id,
            tag="Reasoning"
        )

        # Add a test benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset"
        )

        # Add a test evaluation
        evaluation_id = self.db.add_evaluation(
            model_id=model_id,
            benchmark_id=benchmark_id
        )
        self.assertIsNotNone(evaluation_id)

        # Update evaluation status
        self.db.update_evaluation_status(
            evaluation_id=evaluation_id,
            status="running"
        )

        # Get next in queue
        next_eval = self.db.get_next_in_queue()
        self.assertIsNotNone(next_eval)
        self.assertEqual(next_eval["evaluation_id"], evaluation_id)

        # Complete evaluation
        self.db.update_evaluation_status(
            evaluation_id=evaluation_id,
            status="completed",
            results={"accuracy": 0.85},
            score=85.0
        )

        # Get evaluation results
        results = self.db.get_evaluation_results()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["score"], 85.0)
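        # Sketch of a queue-drain check (assumes get_next_in_queue returns None
        # once no pending evaluations remain): completing the only evaluation
        # should leave the queue empty.
        self.assertIsNone(self.db.get_next_in_queue())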
    def test_leaderboard(self):
        """Test leaderboard functionality."""
        # Add test data
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add models with different tags
        model1_id = self.db.add_model(
            name="Model 1",
            hf_model_id="test/model1",
            user_id=user_id,
            tag="Reasoning"
        )
        model2_id = self.db.add_model(
            name="Model 2",
            hf_model_id="test/model2",
            user_id=user_id,
            tag="Coding"
        )

        # Add a benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset"
        )

        # Add evaluations
        eval1_id = self.db.add_evaluation(
            model_id=model1_id,
            benchmark_id=benchmark_id
        )
        eval2_id = self.db.add_evaluation(
            model_id=model2_id,
            benchmark_id=benchmark_id
        )

        # Complete evaluations
        self.db.update_evaluation_status(
            evaluation_id=eval1_id,
            status="completed",
            results={"accuracy": 0.9},
            score=90.0
        )
        self.db.update_evaluation_status(
            evaluation_id=eval2_id,
            status="completed",
            results={"accuracy": 0.8},
            score=80.0
        )

        # Get leaderboard data
        df = self.leaderboard.get_leaderboard_data()
        self.assertEqual(len(df), 2)

        # Test filtering by tag
        df_reasoning = self.leaderboard.get_leaderboard_data(tag="Reasoning")
        self.assertEqual(len(df_reasoning), 1)
        self.assertEqual(df_reasoning.iloc[0]["score"], 90.0)

        df_coding = self.leaderboard.get_leaderboard_data(tag="Coding")
        self.assertEqual(len(df_coding), 1)
        self.assertEqual(df_coding.iloc[0]["score"], 80.0)


if __name__ == "__main__":
    unittest.main()