| """ | |
| Test script for Dynamic Highscores application. | |
| This script tests the key functionality of the Dynamic Highscores application | |
| to ensure everything works as expected before deployment. | |
| """ | |
import os
import unittest
import tempfile
import sqlite3
from unittest.mock import MagicMock, patch

# Import components to test
from database_schema import DynamicHighscoresDB
from auth import HuggingFaceAuth
from benchmark_selection import BenchmarkSelector
from evaluation_queue import EvaluationQueue
from leaderboard import Leaderboard

class TestDynamicHighscores(unittest.TestCase):
    """Test cases for Dynamic Highscores application."""

    def setUp(self):
        """Set up test environment."""
        # Create temporary database backing the application schema
        self.db_fd, self.db_path = tempfile.mkstemp()
        self.db = DynamicHighscoresDB(self.db_path)

        # Auth manager (real instance wired to the temporary database)
        self.auth_manager = HuggingFaceAuth(self.db)

        # Components under test
        self.benchmark_selector = BenchmarkSelector(self.db, self.auth_manager)
        self.evaluation_queue = EvaluationQueue(self.db, self.auth_manager)
        self.leaderboard = Leaderboard(self.db)

    def tearDown(self):
        """Clean up test environment."""
        os.close(self.db_fd)
        os.unlink(self.db_path)
    def test_database_schema(self):
        """Test database schema creation."""
        # Check if tables were created
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Get list of tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        table_names = [table[0] for table in tables]

        # Check if all expected tables exist
        expected_tables = ['users', 'benchmarks', 'models', 'evaluations', 'queue']
        for table in expected_tables:
            self.assertIn(table, table_names)

        conn.close()
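        # A minimal follow-up sketch (relies only on the 'users' table asserted
        # above): reopen the database and check that the table defines at least
        # one column via PRAGMA table_info. Exact column names are left to
        # DynamicHighscoresDB and are deliberately not assumed here.
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute("PRAGMA table_info(users);")
        user_columns = [row[1] for row in cursor.fetchall()]
        self.assertGreater(len(user_columns), 0)
        conn.close()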
    def test_user_management(self):
        """Test user management functionality."""
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)
        self.assertIsNotNone(user_id)

        # Add an admin user
        admin_id = self.db.add_user("admin_user", "admin_hf_id", True)
        self.assertIsNotNone(admin_id)

        # Test submission limits
        self.assertTrue(self.db.can_submit_today(user_id))
        self.db.update_submission_date(user_id)
        self.assertFalse(self.db.can_submit_today(user_id))

        # Admin should always be able to submit
        self.assertTrue(self.db.can_submit_today(admin_id))
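        # Sketch of a stricter check (assumes update_submission_date accepts an
        # admin id without affecting the admin exemption): even after recording
        # a submission, the admin should remain able to submit.
        self.db.update_submission_date(admin_id)
        self.assertTrue(self.db.can_submit_today(admin_id))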
    def test_benchmark_management(self):
        """Test benchmark management functionality."""
        # Add a test benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset",
            description="Test description",
            metrics={"accuracy": 1.0}
        )
        self.assertIsNotNone(benchmark_id)

        # Get benchmarks
        benchmarks = self.db.get_benchmarks()
        self.assertEqual(len(benchmarks), 1)
        self.assertEqual(benchmarks[0]["name"], "Test Benchmark")

    def test_model_management(self):
        """Test model management functionality."""
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add a test model
        model_id = self.db.add_model(
            name="Test Model",
            hf_model_id="test/model",
            user_id=user_id,
            tag="Reasoning",
            parameters="7B",
            description="Test model description"
        )
        self.assertIsNotNone(model_id)

        # Get models
        models = self.db.get_models()
        self.assertEqual(len(models), 1)
        self.assertEqual(models[0]["name"], "Test Model")

        # Get models by tag
        models = self.db.get_models(tag="Reasoning")
        self.assertEqual(len(models), 1)
        self.assertEqual(models[0]["tag"], "Reasoning")
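        # Sketch of a negative filter (assumes get_models(tag=...) filters
        # rather than raising on an unknown tag): a tag with no registered
        # models should yield an empty result.
        self.assertEqual(len(self.db.get_models(tag="Vision")), 0)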
    def test_evaluation_management(self):
        """Test evaluation management functionality."""
        # Add a test user
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add a test model
        model_id = self.db.add_model(
            name="Test Model",
            hf_model_id="test/model",
            user_id=user_id,
            tag="Reasoning"
        )

        # Add a test benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset"
        )

        # Add a test evaluation
        evaluation_id = self.db.add_evaluation(
            model_id=model_id,
            benchmark_id=benchmark_id
        )
        self.assertIsNotNone(evaluation_id)

        # Update evaluation status
        self.db.update_evaluation_status(
            evaluation_id=evaluation_id,
            status="running"
        )

        # Get next in queue
        next_eval = self.db.get_next_in_queue()
        self.assertIsNotNone(next_eval)
        self.assertEqual(next_eval["evaluation_id"], evaluation_id)

        # Complete evaluation
        self.db.update_evaluation_status(
            evaluation_id=evaluation_id,
            status="completed",
            results={"accuracy": 0.85},
            score=85.0
        )

        # Get evaluation results
        results = self.db.get_evaluation_results()
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["score"], 85.0)
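        # Sketch of a queue-drain check (assumes get_next_in_queue returns None
        # once no pending evaluations remain): completing the only evaluation
        # should leave the queue empty.
        self.assertIsNone(self.db.get_next_in_queue())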
    def test_leaderboard(self):
        """Test leaderboard functionality."""
        # Add test data
        user_id = self.db.add_user("test_user", "test_hf_id", False)

        # Add models with different tags
        model1_id = self.db.add_model(
            name="Model 1",
            hf_model_id="test/model1",
            user_id=user_id,
            tag="Reasoning"
        )
        model2_id = self.db.add_model(
            name="Model 2",
            hf_model_id="test/model2",
            user_id=user_id,
            tag="Coding"
        )

        # Add a benchmark
        benchmark_id = self.db.add_benchmark(
            name="Test Benchmark",
            dataset_id="test/dataset"
        )

        # Add evaluations
        eval1_id = self.db.add_evaluation(
            model_id=model1_id,
            benchmark_id=benchmark_id
        )
        eval2_id = self.db.add_evaluation(
            model_id=model2_id,
            benchmark_id=benchmark_id
        )

        # Complete evaluations
        self.db.update_evaluation_status(
            evaluation_id=eval1_id,
            status="completed",
            results={"accuracy": 0.9},
            score=90.0
        )
        self.db.update_evaluation_status(
            evaluation_id=eval2_id,
            status="completed",
            results={"accuracy": 0.8},
            score=80.0
        )

        # Get leaderboard data
        df = self.leaderboard.get_leaderboard_data()
        self.assertEqual(len(df), 2)

        # Test filtering by tag
        df_reasoning = self.leaderboard.get_leaderboard_data(tag="Reasoning")
        self.assertEqual(len(df_reasoning), 1)
        self.assertEqual(df_reasoning.iloc[0]["score"], 90.0)

        df_coding = self.leaderboard.get_leaderboard_data(tag="Coding")
        self.assertEqual(len(df_coding), 1)
        self.assertEqual(df_coding.iloc[0]["score"], 80.0)


if __name__ == "__main__":
    unittest.main()