Spaces:

Surn
/

BattleWords

Running

File size: 10,158 Bytes

1b1b6cc

# file: tests/test_compare_difficulty_functions.py
import os
import sys
import pytest

# Ensure the modules path is available
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from battlewords.modules.constants import HF_API_TOKEN
from battlewords.modules.storage import gen_full_url, _get_json_from_repo, HF_REPO_ID, SHORTENER_JSON_FILE
from battlewords.word_loader import compute_word_difficulties, compute_word_difficulties2, compute_word_difficulties3

# Ensure the token is set for Hugging Face Hub.
# HF_API_TOKEN is imported from battlewords.modules.constants; when it is
# truthy it is copied into the process environment under the same name.
# NOTE(review): presumably the storage helpers read it from os.environ — confirm.
if HF_API_TOKEN:
    os.environ["HF_API_TOKEN"] = HF_API_TOKEN

# Define sample_words as a global variable.
# It starts empty; the tests below read from and add to it, so its contents
# depend on which tests have already run in this process.
sample_words = []

def test_compare_difficulty_functions_for_challenge(capsys):
    """
    Compare compute_word_difficulties, compute_word_difficulties2, and
    compute_word_difficulties3 for all users in a challenge identified by short_id.

    Steps: resolve the fixed short id to a full URL, download the challenge's
    settings.json from ``HF_REPO_ID``, locate the wordlist on disk (falling
    back to classic.txt), then score every user's word list with all three
    functions and print a per-user and an aggregate comparison.

    The test is skipped when the short id cannot be resolved, and it fails if
    any of the three functions returns an empty per-word result for a user
    that has words. Each non-empty user word list is also appended to the
    module-level ``sample_words`` list.

    Args:
        capsys: pytest capture fixture. Kept so the signature is unchanged;
            the test no longer reads from it.
    """
    # Use a fixed short id for testing
    short_id = "hDjsB_dl"

    # Step 1: Resolve short ID to full URL
    status, full_url = gen_full_url(
        short_url=short_id,
        repo_id=HF_REPO_ID,
        json_file=SHORTENER_JSON_FILE,
    )

    if status != "success_retrieved_full" or not full_url:
        # A failed lookup must not hand back a URL.
        assert not full_url, "full_url should be empty/None on failure"
        # Skip instead of returning: a plain return would report the test as
        # passed even though nothing about the difficulty functions was checked.
        pytest.skip(
            f"Could not resolve short id '{short_id}'. "
            f"Status: {status}. "
            f"Check repo '{HF_REPO_ID}' and mapping file '{SHORTENER_JSON_FILE}'."
        )

    print(f"✓ Resolved short id '{short_id}' to full URL: {full_url}")

    # Step 2: Extract file path from full URL
    url_parts = full_url.split("/resolve/main/")
    assert len(url_parts) == 2, f"Invalid full URL format: {full_url}"
    file_path = url_parts[1]

    # Step 3: Download and parse settings.json
    settings = _get_json_from_repo(HF_REPO_ID, file_path, repo_type="dataset")
    assert settings, "Failed to download or parse settings.json"
    print("✓ Downloaded settings.json")

    # Validate settings structure
    for required_key in ("challenge_id", "wordlist_source", "users"):
        assert required_key in settings, f"settings.json is missing '{required_key}'"

    # The keys are guaranteed present; `or` covers explicit null/empty values.
    wordlist_source = settings["wordlist_source"] or "wordlist.txt"
    users = settings["users"] or []

    print(f"\nChallenge ID: {settings['challenge_id']}")
    print(f"Wordlist Source: {wordlist_source}")
    print(f"Number of Users: {len(users)}")

    # Step 4: Determine wordlist file path
    # Assuming the wordlist is in battlewords/words/ directory
    words_dir = os.path.join(os.path.dirname(__file__), "..", "battlewords", "words")
    wordlist_path = os.path.join(words_dir, wordlist_source)

    # If wordlist doesn't exist, try classic.txt as fallback.
    # isfile (not exists) so that a directory is never accepted as a wordlist.
    if not os.path.isfile(wordlist_path):
        print(f"⚠ Wordlist '{wordlist_source}' not found, using 'classic.txt' as fallback")
        wordlist_path = os.path.join(words_dir, "classic.txt")

    assert os.path.isfile(wordlist_path), f"Wordlist file not found: {wordlist_path}"
    print(f"✓ Using wordlist: {wordlist_path}")

    # Step 5: Compare difficulty functions for each user
    separator = "=" * 80
    print("\n" + separator)
    print("DIFFICULTY COMPARISON BY USER")
    print(separator)

    # (display name, callable) for each function under comparison, in order.
    scorers = (
        ("compute_word_difficulties", compute_word_difficulties),
        ("compute_word_difficulties2", compute_word_difficulties2),
        ("compute_word_difficulties3", compute_word_difficulties3),
    )
    # 1-based function numbers for the pairwise difference lines.
    compared_pairs = ((1, 2), (1, 3), (2, 3))

    all_results = []

    for user_idx, user in enumerate(users, 1):
        user_name = user.get("name", f"User {user_idx}")
        word_list = user.get("word_list") or []  # tolerate a null word_list

        if not word_list:
            print(f"\n[{user_idx}] {user_name}: No words assigned, skipping")
            continue

        # Extend the module-level list in place so later tests can reuse the words.
        sample_words.extend(word_list)

        print(f"\n[{user_idx}] {user_name}")
        print(f"     Words: {len(word_list)} words")
        print(f"     Sample: {', '.join(word_list[:5])}{'...' if len(word_list) > 5 else ''}")

        # Compute difficulties using all three functions before printing anything.
        totals = []
        per_word_results = []
        for _, scorer in scorers:
            total, per_word = scorer(wordlist_path, word_list)
            totals.append(total)
            per_word_results.append(per_word)

        for func_no, (label, _) in enumerate(scorers, 1):
            print(f"\n     Function {func_no} ({label}):")
            print(f"       Total Difficulty: {totals[func_no - 1]:.4f}")
            print(f"       Words Processed: {len(per_word_results[func_no - 1])}")

        # Calculate statistics (only when every function produced per-word data,
        # which also keeps the divisions below away from zero).
        if all(per_word_results):
            averages = [
                total / len(per_word)
                for total, per_word in zip(totals, per_word_results)
            ]

            print("\n     Comparison:")
            for func_no, average in enumerate(averages, 1):
                print(f"       Average Difficulty (Func{func_no}): {average:.4f}")
            for left, right in compared_pairs:
                gap = abs(averages[left - 1] - averages[right - 1])
                print(f"       Difference (Func{left} vs Func{right}): {gap:.4f}")

        # Store results for final summary
        all_results.append({
            "user_name": user_name,
            "word_count": len(word_list),
            "totals": totals,
            "per_word": per_word_results,
        })

    # Step 6: Print summary comparison
    print("\n" + separator)
    print("OVERALL SUMMARY")
    print(separator)

    if not all_results:
        print("\n⚠ No users with words found in this challenge")
        return

    aggregate = [
        sum(result["totals"][position] for result in all_results)
        for position in range(len(scorers))
    ]
    total_words = sum(result["word_count"] for result in all_results)

    print(f"\nTotal Users Analyzed: {len(all_results)}")
    print(f"Total Words Across All Users: {total_words}")
    print("\nAggregate Total Difficulty:")
    for func_no, total in enumerate(aggregate, 1):
        print(f"  Function {func_no}: {total:.4f}")
    for left, right in compared_pairs:
        gap = abs(aggregate[left - 1] - aggregate[right - 1])
        print(f"  Difference (Func{left} vs Func{right}): {gap:.4f}")

    # Validate that all functions returned results for all users
    for func_no in range(1, len(scorers) + 1):
        assert all(
            result["per_word"][func_no - 1] for result in all_results
        ), f"Function {func_no} failed for some users"

    print("\n✓ All tests passed!")


def test_compare_difficulty_functions_with_classic_wordlist():
    """
    Run the three difficulty functions against the bundled classic.txt wordlist.

    The words scored are the module-level ``sample_words`` when that list is
    non-empty; otherwise a default ten-word list is stored there and used.
    The test is skipped if classic.txt is missing. It asserts that every
    function returns one entry per distinct word and a positive total.
    """
    global sample_words  # may be rebound to the default list below

    wordlist_path = os.path.join(
        os.path.dirname(__file__), "..", "battlewords", "words", "classic.txt"
    )

    if not os.path.exists(wordlist_path):
        pytest.skip(f"classic.txt not found at {wordlist_path}")

    # Fall back to a fixed sample when no earlier test has filled the list.
    if not sample_words:
        sample_words = ["ABLE", "ACID", "AREA", "ARMY", "BEAR", "BOWL", "CAVE", "COIN", "ECHO", "GOLD"]

    rule = "=" * 80
    print("\n" + rule)
    print("TESTING WITH CLASSIC.TXT WORDLIST")
    print(rule)
    print(f"Sample Words: {', '.join(sample_words)}")

    # (display name, callable) pairs, in the order they are reported.
    scorers = (
        ("compute_word_difficulties", compute_word_difficulties),
        ("compute_word_difficulties2", compute_word_difficulties2),
        ("compute_word_difficulties3", compute_word_difficulties3),
    )

    # Score with every function first; reporting happens afterwards.
    outcomes = []
    for _, scorer in scorers:
        total, per_word = scorer(wordlist_path, sample_words)
        outcomes.append((total, per_word))

    for (label, _), (total, per_word) in zip(scorers, outcomes):
        print(f"\nFunction {label} Results:")
        print(f"  Total Difficulty: {total:.4f}")
        for word in sample_words:
            if word in per_word:
                print(f"    {word}: {per_word[word]:.4f}")

    # Coverage checks come before the positivity checks, function by function.
    distinct_count = len(set(sample_words))
    for number, (_, per_word) in enumerate(outcomes, 1):
        assert len(per_word) == distinct_count, f"Function {number} didn't process all words"
    for number, (total, _) in enumerate(outcomes, 1):
        assert total > 0, f"Function {number} total difficulty should be positive"

    print("\n✓ Classic wordlist test passed!")


if __name__ == "__main__":
    # Allow running this file directly: -s shows the printed comparison,
    # -v lists each test. pytest.main() returns an exit code; hand it to
    # sys.exit so a failing run is visible to the calling shell.
    sys.exit(pytest.main(["-s", "-v", __file__]))