BattleWords / battlewords /word_loader.py
Surn's picture
Add EASY MODE and fix wordlist selections
99bf0ab
raw
history blame
5.01 kB
from __future__ import annotations
import re
import os
from typing import Dict, List, Optional
import streamlit as st
from importlib import resources
# Minimal built-in word lists, keyed by word length (4, 5 and 6 letters).
# load_word_list() substitutes these for any length where the external word
# list is missing, unreadable, or supplies fewer than MIN_REQUIRED words.
FALLBACK_WORDS: Dict[int, List[str]] = {
4: [
"TREE", "BOAT", "WIND", "FROG", "LION", "MOON", "FORK", "GLOW", "GAME", "CODE",
"DATA", "BLUE", "GOLD", "ROAD", "STAR",
],
5: [
"APPLE", "RIVER", "STONE", "PLANT", "MOUSE", "BOARD", "CHAIR", "SCALE", "SMILE", "CLOUD",
],
6: [
"ORANGE", "PYTHON", "STREAM", "MARKET", "FOREST", "THRIVE", "LOGGER", "BREATH", "DOMAIN", "GALAXY",
],
}
# Minimum number of words a loaded file must contain for a given length before
# the file's words are used for that length; below this, FALLBACK_WORDS is used.
# NOTE(review): the project specs reportedly call for >= 500 per length, but
# the value in effect here is 25 - confirm which threshold is intended.
MIN_REQUIRED = 25
def get_wordlist_files() -> list[str]:
    """Return the sorted names of the ``.txt`` files in the ``words`` directory.

    The directory is the ``words`` folder located next to this module. The
    extension check is case-insensitive. An empty list is returned when the
    directory does not exist.
    """
    base_dir = os.path.join(os.path.dirname(__file__), "words")
    if os.path.isdir(base_dir):
        # Only bare file names are returned, not full paths.
        return sorted(
            name for name in os.listdir(base_dir) if name.lower().endswith(".txt")
        )
    return []
@st.cache_data(show_spinner=False)
def load_word_list(selected_file: Optional[str] = None) -> Dict[int, List[str]]:
    """
    Build the pools of 4-, 5- and 6-letter words, keyed by word length.

    Source selection:
      * If `selected_file` is given, it must be one of the names returned by
        get_wordlist_files(); that file is read from the words/ directory.
      * Otherwise the on-disk words/wordlist.txt is tried first, then the
        packaged resource battlewords.words/wordlist.txt.

    Parsing: blank lines and lines starting with '#' are skipped, anything
    after a '#' on a line is dropped, entries are upper-cased, and only
    entries made purely of A-Z with length 4, 5 or 6 are kept. Duplicates are
    removed while preserving first-seen order.

    Fallback: any length with fewer than MIN_REQUIRED words uses
    FALLBACK_WORDS for that length instead. If the source cannot be found or
    read at all, FALLBACK_WORDS is returned unchanged.

    Side effect: on a best-effort basis, the source label, the selected file
    name and the per-length counts are written to st.session_state.

    NOTE: To ensure cache updates when the user picks a different file, always
    pass the `selected_file` argument from the UI/generator.
    NOTE(review): because this function is wrapped in st.cache_data, the
    session_state bookkeeping presumably only runs on cache misses - confirm
    that this is acceptable for the UI.
    """
    lengths = (4, 5, 6)

    def _publish(pools: Dict[int, List[str]], origin: str) -> Dict[int, List[str]]:
        # Record where the words came from; failures here must never break loading.
        try:
            st.session_state.wordlist_source = origin
            st.session_state.wordlist_selected = selected_file or "wordlist.txt"
            st.session_state.word_counts = {n: len(ws) for n, ws in pools.items()}
        except Exception:
            pass
        return pools

    def _slurp(fname: str) -> str:
        # Read a file from the words/ directory next to this module.
        path = os.path.join(os.path.dirname(__file__), "words", fname)
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    def _default_text() -> Optional[str]:
        # Try the on-disk default first (editable repo), then the packaged data.
        loaders = (
            lambda: _slurp("wordlist.txt"),
            lambda: resources.files("battlewords.words")
            .joinpath("wordlist.txt")
            .read_text(encoding="utf-8"),
        )
        for load in loaders:
            try:
                return load()
            except Exception:
                continue
        return None

    try:
        if selected_file:
            # Only accept names that actually exist in words/ to avoid bad paths.
            if selected_file not in set(get_wordlist_files()):
                raise FileNotFoundError(f"Selected word list '{selected_file}' not found in words/ directory.")
            text: Optional[str] = _slurp(selected_file)
            origin = f"file:{selected_file}"
        else:
            text = _default_text()
            origin = "fallback" if text is None else "default"

        if text is None:
            raise FileNotFoundError("No word list file found on disk or in packaged resources.")

        pools: Dict[int, List[str]] = {4: [], 5: [], 6: []}
        seen = {4: set(), 5: set(), 6: set()}
        for raw in text.splitlines():
            entry = raw.strip()
            if not entry or entry.startswith("#"):
                continue
            # Drop any trailing inline comment, then normalise case.
            candidate = entry.partition("#")[0].strip().upper()
            if re.fullmatch(r"[A-Z]+", candidate) is None:
                continue
            bucket = seen.get(len(candidate))
            if bucket is None or candidate in bucket:
                # Wrong length, or already collected.
                continue
            bucket.add(candidate)
            pools[len(candidate)].append(candidate)

        enough = {n: len(pools[n]) >= MIN_REQUIRED for n in lengths}
        if all(enough.values()):
            return _publish(pools, origin)

        # Substitute the built-in list for every length that fell short.
        blended: Dict[int, List[str]] = {
            n: (pools[n] if enough[n] else FALLBACK_WORDS[n]) for n in lengths
        }
        label = f"{origin}+fallback" if any(enough.values()) else "fallback"
        return _publish(blended, label)
    except Exception:
        # Missing file or read error: use the built-ins for every length.
        return _publish(FALLBACK_WORDS, "fallback")