"""Synchronous lazy-loader for JusticeAI ML models.

Configures a shared Hugging Face cache under /tmp, cleans it when it
grows past ``MAX_CACHE_GB``, then loads three components at import time
(SentenceTransformer embedder, SpellChecker, toxic-bert moderator),
recording per-model status/progress that callers can poll via
``get_progress()``.  All failures are logged and swallowed so the host
app still starts when a model is unavailable.
"""

import os
import shutil
import time
from datetime import datetime, timezone
from pathlib import Path

# =====================================================
# ⚙️ CONFIG
# =====================================================
CACHE_DIR = "/tmp/hf_cache"
STARTUP_LOG = "/tmp/startup_log.txt"
MAX_CACHE_GB = 48.0
MODEL_TIMEOUT = int(os.environ.get("MODEL_TIMEOUT", "300"))  # seconds

# Point all Hugging Face libraries at the shared cache and silence the
# symlink warning emitted on filesystems without symlink support.
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
# Default to CPU-only unless the caller already pinned a device.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")

os.makedirs(CACHE_DIR, exist_ok=True)


# =====================================================
# 🪶 LOGGING
# =====================================================
def _log(msg: str) -> None:
    """Write a timestamped line to stdout and append it to STARTUP_LOG.

    Both sinks are best-effort: a failure in either (closed stdout,
    read-only filesystem, ...) is deliberately ignored so logging can
    never break startup.
    """
    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the rendered "%H:%M:%S" output is identical.
    ts = datetime.now(timezone.utc).strftime("%H:%M:%S")
    line = f"[{ts}] {msg}"
    try:
        print(line, flush=True)
    except Exception:
        pass
    try:
        with open(STARTUP_LOG, "a") as f:
            f.write(line + "\n")
    except Exception:
        pass


# =====================================================
# 💾 CACHE CLEANER
# =====================================================
def _dir_size_bytes(path: Path) -> int:
    """Return the total size in bytes of all files under *path*.

    Unreadable files or directories are skipped rather than aborting
    the walk; a completely unreadable tree yields 0.
    """
    total = 0
    try:
        for p in path.rglob("*"):
            if p.is_file():
                try:
                    total += p.stat().st_size
                except Exception:
                    pass
    except Exception:
        pass
    return total


def clean_cache_if_needed() -> None:
    """Wipe and recreate CACHE_DIR when it exceeds MAX_CACHE_GB.

    Uses :func:`_dir_size_bytes` so a single unreadable file no longer
    zeroes out the measured total (the previous inline os.walk sum
    aborted entirely on the first stat error).
    """
    gb = _dir_size_bytes(Path(CACHE_DIR)) / (1024**3)
    if gb > MAX_CACHE_GB:
        try:
            shutil.rmtree(CACHE_DIR, ignore_errors=True)
            os.makedirs(CACHE_DIR, exist_ok=True)
            _log(f"🧹 Cache cleaned ({gb:.2f} GB exceeded)")
        except Exception as e:
            _log(f"⚠️ Cache cleanup failed: {e}")
    else:
        _log(f"💾 Cache OK: {gb:.2f} GB")


# =====================================================
# 🧠 MODEL LOADERS WITH PROGRESS
# =====================================================
# Loaded model singletons (None until the corresponding loader succeeds).
_sentence_model = None
_spellchecker = None
_moderator = None
# Map of model display-name -> load time in seconds, for the summary log.
_loaded_models = {}
_start_time = time.time()
# Per-model status record, keyed by the short names used in _set_status.
# status is one of "pending" / "loading" / "ready" / "error".
_model_status = {
    "model": {"status": "pending", "progress": 0.0, "elapsed": 0.0, "start_ts": None},
    "spell": {"status": "pending", "progress": 0.0, "elapsed": 0.0, "start_ts": None},
    "moderator": {"status": "pending", "progress": 0.0, "elapsed": 0.0, "start_ts": None},
}


def _set_status(name_key, status=None, progress=None, elapsed=None, start_ts=None):
    """Update the given fields of one _model_status entry in place.

    Fields left as None are not touched, so partial updates are safe.
    """
    st = _model_status.get(name_key, {})
    if status is not None:
        st["status"] = status
    if progress is not None:
        st["progress"] = progress
    if elapsed is not None:
        st["elapsed"] = elapsed
    if start_ts is not None:
        st["start_ts"] = start_ts
    _model_status[name_key] = st


def get_progress():
    """Return a shallow copy of the per-model status table."""
    return {k: dict(v) for k, v in _model_status.items()}


def _wait_for_assignment(var_getter, timeout_s=10.0, poll=0.1):
    """Poll *var_getter* until it returns non-None or *timeout_s* elapses.

    NOTE(review): currently unused — the loaders assign synchronously —
    but kept for callers that may rely on it from outside this file.
    """
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            v = var_getter()
            if v is not None:
                return v
        except Exception:
            pass
        time.sleep(poll)
    return None


def _load_sentence_model():
    """Load the all-MiniLM-L6-v2 sentence embedder onto CPU.

    On failure the status is set to "error" and the exception is only
    logged, never raised.
    """
    global _sentence_model
    name_key = "model"
    start = time.time()
    _set_status(name_key, status="loading", progress=0.0, elapsed=0.0, start_ts=start)
    _log("🧠 Loading SentenceTransformer...")
    try:
        from sentence_transformers import SentenceTransformer

        model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
        _sentence_model = model  # Direct assignment, skip wait loop since we're synchronous
        elapsed = time.time() - start
        _loaded_models["SentenceTransformer"] = elapsed
        _set_status(name_key, status="ready", progress=1.0, elapsed=round(elapsed, 2))
        _log(f"✅ SentenceTransformer completed in {elapsed:.2f}s (object ready)")
    except Exception as e:
        _set_status(name_key, status="error", progress=0.0, elapsed=round(time.time() - start, 2))
        _log(f"⚠️ SentenceTransformer load failed: {e}")


def _load_spellchecker():
    """Load the pyspellchecker SpellChecker; errors are logged, not raised."""
    global _spellchecker
    name_key = "spell"
    start = time.time()
    _set_status(name_key, status="loading", progress=0.0, elapsed=0.0, start_ts=start)
    _log("✍️ Loading SpellChecker...")
    try:
        from spellchecker import SpellChecker

        spell = SpellChecker()
        _spellchecker = spell
        elapsed = time.time() - start
        _loaded_models["SpellChecker"] = elapsed
        _set_status(name_key, status="ready", progress=1.0, elapsed=round(elapsed, 2))
        _log(f"✅ SpellChecker completed in {elapsed:.2f}s (object ready)")
    except Exception as e:
        _set_status(name_key, status="error", progress=0.0, elapsed=round(time.time() - start, 2))
        _log(f"⚠️ SpellChecker load failed: {e}")


def _load_moderator():
    """Build a CPU text-classification pipeline around unitary/toxic-bert.

    Errors are logged and recorded in the status table, never raised.
    """
    global _moderator
    name_key = "moderator"
    start = time.time()
    _set_status(name_key, status="loading", progress=0.0, elapsed=0.0, start_ts=start)
    _log("🧰 Loading Moderator...")
    try:
        from transformers import (
            AutoModelForSequenceClassification,
            AutoTokenizer,
            pipeline,
        )

        model_name = "unitary/toxic-bert"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model_local = AutoModelForSequenceClassification.from_pretrained(model_name)
        # device=-1 forces CPU inference.
        moderator = pipeline(
            "text-classification", model=model_local, tokenizer=tokenizer, device=-1
        )
        _moderator = moderator
        elapsed = time.time() - start
        _loaded_models["Moderator"] = elapsed
        _set_status(name_key, status="ready", progress=1.0, elapsed=round(elapsed, 2))
        _log(f"✅ Moderator completed in {elapsed:.2f}s (object ready)")
    except Exception as e:
        _set_status(name_key, status="error", progress=0.0, elapsed=round(time.time() - start, 2))
        _log(f"⚠️ Moderator load failed: {e}")


# =====================================================
# ⚡ PUBLIC INTERFACE (SYNCHRONOUS LOAD)
# =====================================================
class Lazy:
    """Facade over the module-level singletons.

    Despite the name, construction loads everything eagerly and
    synchronously; the properties just expose whatever loaded (or None
    for a model whose loader failed).
    """

    def __init__(self):
        _log("===== JusticeAI Lazy Loader (SYNC MODE) =====")
        _log(f"Cache Directory: {CACHE_DIR}")
        clean_cache_if_needed()
        _load_sentence_model()
        _load_spellchecker()
        _load_moderator()
        # If every loader failed, fall back to wall-clock time since import.
        total = sum(_loaded_models.values()) if _loaded_models else (time.time() - _start_time)
        _log(f"⏱️ All models attempted in {total:.2f}s total")

    @property
    def model(self):
        """SentenceTransformer instance, or None if loading failed."""
        _log(f"Accessed lazy.model: {_sentence_model}")
        return _sentence_model

    @property
    def spell(self):
        """SpellChecker instance, or None if loading failed."""
        _log(f"Accessed lazy.spell: {_spellchecker}")
        return _spellchecker

    @property
    def moderator(self):
        """Moderation pipeline, or None if loading failed."""
        _log(f"Accessed lazy.moderator: {_moderator}")
        return _moderator

    def get_progress(self):
        """Return the module-level status snapshot (see ``get_progress``)."""
        return get_progress()

    def get_startup_log_path(self):
        """Return the path of the append-only startup log file."""
        return STARTUP_LOG


# Module-level singleton: importing this module performs the full load.
lazy = Lazy()