#!/usr/bin/env python3
"""
Storage cleanup script for Hugging Face Spaces
Removes old/unused models and cache to prevent storage limit errors
"""
import os
import shutil
from pathlib import Path
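
# The fine-tuned model cleanup in cleanup_storage() is currently report-only because
# it needs to know which models the app still uses. The helper below is a minimal
# sketch of that step, assuming the caller can pass in active_dirs, a set of
# directory names (e.g. looked up from the app database) that must be kept.
def prune_finetuned_models(finetuned_dir: Path, active_dirs: set) -> None:
    """Delete fine-tuned model directories not in the active set (sketch)."""
    for model_dir in finetuned_dir.iterdir():
        if model_dir.is_dir() and model_dir.name not in active_dirs:
            try:
                shutil.rmtree(model_dir)
                print(f"Removed inactive fine-tuned model: {model_dir}")
            except OSError as e:
                print(f"Error removing {model_dir}: {e}")
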
def cleanup_storage():
    """Remove unnecessary files to reduce storage usage."""
    # Define paths
    cache_dir = Path("/data/.cache/huggingface")
    models_dir = Path("/data/models")
    # 1. Clean up duplicate model downloads in cache
    # (see trim_hub_cache_via_api() below for a huggingface_hub-based alternative)
    if cache_dir.exists():
        for subdir in ["models", "hub"]:
            target_dir = cache_dir / subdir
            if not target_dir.exists():
                continue
            # Keep only the 2 most recent snapshots of each cached model;
            # older snapshot directories are deleted to free space.
            for snapshots_dir in target_dir.glob("**/snapshots"):
                snapshots = sorted(
                    (d for d in snapshots_dir.iterdir() if d.is_dir()),
                    key=os.path.getmtime,
                    reverse=True,
                )
                for old_model in snapshots[2:]:
                    try:
                        shutil.rmtree(old_model)
                        print(f"Cleaned up old model cache: {old_model}")
                    except Exception as e:
                        print(f"Error cleaning {old_model}: {e}")
    # 2. Clean up old fine-tuned models (keep only active ones)
    if models_dir.exists():
        finetuned_dir = models_dir / "finetuned"
        if finetuned_dir.exists():
            # Deleting here would require database access to know which models are
            # active (see the prune_finetuned_models() sketch above).
            # For now, just report the size.
            total_size = sum(f.stat().st_size for f in finetuned_dir.rglob('*') if f.is_file())
            print(f"Fine-tuned models size: {total_size / (1024**3):.2f} GB")
    # 3. Report storage usage
    if Path("/data").exists():
        total_size = sum(f.stat().st_size for f in Path("/data").rglob('*') if f.is_file())
        print(f"Total /data storage: {total_size / (1024**3):.2f} GB")
        # Breakdown by directory
        for subdir in [".cache", "models"]:
            dir_path = Path("/data") / subdir
            if dir_path.exists():
                dir_size = sum(f.stat().st_size for f in dir_path.rglob('*') if f.is_file())
                print(f"  {subdir}: {dir_size / (1024**3):.2f} GB")
if __name__ == "__main__":
    cleanup_storage()