File size: 2,446 Bytes
7829d29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# Post-build hook: reinstalls the client libraries listed below before the
# remaining build steps run.
#
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

echo "🔧 PostBuild starting — optimizing CT-Chat Space..."

# -------------------------------------------------------
# 1️⃣ Fix dependency mismatches (Gradio & Websockets)
# -------------------------------------------------------
# --force-reinstall replaces whatever versions are already installed;
# --no-cache-dir stops pip from keeping downloaded wheels in its cache.
pip install --force-reinstall --no-cache-dir "websockets>=12" "gradio-client>=1.3.0"

# -------------------------------------------------------
# 2️⃣ Create and register shared NLTK data directory
# -------------------------------------------------------
echo "📁 Preparing shared NLTK data directory..."
# Exported so that child processes (the Python invocations further down)
# inherit it; NLTK adds the NLTK_DATA directory to its search path.
export NLTK_DATA="/usr/local/share/nltk_data"
# Expansions are quoted so the path is always passed as a single argument.
mkdir -p -- "$NLTK_DATA"
# NOTE(review): world-writable on purpose, presumably because the runtime
# user differs from the build user — confirm a tighter mode is not enough.
chmod -R 777 -- "$NLTK_DATA"

# -------------------------------------------------------
# 3️⃣ Preload all required NLTK resources (including punkt_tab)
# -------------------------------------------------------
echo "📦 Downloading NLTK resources..."
# Package ids handed to the NLTK downloader, one per line for easy diffing.
nltk_packages=(
  punkt
  punkt_tab
  averaged_perceptron_tagger
  averaged_perceptron_tagger_eng
  stopwords
  wordnet
  omw-1.4
)
# -d selects the download directory; the expansion is quoted so the path
# is passed as exactly one argument.
python -m nltk.downloader -d "$NLTK_DATA" "${nltk_packages[@]}"

# -------------------------------------------------------
# 4️⃣ Verify NLTK installs and paths
# -------------------------------------------------------
# Report-only check: a missing resource prints a warning but does not fail
# the build.
python - <<'PYCODE'
import nltk

# nltk.data.find() looks up "<category>/<name>" relative to each data
# directory; a bare package id such as "punkt" never matches. Each package
# id is therefore paired with its category-qualified resource path.
RESOURCES = {
    "punkt": "tokenizers/punkt",
    "punkt_tab": "tokenizers/punkt_tab",
    "averaged_perceptron_tagger": "taggers/averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng": "taggers/averaged_perceptron_tagger_eng",
    "stopwords": "corpora/stopwords",
    "wordnet": "corpora/wordnet",
    "omw-1.4": "corpora/omw-1.4",
}

print(f"NLTK data path → {nltk.data.path}")
for pkg, resource in RESOURCES.items():
    try:
        nltk.data.find(resource)
        print(f"✅ Verified NLTK resource: {pkg}")
    except LookupError:
        print(f"⚠️ Missing NLTK resource: {pkg}")
PYCODE

# -------------------------------------------------------
# 5️⃣ Clean caches (stay <50GB)
# -------------------------------------------------------
echo "🧹 Cleaning Hugging Face + Torch caches..."

# Best-effort delete: removes every path given and always reports success,
# so a failed removal cannot abort the script under `set -e`.
purge() {
  rm -rf "$@" || true
}

purge /root/.cache/*
purge /home/user/.cache/*
purge /usr/local/share/nltk_data/taggers/__pycache__
purge /home/user/app/hf_cache/*
purge /home/user/app/logs/*

# -------------------------------------------------------
# 6️⃣ Ensure writable temporary cache for runtime
# -------------------------------------------------------
printf '%s\n' "📦 Preparing /tmp/hf_cache..."
# Directory is created if absent, then opened up to every user.
runtime_cache=/tmp/hf_cache
mkdir -p "$runtime_cache"
chmod -R 777 "$runtime_cache"

# -------------------------------------------------------
# ✅ Done
# -------------------------------------------------------
printf '%s\n' "✅ PostBuild completed successfully — NLTK preloaded (punkt_tab OK), cache ready at /tmp/hf_cache."