File size: 3,870 Bytes
7b27e12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env bash
# setup.sh
# Goal:
# - Try to install Apex and FlashAttention from the Hugging Face Hub (no local cache).
# - If both succeed (import OK), start run.sh.
# - If either fails, start builder.sh.

# Treat expansion of an unset variable as an error.
set -u

# Startup banner; the newline inside the quotes is part of the message.
echo "🔧 Setup: Installing from Hugging Face Hub (Apex, FlashAttention)
for L40S cuda 12.4.1 devel 22.04"

# --- Config ---
# Each setting keeps a value supplied by the environment and only falls back
# to its default when unset or empty.
: "${SELF_HF_REPO_ID:=XCarleX/Apex-l40s}"
: "${HF_HOME:=/app/model_cache}"
: "${HF_HUB_CACHE:=$HF_HOME/hub}"
: "${HF_HUB_ENABLE_HF_TRANSFER:=1}"
export SELF_HF_REPO_ID HF_HOME HF_HUB_CACHE HF_HUB_ENABLE_HF_TRANSFER

# Put the per-user bin directory first on PATH.
PATH="$HOME/.local/bin:$PATH"
export PATH

# Wheel download directory and Hub cache root.
mkdir -p /app/wheels "$HF_HOME"
# Best effort: keep going when the permissions cannot be changed.
chmod -R 777 /app/wheels || true

# --- Ensure hub client is available ---
# Probe for the huggingface_hub package (the probe's stderr is discarded).
# When the probe fails, install the package with the hf_transfer extra; when
# that install fails as well, install the plain package.
if ! python -c 'import huggingface_hub; print("huggingface_hub OK")' 2>/dev/null; then
  if ! python -m pip install -q -U "huggingface_hub[hf_transfer]"; then
    python -m pip install -q -U huggingface_hub
  fi
fi

# --- Runtime tags (Python, CUDA) to pick the right wheel ---
# PY_TAG is "cp" + the interpreter's major and minor version (e.g. cp310).
# CU_TAG is "cu" + torch's CUDA version with the dots removed (e.g. cu124),
# or "cu0" when torch reports no CUDA version or cannot be imported.
#
# The Python tag is computed before torch is touched, so a missing or broken
# torch install no longer blanks both tags; an empty tag is useless for
# selecting a wheel.
RUNTIME_TAGS="$(python - <<'PY'
import sys

py_tag = f"cp{sys.version_info.major}{sys.version_info.minor}"
try:
    import torch
    cuda_version = torch.version.cuda or "0"
except Exception:
    # torch missing or failing to load: report "no CUDA" instead of crashing.
    cuda_version = "0"
print(py_tag, "cu" + cuda_version.replace(".", ""))
PY
)"
read -r PY_TAG CU_TAG <<<"${RUNTIME_TAGS}" || true
# Fallbacks for the case where the interpreter itself could not be run.
PY_TAG="${PY_TAG:-}"
CU_TAG="${CU_TAG:-cu0}"
echo "[env] Python=${PY_TAG} CUDA=${CU_TAG}"

# --- Checkers (return 0/1) ---
# Probe whether Apex's FusedLayerNorm and the fused_layer_norm_cuda module
# can be imported by the current interpreter.
# Outputs: one status line on stdout (the probe's own output is discarded).
# Returns: 0 when the imports succeed, 1 otherwise.
check_apex() {
  if python - >/dev/null 2>&1 <<'PY'
from importlib import import_module
from apex.normalization import FusedLayerNorm
import_module("fused_layer_norm_cuda")
PY
  then
    echo "[apex] import OK"
    return 0
  fi
  echo "[apex] import failed"
  return 1
}

# Probe whether the flash_attn package can be imported by the current
# interpreter.
# Outputs: one status line on stdout (the probe's own output is discarded).
# Returns: 0 when the import succeeds, 1 otherwise.
check_flashattn() {
  if python - >/dev/null 2>&1 <<'PY'
import flash_attn
PY
  then
    echo "[flash_attn] import OK"
    return 0
  fi
  echo "[flash_attn] import failed"
  return 1
}

# --- Download from Hub (prefer Python/CUDA match) ---
#######################################
# Download the best-matching wheel for a package from the Hub repository.
# Globals:
#   SELF_HF_REPO_ID, PY_TAG, CU_TAG (read); HF_TOKEN (read by the Python part)
# Arguments:
#   $1 - wheel file-name prefix, e.g. "apex" or "flash_attn"
# Outputs:
#   stdout: the local path of the downloaded wheel, and nothing else; empty
#           when the repo listing fails or no wheel matches
#   stderr: the progress line
# Returns:
#   0 when the Python part ran to completion (including "no match"),
#   1 when it failed (e.g. import or download error)
#######################################
install_from_hf () {
  local pkg="$1"
  # Progress goes to stderr: callers capture stdout to obtain the wheel path,
  # so anything else printed there would be mistaken for a result.
  echo "[hub] Searching wheel for ${pkg} in ${SELF_HF_REPO_ID}" >&2
  # The interpreter's stderr is discarded on purpose to keep the log quiet.
  # `return` instead of `exit`, so a failure never terminates the calling
  # script when the function is invoked outside a command substitution.
  python - "$pkg" "$PY_TAG" "$CU_TAG" 2>/dev/null <<'PY' || return 1
import os, sys
from huggingface_hub import HfApi, hf_hub_download

pkg, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
repo = os.environ.get("SELF_HF_REPO_ID", "XCarleX/Apex-l40s")
# With token=None, HfApi falls back to the locally saved token by itself.
api = HfApi(token=os.getenv("HF_TOKEN") or None)
try:
    files = api.list_repo_files(repo_id=repo, repo_type="model")
except Exception:
    # Repo unreachable or unreadable: report "nothing found", not an error.
    raise SystemExit(0)
# Wheels whose file name starts with "<pkg>-" and whose path mentions py_tag.
cands = [
    f for f in files
    if f.endswith(".whl")
    and f.rsplit("/", 1)[-1].startswith(pkg + "-")
    and py_tag in f
]
# Prefer wheels that also mention the CUDA tag; otherwise accept any candidate.
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    raise SystemExit(0)
# Last name in lexicographic order wins.
target = sorted(pref, reverse=True)[0]
path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
# Print only after a successful download, so stdout is either empty or a path.
print(path)
PY
}

# --- Try Apex and FlashAttention from Hub ---

#######################################
# Make sure a package imports, installing its wheel from the Hub when needed.
# Arguments:
#   $1 - package name handed to install_from_hf (also used in log messages)
#   $2 - name of the checker function; it must return 0 when the import works
# Outputs:
#   progress messages on stdout
# Returns:
#   0 when the package imports (already, or after installing the wheel),
#   non-zero otherwise
#######################################
ensure_from_hub() {
  local pkg="$1" checker="$2"
  local hub_out wheel_path

  # Already importable: nothing to download.
  if "$checker"; then
    return 0
  fi

  # Best effort: a failing download is handled by the file check below.
  hub_out="$(install_from_hf "$pkg" || true)"
  # The wheel path, when there is one, is the last line of the output.
  wheel_path="$(printf '%s\n' "$hub_out" | tail -n1)"

  # Only an existing file counts as a downloaded wheel. Any other captured
  # text (log lines, a repo-relative name left by a failed download) must not
  # be handed to pip.
  if [ -z "$wheel_path" ] || [ ! -f "$wheel_path" ]; then
    echo "[hub] No compatible ${pkg} wheel found"
    return 1
  fi

  echo "[hub] Downloaded: ${wheel_path}"
  # Install failures are tolerated here; the re-check decides the outcome.
  python -m pip install -q -U --no-deps "$wheel_path" || true
  "$checker"
}

# Status flags read by the launcher: 0 = importable, 1 = not available.
APEX_OK=1
if ensure_from_hub "apex" check_apex; then
  APEX_OK=0
fi

FLASH_OK=1
if ensure_from_hub "flash_attn" check_flashattn; then
  FLASH_OK=0
fi

# --- Decide launcher ---

#######################################
# Start run.sh when both dependencies are usable, builder.sh otherwise.
# Globals:
#   APEX_OK, FLASH_OK (read) - 0 means importable; any other value, or an
#   unset variable, counts as missing
# Outputs:
#   one log line on stdout, followed by whatever the launched script prints
# Returns:
#   the exit status of the launched script
#######################################
launch_next_stage() {
  if [ "${APEX_OK:-1}" -eq 0 ] && [ "${FLASH_OK:-1}" -eq 0 ]; then
    echo "[setup] Dependencies OK from Hub -> launching run.sh"
    chmod +x ./run.sh
    ./run.sh
  else
    # This branch used to sit inside the success path (the `else` was
    # missing), so builder.sh ran after run.sh and never ran on failure.
    echo "[setup] Missing dependencies -> launching builder.sh"
    chmod +x ./builder.sh
    ./builder.sh
  fi
}

launch_next_stage