#!/usr/bin/env bash
set -euo pipefail

echo "🚀 Builder completo — FlashAttention LayerNorm, Apex, Q8, FlashAttention (GitHub) + upload"

# ===== Configuration and directories =====
APP_WHEELS="/app/wheels"
APP_CUDA_CACHE="/app/cuda_cache"
SRC_DIR="$APP_WHEELS/src"
mkdir -p "$APP_WHEELS" "$APP_CUDA_CACHE" "$SRC_DIR"
chmod -R 777 "$APP_WHEELS" || true
export CUDA_CACHE_PATH="$APP_CUDA_CACHE"

# Preserve the NGC license (if present)
if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
  cp -f /NGC-DL-CONTAINER-LICENSE "$APP_WHEELS/" || true
fi

# ===== Minimal dependencies =====
python -m pip install -v -U \
  pip build setuptools wheel hatchling hatch-vcs \
  scikit-build-core cmake ninja packaging \
  "huggingface_hub[hf_transfer]" || true

# ===== Environment tags (Python/CUDA/Torch) =====
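# These tags identify the current toolchain (e.g. PY_TAG=cp310, TORCH_VER=2.4.0, CU_TAG=cu121; illustrative values);
# PY_TAG and CU_TAG are used below to pick a matching prebuilt wheel from the Hub.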
PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
TORCH_VER="$(python - <<'PY'  
try; import torch, re; v=torch.__version__; print(re.sub(r'\+.*$', '', v))
except; print("unknown")
PY
)"
CU_TAG="$(python - <<'PY'
try; import torch; cu=getattr(torch.version,"cuda",None); echo="cu"+cu.replace(".","") if cu else ""; print(echo)
except; print("")
PY
)"
echo "[env] PY_TAG=$PY_TAG TORCH_VER=$TORCH_VER CU_TAG=$CU_TAG"

# ===== Check functions =====
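# Each check_* helper exits 0 when the corresponding compiled extension imports cleanly, 1 otherwise.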
check_flash_layer_norm_bin() {
  python - <<'PY'
import importlib, sys
modules = [
  "dropout_layer_norm",
  "flash_attn.ops.layer_norm",
  "flash_attn.ops.rms_norm",
]
for m in modules:
    try:
        importlib.import_module(m)
    except Exception:
        continue
    sys.exit(0)
sys.exit(1)
PY
}

check_apex() {
  python - <<'PY'
import sys
try:
    from apex.normalization import FusedLayerNorm  # noqa: F401
    import importlib
    importlib.import_module("fused_layer_norm_cuda")
except Exception:
    sys.exit(1)
sys.exit(0)
PY
}

check_q8() {
  python - <<'PY'
import importlib.util, sys
spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
sys.exit(0 if spec else 1)
PY
}

# ===== Hugging Face helpers =====
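# install_from_hf_by_prefix <prefix>: looks for the newest *.whl in SELF_HF_REPO_ID that matches the prefix and the
# PY_TAG/CU_TAG tags, downloads it into $APP_WHEELS, and returns 1 when nothing matches.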
install_from_hf_by_prefix() {
  local PREFIX="$1"
  # The heredoc is quoted, so shell values are passed to Python through the environment.
  PREFIX="$PREFIX" PY_TAG="$PY_TAG" CU_TAG="$CU_TAG" APP_WHEELS="$APP_WHEELS" \
    python - <<'PY' || return 1
import os, sys
from huggingface_hub import HfApi, HfFolder, hf_hub_download

repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
prefix = os.environ["PREFIX"]
names = {prefix, prefix.replace("-", "_")}  # wheel filenames normalize dashes to underscores
py_tag, cu_tag = os.environ["PY_TAG"], os.environ["CU_TAG"]

api = HfApi(token=token)
files = api.list_repo_files(repo_id=repo, repo_type="model")
cands = [f for f in files
         if f.endswith(".whl") and py_tag in f
         and any(f.startswith(f"{n}-") or f"/{n}-" in f for n in names)]
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    sys.exit(1)
target = sorted(pref, reverse=True)[0]
hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir=os.environ["APP_WHEELS"])
print(target)
PY
}

# ===== Build functions =====
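# Each build_* step clones its source into $SRC_DIR and builds a wheel into $APP_WHEELS before installing it with
# --no-deps; the wheel builds use "|| true" so a failed build does not abort the whole script.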
build_flash_layer_norm() {
  echo "=== FlashAttn LayerNorm ==="
  if install_from_hf_by_prefix "flash-attn"; then
    python -m pip install -v --no-deps "$APP_WHEELS"/flash_attn-*.whl || true
    check_flash_layer_norm_bin && return 0
    echo "Wheel HF falhou, build local"
  fi

  SRC="$SRC_DIR/flash-attention"
  if [ -d "$SRC/.git" ]; then
    git -C "$SRC" fetch --all -p || true
    git -C "$SRC" reset --hard origin/main || true
    git -C "$SRC" clean -fdx || true
  else
    rm -rf "$SRC"
    git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
  fi

  export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
# Report the compute capability of GPU 0; fall back to 8.9 when torch/GPU is unavailable at build time.
try:
    import torch
    print("%d.%d" % torch.cuda.get_device_capability(0))
except Exception:
    print("8.9")
PY
  )"
  echo "[build] TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"

  pushd "$SRC/csrc/layer_norm" >/dev/null
    python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
  popd >/dev/null

  # The layer_norm wheel does not necessarily contain "flash" in its name (the module is dropout_layer_norm),
  # so match on *layer*norm* and fall back to installing directly from the source tree.
  WHEEL=$(ls -t "$APP_WHEELS"/*layer*norm*-*.whl 2>/dev/null | head -n1)
  python -m pip install -v --no-deps "${WHEEL:-$SRC/csrc/layer_norm}" || true
  check_flash_layer_norm_bin || echo "⚠️ LayerNorm import failed"
}

build_apex() {
  echo "=== Apex ==="
  SRC="$SRC_DIR/apex"
  rm -rf "$SRC"
  git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
  export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
  python -m pip install -v --no-deps "$APP_WHEELS"/apex-*.whl || true
}

build_q8() {
  echo "=== Q8 Kernels ==="
  if [ -z "${Q8_REPO:-}" ] || [ -z "${Q8_COMMIT:-}" ]; then
    echo "⚠️ Q8_REPO/Q8_COMMIT not set; skipping Q8 build"
    return 0
  fi
  SRC="$SRC_DIR/q8_kernels"
  rm -rf "$SRC"
  git clone --filter=blob:none "$Q8_REPO" "$SRC"
  git -C "$SRC" checkout "$Q8_COMMIT"
  git -C "$SRC" submodule update --init --recursive
  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
  python -m pip install -v --no-deps "$APP_WHEELS"/q8_kernels-*.whl || true
}

build_flash_attention_full() {
  echo "=== FlashAttention (full GitHub) ==="
  SRC="$SRC_DIR/flash-attention-full"
  rm -rf "$SRC"
  git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
  pushd "$SRC" >/dev/null
    # Fall back to a concrete compute capability when TORCH_CUDA_ARCH_LIST is not already set (8.9, as above)
    export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"
    python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
  popd >/dev/null
  W=$(ls -t "$APP_WHEELS"/flash_attn-*.whl 2>/dev/null | head -n1)
  if [ -n "$W" ]; then
    python -m pip install -v --no-deps "$W"
  else
    python -m pip install -v --no-deps git+https://github.com/Dao-AILab/flash-attention
  fi
}

# ===== Main execution =====
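# Build order: Apex and Q8 first, then the full FlashAttention wheel, and finally the fused LayerNorm extension
# (which first tries a prebuilt wheel from the Hub before building from source).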

build_apex
build_q8
build_flash_attention_full
build_flash_layer_norm

# ===== Wheel upload =====
APP_WHEELS="$APP_WHEELS" python - <<'PY'
import os, sys
from huggingface_hub import HfApi, HfFolder

repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
if not token:
    sys.exit(0)  # no token available, skip the upload
api = HfApi(token=token)
api.upload_folder(
    folder_path=os.environ["APP_WHEELS"],
    repo_id=repo,
    repo_type="model",
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    ignore_patterns=["**/src/**", ".git/**"],
)
print("✅ Upload complete.")
PY

chmod -R 777 "$APP_WHEELS" || true
echo "✅ Builder finalizado."