#!/usr/bin/env bash
set -euo pipefail

echo "🚀 Builder completo — FlashAttention LayerNorm, Apex, Q8, FlashAttention (GitHub) + upload"

# ----- Configuration and working directories -----
APP_WHEELS="/app/wheels"
APP_CUDA_CACHE="/app/cuda_cache"
SRC_DIR="$APP_WHEELS/src"

mkdir -p "$APP_WHEELS" "$APP_CUDA_CACHE" "$SRC_DIR"
chmod -R 777 "$APP_WHEELS" || true
export CUDA_CACHE_PATH="$APP_CUDA_CACHE"

# Preserve the NGC license file when the base image ships one (best-effort).
if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
  cp -f /NGC-DL-CONTAINER-LICENSE "$APP_WHEELS/" || true
fi

# ----- Minimal build-time dependencies (best-effort) -----
python -m pip install -v -U \
  pip build setuptools wheel hatchling hatch-vcs \
  scikit-build-core cmake ninja packaging \
  "huggingface_hub[hf_transfer]" || true
# ----- Environment tags (Python / CUDA / Torch) used to pick matching wheels -----
PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
# NOTE: the original heredocs used "try; ... except; ..." which is not valid
# Python syntax; both snippets below are rewritten as real try/except blocks.
TORCH_VER="$(python - <<'PY'
try:
    import re
    import torch
    # Strip any local-version suffix such as "+cu121".
    print(re.sub(r'\+.*$', '', torch.__version__))
except Exception:
    print("unknown")
PY
)"
CU_TAG="$(python - <<'PY'
try:
    import torch
    cu = getattr(torch.version, "cuda", None)
    # e.g. "12.1" -> "cu121"; prints an empty tag for CPU-only torch builds.
    print("cu" + cu.replace(".", "") if cu else "")
except Exception:
    print("")
PY
)"
echo "[env] PY_TAG=$PY_TAG TORCH_VER=$TORCH_VER CU_TAG=$CU_TAG"
# ----- Import checks -----
# Exit 0 when any FlashAttention fused layer-norm binary module imports.
check_flash_layer_norm_bin() {
  python - <<'PY'
import importlib
import sys

MODULES = (
    "dropout_layer_norm",
    "flash_attn.ops.layer_norm",
    "flash_attn.ops.rms_norm",
)
for name in MODULES:
    try:
        importlib.import_module(name)
    except Exception:
        continue  # this candidate is missing/broken; try the next one
    # BUGFIX: sys.exit(0) must live OUTSIDE the try block — the original had
    # it inside, so the bare except swallowed SystemExit and the check always
    # fell through to exit code 1 even on success.
    sys.exit(0)
sys.exit(1)
PY
}
# Exit 0 when Apex's fused layer norm (Python wrapper + CUDA binary) imports.
check_apex() {
  python - <<'PY'
import importlib
import sys

try:
    from apex.normalization import FusedLayerNorm  # noqa: F401
    importlib.import_module("fused_layer_norm_cuda")
except Exception:
    # Catch only genuine import failures here; the success exit is below.
    sys.exit(1)
# BUGFIX: the original called sys.exit(0) inside the try, so the bare except
# caught SystemExit and turned every success into exit code 1.
sys.exit(0)
PY
}
# Exit 0 when a Q8 kernels package is importable (either distribution name).
check_q8() {
  python - <<'PY'
import importlib.util
import sys

found = any(
    importlib.util.find_spec(name) is not None
    for name in ("ltx_q8_kernels", "q8_kernels")
)
sys.exit(0 if found else 1)
PY
}
# ----- Hugging Face helpers -----
# Download the best prebuilt wheel whose distribution name starts with $1
# from the HF repo into $APP_WHEELS. Returns 1 when nothing matches.
#
# BUGFIX: the heredoc is quoted ('PY'), so the shell never expanded
# ${PREFIX}/${PY_TAG}/${CU_TAG}/$APP_WHEELS inside the Python code — the
# filter compared against the literal strings and could never match. The
# values are now passed explicitly through the environment. Matching is
# also done on the wheel basename (wheels uploaded by this script sit at
# the repo root, so the old "/${PREFIX}-" test skipped them) and on the
# underscore-normalized name ("flash-attn" -> "flash_attn-...whl").
install_from_hf_by_prefix() {
  local PREFIX="$1"
  WHL_PREFIX="$PREFIX" WHL_PY_TAG="$PY_TAG" WHL_CU_TAG="$CU_TAG" WHL_DIR="$APP_WHEELS" \
  python - <<'PY' || return 1
import os
import sys

from huggingface_hub import HfApi, HfFolder, hf_hub_download

repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
prefix = os.environ["WHL_PREFIX"]
py_tag = os.environ["WHL_PY_TAG"]
cu_tag = os.environ["WHL_CU_TAG"]

# Wheel filenames normalize dashes to underscores; accept both spellings.
names = {prefix, prefix.replace("-", "_")}

api = HfApi(token=token)
files = api.list_repo_files(repo_id=repo, repo_type="model")
cands = [
    f for f in files
    if f.endswith(".whl")
    and any(os.path.basename(f).startswith(n + "-") for n in names)
    and py_tag in f
]
# Prefer wheels matching the CUDA tag; fall back to any Python-tag match
# (also when no CUDA tag is known, mirroring the original "" behaviour).
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    sys.exit(1)
target = sorted(pref, reverse=True)[0]
hf_hub_download(repo_id=repo, filename=target, repo_type="model",
                local_dir=os.environ["WHL_DIR"])
print(target)
PY
}
# ----- Build functions -----
# FlashAttention fused LayerNorm extension: try a prebuilt HF wheel first,
# then fall back to building csrc/layer_norm from source.
build_flash_layer_norm() {
  echo "=== FlashAttn LayerNorm ==="
  if install_from_hf_by_prefix "flash-attn"; then
    python -m pip install -v --no-deps "$APP_WHEELS"/flash_attn-*.whl || true
    check_flash_layer_norm_bin && return 0
    echo "Wheel HF falhou, build local"
  fi

  local SRC="$SRC_DIR/flash-attention"
  if [ -d "$SRC/.git" ]; then
    # Reuse the existing clone, forced back to a pristine origin/main.
    git -C "$SRC" fetch --all -p || true
    git -C "$SRC" reset --hard origin/main || true
    git -C "$SRC" clean -fdx || true
  else
    rm -rf "$SRC"
    git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
  fi

  # Compile only for the GPU actually present to keep build time down;
  # fall back to 8.9 (Ada) when no CUDA device is visible.
  TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
try:
    import torch
    print("%d.%d" % torch.cuda.get_device_capability(0))
except Exception:
    print("8.9")
PY
)"
  export TORCH_CUDA_ARCH_LIST
  echo "[build] TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"

  pushd "$SRC/csrc/layer_norm" >/dev/null
  python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
  popd >/dev/null

  # Install the freshest built wheel, or build/install from source as a last resort.
  local WHEEL
  WHEEL=$(ls -t "$APP_WHEELS"/*flash*layer*norm*-*.whl 2>/dev/null | head -n1)
  python -m pip install -v --no-deps "${WHEEL:-$SRC/csrc/layer_norm}" || true
  check_flash_layer_norm_bin || echo "⚠️ LayerNorm import falhou"
}
# Build NVIDIA Apex from a fresh shallow clone and install the resulting
# wheel; every step is best-effort so a failure does not stop the script.
build_apex() {
  echo "=== Apex ==="
  SRC="$SRC_DIR/apex"
  rm -rf "$SRC"
  git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
  # Enable the core C++/CUDA extensions; skip the contrib ones.
  export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
  python -m pip install -v --no-deps "$APP_WHEELS"/apex-*.whl || true
}
# Build the Q8 (int8) kernels. Honors Q8_REPO / Q8_COMMIT when provided.
# BUGFIX: the original referenced $Q8_REPO and $Q8_COMMIT unconditionally
# but never defined them — under `set -u` that aborts the entire script.
# Defaults are supplied and the checkout is skipped when no commit is pinned.
build_q8() {
  echo "=== Q8 Kernels ==="
  local repo="${Q8_REPO:-https://github.com/KONAKONA666/q8_kernels}"
  local commit="${Q8_COMMIT:-}"
  SRC="$SRC_DIR/q8_kernels"
  rm -rf "$SRC"
  git clone --filter=blob:none "$repo" "$SRC"
  if [ -n "$commit" ]; then
    git -C "$SRC" checkout "$commit"
  fi
  git -C "$SRC" submodule update --init --recursive
  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
  python -m pip install -v --no-deps "$APP_WHEELS"/q8_kernels-*.whl || true
}
# Build the full FlashAttention package from GitHub and install it, falling
# back to a direct pip-from-git install when no wheel was produced.
build_flash_attention_full() {
  echo "=== FlashAttention (full GitHub) ==="
  SRC="$SRC_DIR/flash-attention-full"
  rm -rf "$SRC"
  git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
  pushd "$SRC" >/dev/null
  # BUGFIX: the original fallback was the literal string "cuda", which is not
  # a valid TORCH_CUDA_ARCH_LIST value; default to 8.9 (Ada), matching the
  # fallback used by the layer-norm build.
  export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"
  python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
  popd >/dev/null
  local W
  W=$(ls -t "$APP_WHEELS"/flash_attn-*.whl 2>/dev/null | head -n1)
  if [ -n "$W" ]; then
    python -m pip install -v --no-deps "$W"
  else
    python -m pip install -v --no-deps git+https://github.com/Dao-AILab/flash-attention
  fi
}
# ----- Main sequence -----
build_apex
build_q8
build_flash_attention_full
build_flash_layer_norm

# ----- Upload produced wheels to the Hugging Face repo -----
# BUGFIX: the heredoc is quoted ('PY'), so "$APP_WHEELS" was handed to
# upload_folder as the literal string "$APP_WHEELS" (a nonexistent path).
# The directory is now passed through the environment instead.
WHEELS_DIR="$APP_WHEELS" python - <<'PY'
import os

from huggingface_hub import HfApi, HfFolder

repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
if not token:
    raise SystemExit(0)  # no credentials: skip the upload quietly
api = HfApi(token=token)
api.upload_folder(
    folder_path=os.environ["WHEELS_DIR"],
    repo_id=repo,
    repo_type="model",
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    ignore_patterns=["**/src/**", ".git/**"],
)
print("✅ Upload concluído.")
PY

chmod -R 777 "$APP_WHEELS" || true
echo "✅ Builder finalizado."