#!/usr/bin/env bash
set -euo pipefail
echo "🚀 Builder completo — FlashAttention LayerNorm, Apex, Q8, FlashAttention (GitHub) + upload"
# ===== Configurações e diretórios =====
APP_WHEELS="/app/wheels"
APP_CUDA_CACHE="/app/cuda_cache"
SRC_DIR="$APP_WHEELS/src"
mkdir -p "$APP_WHEELS" "$APP_CUDA_CACHE" "$SRC_DIR"
chmod -R 777 "$APP_WHEELS" || true
export CUDA_CACHE_PATH="$APP_CUDA_CACHE"
# Preserve the NGC license (if present)
if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
cp -f /NGC-DL-CONTAINER-LICENSE "$APP_WHEELS/" || true
fi
# ===== Minimal dependencies =====
python -m pip install -v -U \
pip build setuptools wheel hatchling hatch-vcs \
scikit-build-core cmake ninja packaging \
"huggingface_hub[hf_transfer]" || true
# ===== Environment tags (Python/CUDA/Torch) =====
PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
TORCH_VER="$(python - <<'PY'
try; import torch, re; v=torch.__version__; print(re.sub(r'\+.*$', '', v))
except; print("unknown")
PY
)"
CU_TAG="$(python - <<'PY'
try; import torch; cu=getattr(torch.version,"cuda",None); echo="cu"+cu.replace(".","") if cu else ""; print(echo)
except; print("")
PY
)"
echo "[env] PY_TAG=$PY_TAG TORCH_VER=$TORCH_VER CU_TAG=$CU_TAG"
# ===== Check functions =====
check_flash_layer_norm_bin() {
python - <<'PY'
import importlib, sys
# Any one of these fused layer-norm entry points is sufficient.
modules = [
    "dropout_layer_norm",
    "flash_attn.ops.layer_norm",
    "flash_attn.ops.rms_norm",
]
for m in modules:
    try:
        importlib.import_module(m)
        sys.exit(0)
    except Exception:
        pass
sys.exit(1)
PY
}
check_apex() {
python - <<'PY'
import importlib, sys
try:
    # Both the Python wrapper and the compiled CUDA extension must import.
    from apex.normalization import FusedLayerNorm
    importlib.import_module("fused_layer_norm_cuda")
    sys.exit(0)
except Exception:
    sys.exit(1)
PY
}
check_q8() {
python - <<'PY'
import importlib.util, sys
spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
sys.exit(0 if spec else 1)
PY
}
# ===== Hugging Face helpers =====
install_from_hf_by_prefix() {
local PREFIX="$1"
# The heredoc is quoted, so shell values must reach Python via the environment;
# the original "${PREFIX}"-style placeholders were never actually expanded.
PREFIX="$PREFIX" PY_TAG="$PY_TAG" CU_TAG="$CU_TAG" APP_WHEELS="$APP_WHEELS" \
python - <<'PY' || return 1
import os, sys
from huggingface_hub import HfApi, hf_hub_download, HfFolder
repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
# Wheel filenames normalize "-" to "_" (PEP 427), so "flash-attn" must match "flash_attn-*.whl".
prefix = os.environ["PREFIX"].replace("-", "_")
py_tag, cu_tag = os.environ["PY_TAG"], os.environ["CU_TAG"]
api = HfApi(token=token)
files = api.list_repo_files(repo_id=repo, repo_type="model")
cands = [f for f in files if f.endswith(".whl")
         and f.rsplit("/", 1)[-1].startswith(prefix + "-") and py_tag in f]
pref = [f for f in cands if cu_tag in f] or cands  # prefer a CUDA-tag match
if not pref:
    sys.exit(1)
target = sorted(pref, reverse=True)[0]
hf_hub_download(repo_id=repo, filename=target, repo_type="model",
                local_dir=os.environ["APP_WHEELS"])
print(target)
PY
}
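# Usage sketch (the wheel layout in the HF repo is an assumption):
#   install_from_hf_by_prefix "apex" \
#     && python -m pip install -v --no-deps "$APP_WHEELS"/apex-*.whl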
# ===== Build functions =====
build_flash_layer_norm() {
echo "=== FlashAttn LayerNorm ==="
if install_from_hf_by_prefix "flash-attn"; then
python -m pip install -v --no-deps "$APP_WHEELS"/flash_attn-*.whl || true
check_flash_layer_norm_bin && return 0
echo "Wheel HF falhou, build local"
fi
SRC="$SRC_DIR/flash-attention"
if [ -d "$SRC/.git" ]; then
git -C "$SRC" fetch --all -p || true
git -C "$SRC" reset --hard origin/main || true
git -C "$SRC" clean -fdx || true
else
rm -rf "$SRC"
git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
fi
export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
import torch,sys
try: cc="%d.%d"%torch.cuda.get_device_capability(0); print(cc)
except: print("8.9")
PY
)"
echo "[build] TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
pushd "$SRC/csrc/layer_norm" >/dev/null
python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
popd >/dev/null
# The csrc/layer_norm extension typically builds as dropout_layer_norm, so match broadly.
WHEEL=$(ls -t "$APP_WHEELS"/*layer*norm*-*.whl 2>/dev/null | head -n1)
python -m pip install -v --no-deps "${WHEEL:-$SRC/csrc/layer_norm}" || true
check_flash_layer_norm_bin || echo "⚠️ LayerNorm import failed"
}
build_apex() {
echo "=== Apex ==="
SRC="$SRC_DIR/apex"
rm -rf "$SRC"
git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
# Build the C++/CUDA extensions; these env vars are honored by recent Apex setup.py.
export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
python -m pip install -v --no-deps "$APP_WHEELS"/apex-*.whl || true
}
build_q8() {
echo "=== Q8 Kernels ==="
SRC="$SRC_DIR/q8_kernels"
rm -rf "$SRC"
git clone --filter=blob:none "$Q8_REPO" "$SRC"
git -C "$SRC" checkout "$Q8_COMMIT"
git -C "$SRC" submodule update --init --recursive
python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
python -m pip install -v --no-deps "$APP_WHEELS"/q8_kernels-*.whl || true
}
build_flash_attention_full() {
echo "=== FlashAttention (full GitHub) ==="
SRC="$SRC_DIR/flash-attention-full"
rm -rf "$SRC"
git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
pushd "$SRC" >/dev/null
export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-cuda}"
python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
popd >/dev/null
W=$(ls -t "$APP_WHEELS"/flash_attn-*.whl 2>/dev/null | head -n1)
if [ -n "$W" ]; then
python -m pip install -v --no-deps "$W"
else
python -m pip install -v --no-deps git+https://github.com/Dao-AILab/flash-attention
fi
}
# ===== Main execution =====
build_apex
build_q8
build_flash_attention_full
build_flash_layer_norm
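# ===== Sanity report (added) =====
# check_apex and check_q8 are defined above but never invoked in the main flow;
# summarize all three checks here. Deliberately non-fatal.
check_flash_layer_norm_bin && echo "[check] flash-attn layer_norm: OK" || echo "[check] flash-attn layer_norm: MISSING"
check_apex && echo "[check] apex FusedLayerNorm: OK" || echo "[check] apex FusedLayerNorm: MISSING"
check_q8 && echo "[check] q8 kernels: OK" || echo "[check] q8 kernels: MISSING"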
# ===== Wheel upload =====
python - <<'PY'
import os
from huggingface_hub import HfApi, HfFolder
repo = os.getenv("SELF_HF_REPO_ID","eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
if not token: exit(0)
api = HfApi(token=token)
api.upload_folder(
folder_path="$APP_WHEELS",
repo_id=repo,
repo_type="model",
allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
ignore_patterns=["**/src/**",".git/**"],
)
print("✅ Upload concluído.")
PY
chmod -R 777 "$APP_WHEELS" || true
echo "✅ Builder finalizado."