#!/usr/bin/env bash
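# Strict mode: exit on errors, on unset variables, and on failed pipeline stages.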
set -euo pipefail
echo "🚀 Builder completo — FlashAttention LayerNorm, Apex, Q8, FlashAttention (GitHub) + upload"
# ===== Configuration and directories =====
APP_WHEELS="/app/wheels"
APP_CUDA_CACHE="/app/cuda_cache"
SRC_DIR="$APP_WHEELS/src"
mkdir -p "$APP_WHEELS" "$APP_CUDA_CACHE" "$SRC_DIR"
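# World-writable so later (possibly non-root) steps can write wheels here;
# ignore failures on read-only mounts.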
chmod -R 777 "$APP_WHEELS" || true
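# Persist the CUDA JIT cache so kernels are not recompiled on every restart.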
export CUDA_CACHE_PATH="$APP_CUDA_CACHE"
# Preserve the NGC license (if present)
if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
cp -f /NGC-DL-CONTAINER-LICENSE "$APP_WHEELS/" || true
fi
# ===== Minimal dependencies =====
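# Toolchain for the source builds below; hf_transfer speeds up Hub transfers.
# "|| true" keeps a transient pip failure from killing the whole build.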
python -m pip install -v -U \
pip build setuptools wheel hatchling hatch-vcs \
scikit-build-core cmake ninja packaging \
"huggingface_hub[hf_transfer]" || true
# ===== Environment tags (Python/CUDA/Torch) =====
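# PY_TAG (e.g. cp310) and CU_TAG (e.g. cu121) mirror the tags embedded in wheel filenames.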
PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
TORCH_VER="$(python - <<'PY'
try; import torch, re; v=torch.__version__; print(re.sub(r'\+.*$', '', v))
except; print("unknown")
PY
)"
CU_TAG="$(python - <<'PY'
try; import torch; cu=getattr(torch.version,"cuda",None); echo="cu"+cu.replace(".","") if cu else ""; print(echo)
except; print("")
PY
)"
echo "[env] PY_TAG=$PY_TAG TORCH_VER=$TORCH_VER CU_TAG=$CU_TAG"
# ===== Check functions =====
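# Each check runs a short Python probe; exit status 0 means the compiled extension imports.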
check_flash_layer_norm_bin() {
python - <<'PY'
import importlib, sys
modules = [
    "dropout_layer_norm",
    "flash_attn.ops.layer_norm",
    "flash_attn.ops.rms_norm",
]
for m in modules:
    try:
        importlib.import_module(m)
        sys.exit(0)  # any one binding is enough
    except ImportError:
        # A bare except here would also swallow the SystemExit above.
        pass
sys.exit(1)
PY
}
check_apex() {
python - <<'PY'
import importlib, sys
try:
    from apex.normalization import FusedLayerNorm
    importlib.import_module("fused_layer_norm_cuda")  # compiled CUDA extension
except Exception:
    sys.exit(1)
sys.exit(0)
PY
}
check_q8() {
python - <<'PY'
import importlib.util, sys
spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
sys.exit(0 if spec else 1)
PY
}
# ===== Hugging Face helpers =====
install_from_hf_by_prefix() {
local PREFIX="$1"
# The heredoc is quoted ('PY'), so shell variables must be passed via the environment.
PREFIX="$PREFIX" PY_TAG="$PY_TAG" CU_TAG="$CU_TAG" APP_WHEELS="$APP_WHEELS" \
python - <<'PY' || return 1
import os, sys
from huggingface_hub import HfApi, hf_hub_download, HfFolder
prefix = os.environ["PREFIX"]
py_tag = os.environ["PY_TAG"]
cu_tag = os.environ.get("CU_TAG", "")
repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
api = HfApi(token=token)
files = api.list_repo_files(repo_id=repo, repo_type="model")
# Match on the wheel basename; wheel filenames use underscores (PEP 427).
cands = [f for f in files
         if f.endswith(".whl")
         and f.rsplit("/", 1)[-1].startswith(prefix + "-")
         and py_tag in f]
# Prefer wheels built for the current CUDA tag, but fall back to any match.
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    sys.exit(1)
target = sorted(pref, reverse=True)[0]
hf_hub_download(repo_id=repo, filename=target, repo_type="model",
                local_dir=os.environ["APP_WHEELS"])
print(target)
PY
}
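# Example: install_from_hf_by_prefix "apex" fetches the newest cached apex wheel
# matching this Python tag (preferring the current CUDA tag when available).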
# ===== Build functions =====
build_flash_layer_norm() {
echo "=== FlashAttn LayerNorm ==="
# Wheel filenames normalize hyphens to underscores, hence "flash_attn".
if install_from_hf_by_prefix "flash_attn"; then
python -m pip install -v --no-deps "$APP_WHEELS"/flash_attn-*.whl || true
check_flash_layer_norm_bin && return 0
echo "HF wheel failed, building locally"
fi
SRC="$SRC_DIR/flash-attention"
if [ -d "$SRC/.git" ]; then
git -C "$SRC" fetch --all -p || true
git -C "$SRC" reset --hard origin/main || true
git -C "$SRC" clean -fdx || true
else
rm -rf "$SRC"
git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
fi
export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
import torch,sys
try: cc="%d.%d"%torch.cuda.get_device_capability(0); print(cc)
except: print("8.9")
PY
)"
echo "[build] TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
pushd "$SRC/csrc/layer_norm" >/dev/null
python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
popd >/dev/null
# The csrc/layer_norm wheel is typically named dropout_layer_norm-*.whl, so match broadly.
WHEEL=$(ls -t "$APP_WHEELS"/*layer_norm*-*.whl 2>/dev/null | head -n1)
python -m pip install -v --no-deps "${WHEEL:-$SRC/csrc/layer_norm}" || true
check_flash_layer_norm_bin || echo "⚠️ LayerNorm import failed"
}
build_apex() {
echo "=== Apex ==="
SRC="$SRC_DIR/apex"
rm -rf "$SRC"
git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
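# Recent apex setup.py honors these env flags to build the fused C++/CUDA
# extensions; contrib extensions stay disabled to keep the build lean.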
export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
python -m pip install -v --no-deps "$APP_WHEELS"/apex-*.whl || true
}
build_q8() {
echo "=== Q8 Kernels ==="
# Q8_REPO/Q8_COMMIT are expected from the environment; under `set -u` an unset
# variable aborts the script, so default them here (the default repo URL is an
# assumption; override via Q8_REPO).
local Q8_REPO="${Q8_REPO:-https://github.com/KONAKONA666/q8_kernels}"
local Q8_COMMIT="${Q8_COMMIT:-}"
SRC="$SRC_DIR/q8_kernels"
rm -rf "$SRC"
git clone --filter=blob:none "$Q8_REPO" "$SRC"
if [ -n "$Q8_COMMIT" ]; then
git -C "$SRC" checkout "$Q8_COMMIT"
fi
git -C "$SRC" submodule update --init --recursive
python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
python -m pip install -v --no-deps "$APP_WHEELS"/q8_kernels-*.whl || true
}
build_flash_attention_full() {
echo "=== FlashAttention (full GitHub) ==="
SRC="$SRC_DIR/flash-attention-full"
rm -rf "$SRC"
git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
pushd "$SRC" >/dev/null
export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-cuda}"
python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
popd >/dev/null
W=$(ls -t "$APP_WHEELS"/flash_attn-*.whl 2>/dev/null | head -n1)
if [ -n "$W" ]; then
python -m pip install -v --no-deps "$W"
else
python -m pip install -v --no-deps git+https://github.com/Dao-AILab/flash-attention
fi
}
# ===== Main execution =====
build_apex
build_q8
build_flash_attention_full
build_flash_layer_norm
# ===== Wheel upload =====
# APP_WHEELS is passed via the environment because the heredoc is quoted.
APP_WHEELS="$APP_WHEELS" python - <<'PY'
import os
from huggingface_hub import HfApi, HfFolder
repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
if not token:
    raise SystemExit(0)  # no token: skip the upload
api = HfApi(token=token)
api.upload_folder(
    folder_path=os.environ["APP_WHEELS"],
    repo_id=repo,
    repo_type="model",
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    ignore_patterns=["**/src/**", ".git/**"],
)
print("✅ Upload complete.")
PY
chmod -R 777 "$APP_WHEELS" || true
echo "✅ Builder finalizado."