#!/usr/bin/env bash
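# Strict mode: exit on errors, on unset variables, and on failed pipeline stages.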
set -euo pipefail
echo "🚀 Builder completo — FlashAttention LayerNorm, Apex, Q8, FlashAttention (GitHub) + upload"
# ===== Configuration and directories =====
APP_WHEELS="/app/wheels"
APP_CUDA_CACHE="/app/cuda_cache"
SRC_DIR="$APP_WHEELS/src"
mkdir -p "$APP_WHEELS" "$APP_CUDA_CACHE" "$SRC_DIR"
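# World-writable so later (possibly non-root) steps can write wheels here;
# ignore failures on read-only mounts.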
chmod -R 777 "$APP_WHEELS" || true
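# Persist the CUDA JIT cache so kernels are not recompiled on every restart.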
export CUDA_CACHE_PATH="$APP_CUDA_CACHE"
# Preserve the NGC license (if present)
if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
cp -f /NGC-DL-CONTAINER-LICENSE "$APP_WHEELS/" || true
fi
# ===== Minimal dependencies =====
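# Toolchain for the source builds below; hf_transfer speeds up Hub transfers.
# "|| true" keeps a transient pip failure from killing the whole build.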
python -m pip install -v -U \
pip build setuptools wheel hatchling hatch-vcs \
scikit-build-core cmake ninja packaging \
"huggingface_hub[hf_transfer]" || true
# ===== Environment tags (Python/CUDA/Torch) =====
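# PY_TAG (e.g. cp310) and CU_TAG (e.g. cu121) mirror the tags embedded in wheel filenames.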
PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
TORCH_VER="$(python - <<'PY'
try; import torch, re; v=torch.__version__; print(re.sub(r'\+.*$', '', v))
except; print("unknown")
PY
)"
CU_TAG="$(python - <<'PY'
try; import torch; cu=getattr(torch.version,"cuda",None); echo="cu"+cu.replace(".","") if cu else ""; print(echo)
except; print("")
PY
)"
echo "[env] PY_TAG=$PY_TAG TORCH_VER=$TORCH_VER CU_TAG=$CU_TAG"
# ===== Check functions =====
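# Each check runs a short Python probe; exit status 0 means the compiled extension imports.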
check_flash_layer_norm_bin() {
python - <<'PY'
import importlib, sys
modules = [
    "dropout_layer_norm",
    "flash_attn.ops.layer_norm",
    "flash_attn.ops.rms_norm",
]
for m in modules:
    try:
        importlib.import_module(m)
        sys.exit(0)  # any one binding is enough
    except ImportError:
        # A bare except here would also swallow the SystemExit above.
        pass
sys.exit(1)
PY
}
check_apex() {
python - <<'PY'
import importlib, sys
try:
    from apex.normalization import FusedLayerNorm
    importlib.import_module("fused_layer_norm_cuda")  # compiled CUDA extension
except Exception:
    sys.exit(1)
sys.exit(0)
PY
}
check_q8() {
python - <<'PY'
import importlib.util, sys
spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
sys.exit(0 if spec else 1)
PY
}
# ===== Hugging Face helpers =====
install_from_hf_by_prefix() {
local PREFIX="$1"
# The heredoc is quoted ('PY'), so shell variables must be passed via the environment.
PREFIX="$PREFIX" PY_TAG="$PY_TAG" CU_TAG="$CU_TAG" APP_WHEELS="$APP_WHEELS" \
python - <<'PY' || return 1
import os, sys
from huggingface_hub import HfApi, hf_hub_download, HfFolder
prefix = os.environ["PREFIX"]
py_tag = os.environ["PY_TAG"]
cu_tag = os.environ.get("CU_TAG", "")
repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
api = HfApi(token=token)
files = api.list_repo_files(repo_id=repo, repo_type="model")
# Match on the wheel basename; wheel filenames use underscores (PEP 427).
cands = [f for f in files
         if f.endswith(".whl")
         and f.rsplit("/", 1)[-1].startswith(prefix + "-")
         and py_tag in f]
# Prefer wheels built for the current CUDA tag, but fall back to any match.
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    sys.exit(1)
target = sorted(pref, reverse=True)[0]
hf_hub_download(repo_id=repo, filename=target, repo_type="model",
                local_dir=os.environ["APP_WHEELS"])
print(target)
PY
}
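# Example: install_from_hf_by_prefix "apex" fetches the newest cached apex wheel
# matching this Python tag (preferring the current CUDA tag when available).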
# ===== Build functions =====
build_flash_layer_norm() {
echo "=== FlashAttn LayerNorm ==="
# Wheel filenames normalize hyphens to underscores, hence "flash_attn".
if install_from_hf_by_prefix "flash_attn"; then
python -m pip install -v --no-deps "$APP_WHEELS"/flash_attn-*.whl || true
check_flash_layer_norm_bin && return 0
echo "HF wheel failed, building locally"
fi
SRC="$SRC_DIR/flash-attention"
if [ -d "$SRC/.git" ]; then
git -C "$SRC" fetch --all -p || true
git -C "$SRC" reset --hard origin/main || true
git -C "$SRC" clean -fdx || true
else
rm -rf "$SRC"
git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
fi
export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
import torch,sys
try: cc="%d.%d"%torch.cuda.get_device_capability(0); print(cc)
except: print("8.9")
PY
)"
echo "[build] TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
pushd "$SRC/csrc/layer_norm" >/dev/null
python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
popd >/dev/null
# The csrc/layer_norm wheel is typically named dropout_layer_norm-*.whl, so match broadly.
WHEEL=$(ls -t "$APP_WHEELS"/*layer_norm*-*.whl 2>/dev/null | head -n1)
python -m pip install -v --no-deps "${WHEEL:-$SRC/csrc/layer_norm}" || true
check_flash_layer_norm_bin || echo "⚠️ LayerNorm import failed"
}
build_apex() {
echo "=== Apex ==="
SRC="$SRC_DIR/apex"
rm -rf "$SRC"
git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
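# Recent apex setup.py honors these env flags to build the fused C++/CUDA
# extensions; contrib extensions stay disabled to keep the build lean.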
export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
python -m pip install -v --no-deps "$APP_WHEELS"/apex-*.whl || true
}
build_q8() {
echo "=== Q8 Kernels ==="
# Q8_REPO/Q8_COMMIT are expected from the environment; under `set -u` an unset
# variable aborts the script, so default them here (the default repo URL is an
# assumption; override via Q8_REPO).
local Q8_REPO="${Q8_REPO:-https://github.com/KONAKONA666/q8_kernels}"
local Q8_COMMIT="${Q8_COMMIT:-}"
SRC="$SRC_DIR/q8_kernels"
rm -rf "$SRC"
git clone --filter=blob:none "$Q8_REPO" "$SRC"
if [ -n "$Q8_COMMIT" ]; then
git -C "$SRC" checkout "$Q8_COMMIT"
fi
git -C "$SRC" submodule update --init --recursive
python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
python -m pip install -v --no-deps "$APP_WHEELS"/q8_kernels-*.whl || true
}
build_flash_attention_full() {
echo "=== FlashAttention (full GitHub) ==="
SRC="$SRC_DIR/flash-attention-full"
rm -rf "$SRC"
git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
pushd "$SRC" >/dev/null
export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-cuda}"
python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
popd >/dev/null
W=$(ls -t "$APP_WHEELS"/flash_attn-*.whl 2>/dev/null | head -n1)
if [ -n "$W" ]; then
python -m pip install -v --no-deps "$W"
else
python -m pip install -v --no-deps git+https://github.com/Dao-AILab/flash-attention
fi
}
# ===== Main execution =====
build_apex
build_q8
build_flash_attention_full
build_flash_layer_norm
# ===== Wheel upload =====
# APP_WHEELS is passed via the environment because the heredoc is quoted.
APP_WHEELS="$APP_WHEELS" python - <<'PY'
import os
from huggingface_hub import HfApi, HfFolder
repo = os.getenv("SELF_HF_REPO_ID", "eeuuia/Tmp")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
if not token:
    raise SystemExit(0)  # no token: skip the upload
api = HfApi(token=token)
api.upload_folder(
    folder_path=os.environ["APP_WHEELS"],
    repo_id=repo,
    repo_type="model",
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    ignore_patterns=["**/src/**", ".git/**"],
)
print("✅ Upload complete.")
PY
chmod -R 777 "$APP_WHEELS" || true
echo "✅ Builder finalizado."