Test

Paused

App Files Files Community

eeuuia committed 29 days ago

Commit

462bfaf

verified ·

1 Parent(s): e3108f3

Update builder.sh

Browse files

Files changed (1) hide show

builder.sh +109 -283

builder.sh CHANGED Viewed

@@ -1,188 +1,106 @@
 #!/usr/bin/env bash
 set -euo pipefail
-echo "🚀 Builder (FlashAttn LayerNorm extra + Apex + Q8) — runtime com GPU visível"
-# ===== Config e diretórios =====
-mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
-chmod -R 777 /app/wheels || true
-export CUDA_CACHE_PATH="/app/cuda_cache"
 # Preserva licença NGC (se existir)
 if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
-  cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true
 fi
 # ===== Dependências mínimas =====
-python -m pip install -v -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging "huggingface_hub[hf_transfer]" || true
 # ===== Tags de ambiente (Python/CUDA/Torch) =====
 PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
-TORCH_VER="$(python - <<'PY'
-try:
-    import torch, re
-    v = torch.__version__
-    print(re.sub(r'\+.*$', '', v))
-except Exception:
-    print("unknown")
 PY
 )"
 CU_TAG="$(python - <<'PY'
-try:
-    import torch
-    cu = getattr(torch.version, "cuda", None)
-    print("cu"+cu.replace(".","")) if cu else print("")
-except Exception:
-    print("")
 PY
 )"
-echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"
-# ============================================================================
-#                               CHECKERS
-# ============================================================================
-# Checa especificamente o módulo nativo requerido pelo layer_norm (sem checar 'flash-attn' geral)
-check_flash_layer_norm_bin () {
-python - <<'PY'
-import importlib
-ok = False
-# extensões conhecidas produzidas por csrc/layer_norm
-for name in [
-    "dropout_layer_norm",                 # nome do módulo nativo
-    "flash_attn.ops.layer_norm",          # wrapper python que usa o nativo
-    "flash_attn.ops.rms_norm",            # pode depender do mesmo backend em alguns empacotamentos
-]:
-    try:
-        importlib.import_module(name)
-        ok = True
-        break
-    except Exception:
-        pass
-raise SystemExit(0 if ok else 1)
 PY
 }
-check_apex () {
-python - <<'PY'
 try:
     from apex.normalization import FusedLayerNorm
     import importlib; importlib.import_module("fused_layer_norm_cuda")
-    ok = True
-except Exception:
-    ok = False
-raise SystemExit(0 if ok else 1)
 PY
 }
-check_q8 () {
-python - <<'PY'
-import importlib.util
 spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
-raise SystemExit(0 if spec else 1)
 PY
 }
-# ============================================================================
-#                         DOWNLOAD DO HUB (GENÉRICO)
-# ============================================================================
-# Instala uma wheel do HF por prefixo simples (ex.: apex-, q8_kernels-)
-install_from_hf_by_prefix () {
   local PREFIX="$1"
-  echo "[hub] Procurando wheels '${PREFIX}-*.whl' em ${SELF_HF_REPO_ID} com tags ${PY_TAG}/${CU_TAG}"
-  python - "$PREFIX" "$PY_TAG" "$CU_TAG" <<'PY' || exit 0
-import os, sys
-from huggingface_hub import HfApi, hf_hub_download, HfFolder
-prefix, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
-repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
-api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
-try:
-    files = api.list_repo_files(repo_id=repo, repo_type="model")
-except Exception:
-    raise SystemExit(0)
-def match(name: str) -> bool:
-    return name.endswith(".whl") and name.rsplit("/",1)[-1].startswith(prefix + "-") and (py_tag in name)
-cands = [f for f in files if match(f)]
-pref = [f for f in cands if cu_tag and cu_tag in f] or cands
-if not pref:
-    raise SystemExit(0)
-target = sorted(pref, reverse=True)[0]
-print(target)
-path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
-print(path)
-PY
-}
-# Instala wheels do submódulo layer_norm aceitando variantes de nome
-install_flash_layer_norm_from_hf () {
-  echo "[hub] Procurando wheels FlashAttention LayerNorm em ${SELF_HF_REPO_ID}"
-  python - "$PY_TAG" "$CU_TAG" <<'PY' || exit 0
 import os, sys, re
 from huggingface_hub import HfApi, hf_hub_download, HfFolder
-py_tag, cu_tag = sys.argv[1], sys.argv[2]
-repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
-api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
-try:
-    files = api.list_repo_files(repo_id=repo, repo_type="model")
-except Exception:
-    raise SystemExit(0)
-pats = [
-    r"^flash[_-]?attn[_-]?.*layer[_-]?norm-.*\.whl$",
-    r"^dropout[_-]?layer[_-]?norm-.*\.whl$",
-]
-def ok(fn: str) -> bool:
-    name = fn.rsplit("/",1)[-1]
-    if py_tag not in name: return False
-    return any(re.search(p, name, flags=re.I) for p in pats)
-cands = [f for f in files if ok(f)]
-pref = [f for f in cands if cu_tag and cu_tag in f] or cands
-if not pref:
-    raise SystemExit(0)
 target = sorted(pref, reverse=True)[0]
 print(target)
-path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
-print(path)
 PY
 }
-# ============================================================================
-#                                BUILDERS
-# ============================================================================
-# Passo extra: SIEMPRE tenta instalar o submódulo layer_norm via wheel do HF;
-# se não houver wheel compatível, compila a partir de csrc/layer_norm e gera wheel.
-build_or_install_flash_layer_norm () {
-  echo "[flow] === FlashAttn LayerNorm (passo extra) ==="
-  # 1) Tentar instalar wheel do HF primeiro (evita recompilar)
-  HF_OUT="$(install_flash_layer_norm_from_hf || true)"
-  if [ -n "${HF_OUT:-}" ]; then
-    WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
-    echo "[hub] Baixado: ${WHEEL_PATH}"
-    python -m pip install -v -U --no-build-isolation --no-deps "${WHEEL_PATH}" || true
-    if check_flash_layer_norm_bin; then
-      echo "[flow] FlashAttn LayerNorm: OK via wheel do Hub"
-      return 0
-    fi
-    echo "[flow] Wheel do Hub não resolveu import; seguirá com build"
-  else
-    echo "[hub] Nenhuma wheel compatível encontrada para FlashAttn LayerNorm"
   fi
-  # 2) Build from source do submódulo csrc/layer_norm -> wheel
-  local SRC="/app/wheels/src/flash-attn"
-  echo "[build] Preparando fonte FlashAttention (layer_norm) em ${SRC}"
   if [ -d "$SRC/.git" ]; then
     git -C "$SRC" fetch --all -p || true
     git -C "$SRC" reset --hard origin/main || true
@@ -192,177 +110,85 @@ build_or_install_flash_layer_norm () {
     git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
   fi
-  # Define CC alvo a partir da GPU ativa (reduz tempo/ruído de build)
   export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
-import torch
-try:
-    cc = "%d.%d" % torch.cuda.get_device_capability(0)
-    print(cc)
-except Exception:
-    print("8.9")  # fallback p/ Ada (L40S) caso build sem GPU visível
 PY
   )"
-  echo "[build] TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}"
   pushd "$SRC/csrc/layer_norm" >/dev/null
-  export MAX_JOBS="${MAX_JOBS:-90}"
-  # Gera wheel reutilizável
-  python -m pip wheel -v --no-build-isolation --no-deps . -w /app/wheels || true
   popd >/dev/null
-  # Instala a wheel gerada
-  local W="$(ls -t /app/wheels/*flash*attn*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
-  if [ -z "${W}" ]; then
-    W="$(ls -t /app/wheels/*dropout*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
-  fi
-  if [ -z "${W}" ]; then
-    # fallback para qualquer .whl recém gerado
-    W="$(ls -t /app/wheels/*.whl 2>/dev/null | head -n1 || true)"
-  fi
-  if [ -n "${W}" ]; then
-    python -m pip install -v -U --no-deps "${W}" || true
-    echo "[build] FlashAttn LayerNorm instalado da wheel: ${W}"
-  else
-    echo "[build] Nenhuma wheel gerada; instalando direto do source (último recurso)"
-    python -m pip install -v --no-build-isolation "$SRC/csrc/layer_norm" || true
-  fi
-  # Checagem final do binário
-  if check_flash_layer_norm_bin; then
-    echo "[flow] FlashAttn LayerNorm: import OK após build"
-    return 0
-  fi
-  echo "[flow] FlashAttn LayerNorm: falhou import após build"
-  return 1
 }
-## Instalação do FlashAttention completo do GitHub
-echo "Instalando FlashAttention completo do GitHub"
-# clonagem do repositório
-git clone --depth 1 https://github.com/Dao-AILab/flash-attention appwheelssrc/flash-attention-full
-# build de wheel para a GPU ativa (ajusta para L40s via TORCHCUDAARCHLIST)
-pushd appwheelssrc/flash-attention-full > /dev/null
-export TORCH_CUDA_ARCH_LIST="${TORCHCUDAARCHLIST}"
-python -m pip wheel -v --no-build-isolation --no-deps . -w ../../appwheels
-popd > /dev/null
-# instalação do wheel gerado
-WHEEL=$(ls -t appwheels/flash_attn-*.whl | head -n1)
-if [ -n "$WHEEL" ]; then
-  python -m pip install -v --no-build-isolation --no-deps "$WHEEL"
-else
-  # fallback para pip direto do Git
-  python -m pip install -v --no-build-isolation --no-deps git+https://github.com/Dao-AILab/flash-attention
-fi
-echo "FlashAttention completo instalado com sucesso"
-build_apex () {
-  local SRC="/app/wheels/src/apex"
-  echo "[build] Preparando fonte Apex em ${SRC}"
-  if [ -d "$SRC/.git" ]; then
-    git -C "$SRC" fetch --all -p || true
-    git -C "$SRC" reset --hard HEAD || true
-    git -C "$SRC" clean -fdx || true
-  else
-    rm -rf "$SRC"
-    git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
-  fi
-  echo "[build] Compilando Apex -> wheel"
   export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
-  python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
-  local W="$(ls -t /app/wheels/apex-*.whl 2>/dev/null | head -n1 || true)"
-  if [ -n "${W}" ]; then
-    python -m pip install -v -U --no-deps "${W}" || true
-    echo "[build] Apex instalado da wheel recém-compilada: ${W}"
-  else
-    echo "[build] Nenhuma wheel Apex gerada; instalando do source"
-    python -m pip install -v --no-build-isolation "$SRC" || true
-  fi
 }
-Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels}"
-Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
-build_q8 () {
-  local SRC="/app/wheels/src/q8_kernels"
   rm -rf "$SRC"
   git clone --filter=blob:none "$Q8_REPO" "$SRC"
   git -C "$SRC" checkout "$Q8_COMMIT"
   git -C "$SRC" submodule update --init --recursive
-  echo "[build] Compilando Q8 Kernels -> wheel"
-  python -m pip wheel -v --no-build-isolation "$SRC" -w /app/wheels || true
-  local W="$(ls -t /app/wheels/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
-  if [ -n "${W}" ]; then
-    python -m pip install -v -U --no-deps "${W}" || true
-    echo "[build] Q8 instalado da wheel recém-compilada: ${W}"
   else
-    echo "[build] Nenhuma wheel q8_kernels gerada; instalando do source"
-    python -m pip install -v --no-build-isolation "$SRC" || true
   fi
 }
-# ============================================================================
-#                               EXECUÇÃO
-# ============================================================================
-# Passo adicional SEM depender de "flash-attn" já instalado: trata somente o layer_norm
-#build_q8 || true
-# Apex (mantido)
-# Tenta primeiro via wheel no HF e, se não houver, compila e instala em wheel
-#echo "[flow] === apex ==="
-#HF_OUT="$(install_from_hf_by_prefix "apex" || true)"
-#if [ -n "${HF_OUT:-}" ]; then
-#  WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
-#  echo "[hub] Baixado: ${WHEEL_PATH}"
-#  python -m pip install -v -U --no-build-isolation "${WHEEL_PATH}" || true
-#  if ! check_apex; then
-#    echo "[flow] apex: import falhou após wheel; compilando"
-#    #build_apex || true
-#  fi
-#else
-#  echo "[hub] Nenhuma wheel apex compatível; compilando"
-#  build_apex || true
-#fi
- #Q8 (opcional)
- echo "[flow] === q8_kernels ==="
- HF_OUT="$(install_from_hf_by_prefix "q8_kernels" || true)"
- if [ -n "${HF_OUT:-}" ]; then
-   WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
-   echo "[hub] Baixado: ${WHEEL_PATH}"
-   python -m pip install -v -U --no-build-isolation "${WHEEL_PATH}" || true
-   if ! check_q8; then
-     echo "[flow] q8_kernels: import falhou após wheel; compilando"
-     build_q8 || true
-   fi
- else
-   echo "[hub] Nenhuma wheel q8_kernels compatível; compilando"
-   build_q8 || true
- fi
-# Upload de wheels produzidas para o HF (cache cross-restarts)
 python - <<'PY'
 import os
 from huggingface_hub import HfApi, HfFolder
-repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
 token = os.getenv("HF_TOKEN") or HfFolder.get_token()
-if not token:
-    raise SystemExit("HF_TOKEN ausente; upload desabilitado")
 api = HfApi(token=token)
 api.upload_folder(
-    folder_path="/app/wheels",
     repo_id=repo,
     repo_type="model",
     allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
-    ignore_patterns=["**/src/**","**/*.log","**/logs/**",".git/**"],
 )
-print("Upload concluído (wheels + licença).")
 PY
-chmod -R 777 /app/wheels || true
 echo "✅ Builder finalizado."

 #!/usr/bin/env bash
 set -euo pipefail
echo "🚀 Builder completo — FlashAttention LayerNorm, Apex, Q8, FlashAttention (GitHub) + upload"

# ===== Configuration and directories =====
APP_WHEELS="/app/wheels"
APP_CUDA_CACHE="/app/cuda_cache"
SRC_DIR="$APP_WHEELS/src"
mkdir -p "$APP_WHEELS" "$APP_CUDA_CACHE" "$SRC_DIR"
chmod -R 777 "$APP_WHEELS" || true
export CUDA_CACHE_PATH="$APP_CUDA_CACHE"

# Keep the NGC container license (when present) next to the wheels.
if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
  cp -f /NGC-DL-CONTAINER-LICENSE "$APP_WHEELS/" || true
fi

# ===== Minimal build dependencies (best effort) =====
python -m pip install -v -U \
  pip build setuptools wheel hatchling hatch-vcs \
  scikit-build-core cmake ninja packaging \
  "huggingface_hub[hf_transfer]" || true

# ===== Environment tags (Python / Torch / CUDA) =====
# Each probe has a shell-level fallback so that a failing interpreter cannot
# abort the script through `set -e` on the assignment.
PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"

# Torch version without the local "+cuXXX" suffix, or "unknown".
TORCH_VER="$(python - <<'PY' || echo unknown
import re
try:
    import torch
    print(re.sub(r'\+.*$', '', torch.__version__))
except Exception:
    print("unknown")
PY
)"

# CUDA tag such as "cu121"; empty when torch or its CUDA version is missing.
CU_TAG="$(python - <<'PY' || true
try:
    import torch
    cu = getattr(torch.version, "cuda", None)
    print("cu" + cu.replace(".", "") if cu else "")
except Exception:
    print("")
PY
)"
echo "[env] PY_TAG=$PY_TAG TORCH_VER=$TORCH_VER CU_TAG=$CU_TAG"
# ===== Check functions =====

#######################################
# Check whether a FlashAttention layer-norm module is importable.
# Arguments: none
# Returns:   0 if any of the listed modules imports, 1 otherwise
#######################################
check_flash_layer_norm_bin() {
  python - <<'PY'
import importlib
import sys

modules = [
    "dropout_layer_norm",
    "flash_attn.ops.layer_norm",
    "flash_attn.ops.rms_norm",
]
for name in modules:
    try:
        importlib.import_module(name)
    except Exception:
        continue
    # Exit outside the try block: sys.exit() raises SystemExit, which a bare
    # `except:` would swallow, turning every successful import into a failure.
    sys.exit(0)
sys.exit(1)
PY
}
#######################################
# Check whether Apex and its fused layer-norm CUDA extension are importable.
# Arguments: none
# Returns:   0 if both imports succeed, 1 otherwise
#######################################
check_apex() {
  python - <<'PY'
import importlib
import sys

try:
    from apex.normalization import FusedLayerNorm  # noqa: F401
    importlib.import_module("fused_layer_norm_cuda")
except Exception:
    sys.exit(1)
# Success is reported outside the try block so that SystemExit is never
# caught by the exception handler above.
sys.exit(0)
PY
}
#######################################
# Check whether the Q8 kernels package is installed, under either of its
# module names (ltx_q8_kernels or q8_kernels), without importing it.
# Arguments: none
# Returns:   0 if one of the modules is found, 1 otherwise
#######################################
check_q8() {
  python - <<'PY'
import importlib.util
import sys


def installed(name):
    # find_spec can raise for half-initialised modules; treat that as absent.
    try:
        return importlib.util.find_spec(name) is not None
    except (ImportError, ValueError):
        return False


sys.exit(0 if installed("ltx_q8_kernels") or installed("q8_kernels") else 1)
PY
}
# ===== Hugging Face helpers =====

#######################################
# Download one matching wheel from the Hugging Face model repo.
# Candidates are *.whl files whose base name starts with "<prefix>-"
# ("-" and "_" are treated as equal) and contains PY_TAG; wheels that also
# contain CU_TAG are preferred. The lexicographically last candidate wins.
# Globals:   PY_TAG, CU_TAG, APP_WHEELS (read);
#            SELF_HF_REPO_ID, HF_TOKEN (read by the Python snippet)
# Arguments: $1 - wheel name prefix (e.g. "apex", "q8_kernels")
# Outputs:   repo-relative name of the downloaded wheel on stdout
# Returns:   0 when a wheel was downloaded, 1 otherwise
#######################################
install_from_hf_by_prefix() {
  local PREFIX="$1"
  # The heredoc delimiter is quoted, so nothing inside it is expanded by the
  # shell; shell values are handed to Python through argv instead.
  python - "$PREFIX" "${PY_TAG:-}" "${CU_TAG:-}" "${APP_WHEELS:-/app/wheels}" <<'PY' || return 1
import os
import sys
from huggingface_hub import HfApi, hf_hub_download, HfFolder

prefix, py_tag, cu_tag, wheels_dir = sys.argv[1:5]
repo = os.getenv("SELF_HF_REPO_ID", "euIaxs22/Aduc-sdr")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
api = HfApi(token=token)
files = api.list_repo_files(repo_id=repo, repo_type="model")

wanted = prefix.replace("-", "_") + "_"


def matches(path):
    name = path.rsplit("/", 1)[-1]
    if not name.endswith(".whl") or py_tag not in name:
        return False
    # Wheel file names normalise "-" to "_" in the project part.
    return name.replace("-", "_").startswith(wanted)


cands = [f for f in files if matches(f)]
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    sys.exit(1)
target = sorted(pref, reverse=True)[0]
hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir=wheels_dir)
print(target)
PY
}
# ===== Build functions =====

#######################################
# Install the FlashAttention layer-norm extension: first from a wheel on the
# Hugging Face Hub, otherwise by building csrc/layer_norm from source.
# Globals:   APP_WHEELS, SRC_DIR (read); TORCH_CUDA_ARCH_LIST (exported)
# Arguments: none
# Returns:   0; a failed final import check only prints a warning
#######################################
build_flash_layer_norm() {
  echo "=== FlashAttn LayerNorm ==="
  if install_from_hf_by_prefix "flash-attn"; then
    python -m pip install -v --no-deps "$APP_WHEELS"/flash_attn-*.whl || true
    check_flash_layer_norm_bin && return 0
    echo "Wheel HF falhou, build local"
  fi

  local src="$SRC_DIR/flash-attention"
  if [ -d "$src/.git" ]; then
    git -C "$src" fetch --all -p || true
    git -C "$src" reset --hard origin/main || true
  else
    # No usable checkout: start from a fresh shallow clone.
    rm -rf -- "${src:?}"
    git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$src"
  fi

  # Target only the compute capability of GPU 0; fall back to 8.9 when torch
  # or a GPU is unavailable. The import lives inside the try block so a
  # missing torch still yields the fallback instead of an empty value.
  TORCH_CUDA_ARCH_LIST="$(python - <<'PY' || true
try:
    import torch
    print("%d.%d" % torch.cuda.get_device_capability(0))
except Exception:
    print("8.9")
PY
  )"
  export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"
  echo "[build] TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"

  pushd "$src/csrc/layer_norm" >/dev/null
  python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
  popd >/dev/null

  # Newest matching wheel, if any. `|| true` keeps pipefail + set -e from
  # aborting the script when nothing matches, so the source fallback below
  # can actually run.
  local wheel
  wheel="$(ls -t "$APP_WHEELS"/*flash*layer*norm*-*.whl \
    "$APP_WHEELS"/*dropout*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
  python -m pip install -v --no-deps "${wheel:-$src/csrc/layer_norm}" || true
  check_flash_layer_norm_bin || echo "⚠️ LayerNorm import falhou"
}
#######################################
# Clone NVIDIA Apex, build a wheel with the C++/CUDA extensions enabled and
# install it (best effort).
# Globals:   APP_WHEELS, SRC_DIR (read);
#            APEX_CPP_EXT, APEX_CUDA_EXT, APEX_ALL_CONTRIB_EXT (exported)
# Arguments: none
# Returns:   0 unless the clone fails
#######################################
build_apex() {
  echo "=== Apex ==="
  local src="$SRC_DIR/apex"
  local wheel
  rm -rf -- "${src:?}"
  git clone --depth 1 https://github.com/NVIDIA/apex "$src"
  export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
  python -m pip wheel -v --no-build-isolation --no-deps "$src" -w "$APP_WHEELS" || true

  # Install only the newest wheel; an unmatched glob must never reach pip as
  # a literal "apex-*.whl" argument, and older wheels must not be reinstalled.
  wheel="$(ls -t "$APP_WHEELS"/apex-*.whl 2>/dev/null | head -n1 || true)"
  if [ -n "$wheel" ]; then
    python -m pip install -v --no-deps "$wheel" || true
  else
    echo "[build] Nenhuma wheel Apex gerada; instalando do source"
    python -m pip install -v --no-build-isolation "$src" || true
  fi
}
#######################################
# Clone the Q8 kernels repository at a pinned commit, build a wheel and
# install it (best effort).
# Globals:   APP_WHEELS, SRC_DIR (read);
#            Q8_REPO, Q8_COMMIT (optional overrides)
# Arguments: none
# Returns:   0 unless a git step fails
#######################################
build_q8() {
  echo "=== Q8 Kernels ==="
  # Defaults are applied here so the function also works under `set -u` when
  # the caller did not export Q8_REPO / Q8_COMMIT.
  local repo="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels}"
  local commit="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
  local src="$SRC_DIR/q8_kernels"
  local wheel
  rm -rf -- "${src:?}"
  git clone --filter=blob:none "$repo" "$src"
  git -C "$src" checkout "$commit"
  git -C "$src" submodule update --init --recursive
  python -m pip wheel -v --no-build-isolation --no-deps "$src" -w "$APP_WHEELS" || true

  # Install only the newest wheel; fall back to a source install when the
  # wheel build produced nothing.
  wheel="$(ls -t "$APP_WHEELS"/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
  if [ -n "$wheel" ]; then
    python -m pip install -v --no-deps "$wheel" || true
  else
    echo "[build] Nenhuma wheel q8_kernels gerada; instalando do source"
    python -m pip install -v --no-build-isolation "$src" || true
  fi
}
#######################################
# Build the complete FlashAttention package from GitHub as a wheel and
# install it; if no wheel was produced, install straight from the Git URL.
# Globals:   APP_WHEELS, SRC_DIR (read);
#            TORCH_CUDA_ARCH_LIST (kept if set, otherwise detected; exported)
# Arguments: none
# Returns:   status of the final pip install
#######################################
build_flash_attention_full() {
  echo "=== FlashAttention (full GitHub) ==="
  local src="$SRC_DIR/flash-attention-full"
  local wheel
  rm -rf -- "${src:?}"
  git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$src"

  # TORCH_CUDA_ARCH_LIST must hold compute capabilities such as "8.9"; when
  # the caller did not provide one, use GPU 0's capability and fall back to
  # 8.9 if torch or a GPU is unavailable.
  if [ -z "${TORCH_CUDA_ARCH_LIST:-}" ]; then
    TORCH_CUDA_ARCH_LIST="$(python - <<'PY' || true
try:
    import torch
    print("%d.%d" % torch.cuda.get_device_capability(0))
except Exception:
    print("8.9")
PY
    )"
  fi
  export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"

  pushd "$src" >/dev/null
  python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
  popd >/dev/null

  # `|| true` keeps pipefail + set -e from aborting when no wheel exists, so
  # the Git fallback below stays reachable.
  wheel="$(ls -t "$APP_WHEELS"/flash_attn-*.whl 2>/dev/null | head -n1 || true)"
  if [ -n "$wheel" ]; then
    python -m pip install -v --no-deps "$wheel"
  else
    python -m pip install -v --no-deps git+https://github.com/Dao-AILab/flash-attention
  fi
}
# ===== Main execution =====
build_apex
build_q8
build_flash_attention_full
build_flash_layer_norm

# ===== Upload wheels =====
# The heredoc delimiter is quoted, so the wheels directory is passed to
# Python via argv rather than written as "$APP_WHEELS" inside the snippet
# (where it would be a literal, non-existent path).
python - "$APP_WHEELS" <<'PY'
import os
import sys
from huggingface_hub import HfApi, HfFolder

repo = os.getenv("SELF_HF_REPO_ID", "euIaxs22/Aduc-sdr")
token = os.getenv("HF_TOKEN") or HfFolder.get_token()
if not token:
    # No credentials: skip the upload, but say so instead of exiting silently.
    print("HF_TOKEN ausente; upload desabilitado")
    sys.exit(0)
api = HfApi(token=token)
api.upload_folder(
    folder_path=sys.argv[1],
    repo_id=repo,
    repo_type="model",
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    ignore_patterns=["**/src/**", ".git/**"],
)
print("✅ Upload concluído.")
PY
chmod -R 777 "$APP_WHEELS" || true
echo "✅ Builder finalizado."