eeuuia committed on
Commit 462bfaf · verified · 1 Parent(s): e3108f3

Update builder.sh

Files changed (1)
  1. builder.sh +109 -283
builder.sh CHANGED
@@ -1,188 +1,106 @@
  #!/usr/bin/env bash
  set -euo pipefail

- echo "🚀 Builder (FlashAttn LayerNorm extra + Apex + Q8) runtime with GPU visible"

- # ===== Config and directories =====
-
-
- mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
- chmod -R 777 /app/wheels || true
- export CUDA_CACHE_PATH="/app/cuda_cache"

  # Preserve the NGC license (if present)
  if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
-   cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true
  fi

  # ===== Minimal dependencies =====
- python -m pip install -v -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging "huggingface_hub[hf_transfer]" || true

  # ===== Environment tags (Python/CUDA/Torch) =====
  PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
- TORCH_VER="$(python - <<'PY'
- try:
-     import torch, re
-     v = torch.__version__
-     print(re.sub(r'\+.*$', '', v))
- except Exception:
-     print("unknown")
  PY
  )"
  CU_TAG="$(python - <<'PY'
- try:
-     import torch
-     cu = getattr(torch.version, "cuda", None)
-     print("cu"+cu.replace(".","")) if cu else print("")
- except Exception:
-     print("")
  PY
  )"
- echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"
-
- # ============================================================================
- # CHECKERS
- # ============================================================================
-
- # Specifically checks the native module required by layer_norm (without checking 'flash-attn' as a whole)
- check_flash_layer_norm_bin () {
-   python - <<'PY'
- import importlib
- ok = False
- # known extensions produced by csrc/layer_norm
- for name in [
-     "dropout_layer_norm",          # name of the native module
-     "flash_attn.ops.layer_norm",   # Python wrapper that uses the native module
-     "flash_attn.ops.rms_norm",     # may depend on the same backend in some packagings
- ]:
-     try:
-         importlib.import_module(name)
-         ok = True
-         break
-     except Exception:
-         pass
- raise SystemExit(0 if ok else 1)
  PY
  }

- check_apex () {
-   python - <<'PY'
  try:
      from apex.normalization import FusedLayerNorm
      import importlib; importlib.import_module("fused_layer_norm_cuda")
-     ok = True
- except Exception:
-     ok = False
- raise SystemExit(0 if ok else 1)
  PY
  }

- check_q8 () {
-   python - <<'PY'
- import importlib.util
  spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
- raise SystemExit(0 if spec else 1)
  PY
  }

- # ============================================================================
- # DOWNLOAD FROM THE HUB (GENERIC)
- # ============================================================================
-
- # Installs a wheel from HF by simple prefix (e.g. apex-, q8_kernels-)
- install_from_hf_by_prefix () {
    local PREFIX="$1"
-   echo "[hub] Looking for wheels '${PREFIX}-*.whl' in ${SELF_HF_REPO_ID} with tags ${PY_TAG}/${CU_TAG}"
-   python - "$PREFIX" "$PY_TAG" "$CU_TAG" <<'PY' || exit 0
- import os, sys
- from huggingface_hub import HfApi, hf_hub_download, HfFolder
-
- prefix, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
- repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
- api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
- try:
-     files = api.list_repo_files(repo_id=repo, repo_type="model")
- except Exception:
-     raise SystemExit(0)
-
- def match(name: str) -> bool:
-     return name.endswith(".whl") and name.rsplit("/",1)[-1].startswith(prefix + "-") and (py_tag in name)
-
- cands = [f for f in files if match(f)]
- pref = [f for f in cands if cu_tag and cu_tag in f] or cands
- if not pref:
-     raise SystemExit(0)
-
- target = sorted(pref, reverse=True)[0]
- print(target)
- path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
- print(path)
- PY
- }
-
- # Installs wheels for the layer_norm submodule, accepting name variants
- install_flash_layer_norm_from_hf () {
-   echo "[hub] Looking for FlashAttention LayerNorm wheels in ${SELF_HF_REPO_ID}"
-   python - "$PY_TAG" "$CU_TAG" <<'PY' || exit 0
  import os, sys, re
  from huggingface_hub import HfApi, hf_hub_download, HfFolder
-
- py_tag, cu_tag = sys.argv[1], sys.argv[2]
- repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
- api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
- try:
-     files = api.list_repo_files(repo_id=repo, repo_type="model")
- except Exception:
-     raise SystemExit(0)
-
- pats = [
-     r"^flash[_-]?attn[_-]?.*layer[_-]?norm-.*\.whl$",
-     r"^dropout[_-]?layer[_-]?norm-.*\.whl$",
- ]
- def ok(fn: str) -> bool:
-     name = fn.rsplit("/",1)[-1]
-     if py_tag not in name: return False
-     return any(re.search(p, name, flags=re.I) for p in pats)
-
- cands = [f for f in files if ok(f)]
- pref = [f for f in cands if cu_tag and cu_tag in f] or cands
- if not pref:
-     raise SystemExit(0)
-
  target = sorted(pref, reverse=True)[0]
  print(target)
- path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
- print(path)
  PY
  }

- # ============================================================================
- # BUILDERS
- # ============================================================================
-
- # Extra step: ALWAYS try to install the layer_norm submodule from an HF wheel first;
- # if no compatible wheel exists, build from csrc/layer_norm and produce a wheel.
- build_or_install_flash_layer_norm () {
-   echo "[flow] === FlashAttn LayerNorm (extra step) ==="
-
-   # 1) Try installing a wheel from HF first (avoids recompiling)
-   HF_OUT="$(install_flash_layer_norm_from_hf || true)"
-   if [ -n "${HF_OUT:-}" ]; then
-     WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
-     echo "[hub] Downloaded: ${WHEEL_PATH}"
-     python -m pip install -v -U --no-build-isolation --no-deps "${WHEEL_PATH}" || true
-     if check_flash_layer_norm_bin; then
-       echo "[flow] FlashAttn LayerNorm: OK via Hub wheel"
-       return 0
-     fi
-     echo "[flow] Hub wheel did not resolve the import; proceeding with build"
-   else
-     echo "[hub] No compatible wheel found for FlashAttn LayerNorm"
    fi

-   # 2) Build the csrc/layer_norm submodule from source -> wheel
-   local SRC="/app/wheels/src/flash-attn"
-   echo "[build] Preparing FlashAttention (layer_norm) source in ${SRC}"
    if [ -d "$SRC/.git" ]; then
      git -C "$SRC" fetch --all -p || true
      git -C "$SRC" reset --hard origin/main || true
@@ -192,177 +110,85 @@ build_or_install_flash_layer_norm () {
      git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
    fi

-   # Set the target compute capability from the active GPU (reduces build time/noise)
    export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
- import torch
- try:
-     cc = "%d.%d" % torch.cuda.get_device_capability(0)
-     print(cc)
- except Exception:
-     print("8.9")  # fallback for Ada (L40S) when building without a visible GPU
  PY
  )"
-   echo "[build] TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}"

    pushd "$SRC/csrc/layer_norm" >/dev/null
-   export MAX_JOBS="${MAX_JOBS:-90}"
-   # Produce a reusable wheel
-   python -m pip wheel -v --no-build-isolation --no-deps . -w /app/wheels || true
    popd >/dev/null

-   # Install the generated wheel
-   local W="$(ls -t /app/wheels/*flash*attn*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
-   if [ -z "${W}" ]; then
-     W="$(ls -t /app/wheels/*dropout*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
-   fi
-   if [ -z "${W}" ]; then
-     # fallback to any freshly generated .whl
-     W="$(ls -t /app/wheels/*.whl 2>/dev/null | head -n1 || true)"
-   fi
-
-   if [ -n "${W}" ]; then
-     python -m pip install -v -U --no-deps "${W}" || true
-     echo "[build] FlashAttn LayerNorm installed from wheel: ${W}"
-   else
-     echo "[build] No wheel generated; installing straight from source (last resort)"
-     python -m pip install -v --no-build-isolation "$SRC/csrc/layer_norm" || true
-   fi
-
-   # Final check of the native binary
-   if check_flash_layer_norm_bin; then
-     echo "[flow] FlashAttn LayerNorm: import OK after build"
-     return 0
-   fi
-   echo "[flow] FlashAttn LayerNorm: import failed after build"
-   return 1
  }

-
- ## Install the full FlashAttention from GitHub
- echo "Installing the full FlashAttention from GitHub"
- # clone the repository
- git clone --depth 1 https://github.com/Dao-AILab/flash-attention /app/wheels/src/flash-attention-full
- # build a wheel for the active GPU (tuned for the L40S via TORCH_CUDA_ARCH_LIST)
- pushd /app/wheels/src/flash-attention-full > /dev/null
- export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
- python -m pip wheel -v --no-build-isolation --no-deps . -w /app/wheels
- popd > /dev/null
- # install the generated wheel
- WHEEL=$(ls -t /app/wheels/flash_attn-*.whl | head -n1)
- if [ -n "$WHEEL" ]; then
-   python -m pip install -v --no-build-isolation --no-deps "$WHEEL"
- else
-   # fallback: pip straight from Git
-   python -m pip install -v --no-build-isolation --no-deps git+https://github.com/Dao-AILab/flash-attention
- fi
- echo "Full FlashAttention installed successfully"
-
-
-
- build_apex () {
-   local SRC="/app/wheels/src/apex"
-   echo "[build] Preparing Apex source in ${SRC}"
-   if [ -d "$SRC/.git" ]; then
-     git -C "$SRC" fetch --all -p || true
-     git -C "$SRC" reset --hard HEAD || true
-     git -C "$SRC" clean -fdx || true
-   else
-     rm -rf "$SRC"
-     git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
-   fi
-   echo "[build] Building Apex -> wheel"
    export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
-   python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
-   local W="$(ls -t /app/wheels/apex-*.whl 2>/dev/null | head -n1 || true)"
-   if [ -n "${W}" ]; then
-     python -m pip install -v -U --no-deps "${W}" || true
-     echo "[build] Apex installed from the freshly built wheel: ${W}"
-   else
-     echo "[build] No Apex wheel generated; installing from source"
-     python -m pip install -v --no-build-isolation "$SRC" || true
-   fi
  }

- Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels}"
- Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
- build_q8 () {
-   local SRC="/app/wheels/src/q8_kernels"
    rm -rf "$SRC"
    git clone --filter=blob:none "$Q8_REPO" "$SRC"
    git -C "$SRC" checkout "$Q8_COMMIT"
    git -C "$SRC" submodule update --init --recursive
-   echo "[build] Building Q8 kernels -> wheel"
-   python -m pip wheel -v --no-build-isolation "$SRC" -w /app/wheels || true
-   local W="$(ls -t /app/wheels/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
-   if [ -n "${W}" ]; then
-     python -m pip install -v -U --no-deps "${W}" || true
-     echo "[build] Q8 installed from the freshly built wheel: ${W}"
    else
-     echo "[build] No q8_kernels wheel generated; installing from source"
-     python -m pip install -v --no-build-isolation "$SRC" || true
    fi
  }

- # ============================================================================
- # EXECUTION
- # ============================================================================
-
- # Additional step WITHOUT depending on an already-installed "flash-attn": handles only layer_norm
- #build_q8 || true

- # Apex (kept)
- # Try an HF wheel first and, if none exists, build and install as a wheel
- #echo "[flow] === apex ==="
- #HF_OUT="$(install_from_hf_by_prefix "apex" || true)"
- #if [ -n "${HF_OUT:-}" ]; then
- #  WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
- #  echo "[hub] Downloaded: ${WHEEL_PATH}"
- #  python -m pip install -v -U --no-build-isolation "${WHEEL_PATH}" || true
- #  if ! check_apex; then
- #    echo "[flow] apex: import failed after wheel; building"
- #    #build_apex || true
- #  fi
- #else
- #  echo "[hub] No compatible apex wheel; building"
- #  build_apex || true
- #fi

- # Q8 (optional)
- echo "[flow] === q8_kernels ==="
- HF_OUT="$(install_from_hf_by_prefix "q8_kernels" || true)"
- if [ -n "${HF_OUT:-}" ]; then
-   WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
-   echo "[hub] Downloaded: ${WHEEL_PATH}"
-   python -m pip install -v -U --no-build-isolation "${WHEEL_PATH}" || true
-   if ! check_q8; then
-     echo "[flow] q8_kernels: import failed after wheel; building"
-     build_q8 || true
-   fi
- else
-   echo "[hub] No compatible q8_kernels wheel; building"
-   build_q8 || true
- fi
-
- # Upload built wheels to HF (cache across restarts)
  python - <<'PY'
  import os
  from huggingface_hub import HfApi, HfFolder
-
- repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
  token = os.getenv("HF_TOKEN") or HfFolder.get_token()
- if not token:
-     raise SystemExit("HF_TOKEN missing; upload disabled")
-
  api = HfApi(token=token)
  api.upload_folder(
-     folder_path="/app/wheels",
      repo_id=repo,
      repo_type="model",
      allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
-     ignore_patterns=["**/src/**","**/*.log","**/logs/**",".git/**"],
  )
- print("Upload complete (wheels + license).")
  PY

- chmod -R 777 /app/wheels || true
  echo "✅ Builder finished."
 
builder.sh (full file after this commit):

  #!/usr/bin/env bash
  set -euo pipefail

+ echo "🚀 Complete builder: FlashAttention LayerNorm, Apex, Q8, FlashAttention (GitHub) + upload"

+ # ===== Config and directories =====
+ APP_WHEELS="/app/wheels"
+ APP_CUDA_CACHE="/app/cuda_cache"
+ SRC_DIR="$APP_WHEELS/src"
+ mkdir -p "$APP_WHEELS" "$APP_CUDA_CACHE" "$SRC_DIR"
+ chmod -R 777 "$APP_WHEELS" || true
+ export CUDA_CACHE_PATH="$APP_CUDA_CACHE"

  # Preserve the NGC license (if present)
  if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
+   cp -f /NGC-DL-CONTAINER-LICENSE "$APP_WHEELS/" || true
  fi

  # ===== Minimal dependencies =====
+ python -m pip install -v -U \
+   pip build setuptools wheel hatchling hatch-vcs \
+   scikit-build-core cmake ninja packaging \
+   "huggingface_hub[hf_transfer]" || true

  # ===== Environment tags (Python/CUDA/Torch) =====
  PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
+ TORCH_VER="$(python - <<'PY'
+ try:
+     import torch, re
+     v = torch.__version__
+     print(re.sub(r'\+.*$', '', v))
+ except Exception:
+     print("unknown")
  PY
  )"
  CU_TAG="$(python - <<'PY'
+ try:
+     import torch
+     cu = getattr(torch.version, "cuda", None)
+     print("cu" + cu.replace(".", "") if cu else "")
+ except Exception:
+     print("")
  PY
  )"
+ echo "[env] PY_TAG=$PY_TAG TORCH_VER=$TORCH_VER CU_TAG=$CU_TAG"
+
+ # ===== Check functions =====
+ check_flash_layer_norm_bin() {
+   python - <<'PY'
+ import importlib, sys
+ modules = [
+     "dropout_layer_norm",
+     "flash_attn.ops.layer_norm",
+     "flash_attn.ops.rms_norm",
+ ]
+ for m in modules:
+     try:
+         importlib.import_module(m)
+         sys.exit(0)
+     except Exception:  # Exception, not bare except, so sys.exit(0) is not swallowed
+         pass
+ sys.exit(1)
  PY
  }

+ check_apex() {
+   python - <<'PY'
+ import sys
  try:
      from apex.normalization import FusedLayerNorm
      import importlib; importlib.import_module("fused_layer_norm_cuda")
+     sys.exit(0)
+ except Exception:
+     sys.exit(1)
  PY
  }

+ check_q8() {
+   python - <<'PY'
+ import importlib.util, sys
  spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
+ sys.exit(0 if spec else 1)
  PY
  }

+ # ===== Hugging Face helpers =====
+ install_from_hf_by_prefix() {
    local PREFIX="$1"
+   # The 'PY' heredoc is quoted, so shell values are passed via argv instead of expansion.
+   python - "$PREFIX" "$PY_TAG" "$CU_TAG" "$APP_WHEELS" <<'PY' || return 1
  import os, sys, re
  from huggingface_hub import HfApi, hf_hub_download, HfFolder
+ prefix, py_tag, cu_tag, wheels_dir = sys.argv[1:5]
+ repo = os.getenv("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
+ token = os.getenv("HF_TOKEN") or HfFolder.get_token()
+ api = HfApi(token=token)
+ files = api.list_repo_files(repo_id=repo, repo_type="model")
+ cands = [f for f in files if f.endswith(".whl") and f.rsplit("/", 1)[-1].startswith(prefix + "-") and py_tag in f]
+ pref = [f for f in cands if cu_tag and cu_tag in f] or cands
+ if not pref:
+     sys.exit(1)
  target = sorted(pref, reverse=True)[0]
+ hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir=wheels_dir)
  print(target)
  PY
  }

+ # ===== Build functions =====
+ build_flash_layer_norm() {
+   echo "=== FlashAttn LayerNorm ==="
+   if install_from_hf_by_prefix "flash-attn"; then
+     python -m pip install -v --no-deps "$APP_WHEELS"/flash_attn-*.whl || true
+     check_flash_layer_norm_bin && return 0
+     echo "HF wheel did not resolve the import; building locally"
  fi

+   SRC="$SRC_DIR/flash-attention"
  if [ -d "$SRC/.git" ]; then
    git -C "$SRC" fetch --all -p || true
    git -C "$SRC" reset --hard origin/main || true
    git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
  fi

  export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
+ import torch, sys
+ try:
+     cc = "%d.%d" % torch.cuda.get_device_capability(0)
+     print(cc)
+ except Exception:
+     print("8.9")
  PY
  )"
+   echo "[build] TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"

  pushd "$SRC/csrc/layer_norm" >/dev/null
+   python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
  popd >/dev/null

+   WHEEL=$(ls -t "$APP_WHEELS"/*flash*layer*norm*-*.whl 2>/dev/null | head -n1)
+   python -m pip install -v --no-deps "${WHEEL:-$SRC/csrc/layer_norm}" || true
+   check_flash_layer_norm_bin || echo "⚠️ LayerNorm import failed"
  }

+ build_apex() {
+   echo "=== Apex ==="
+   SRC="$SRC_DIR/apex"
+   rm -rf "$SRC"
+   git clone --depth 1 https://github.com/NVIDIA/apex "$SRC"
  export APEX_CPP_EXT=1 APEX_CUDA_EXT=1 APEX_ALL_CONTRIB_EXT=0
+   python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
+   python -m pip install -v --no-deps "$APP_WHEELS"/apex-*.whl || true
  }

+ build_q8() {
+   echo "=== Q8 Kernels ==="
+   SRC="$SRC_DIR/q8_kernels"
+   # Q8 source pin (overridable via environment)
+   Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels}"
+   Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
  rm -rf "$SRC"
  git clone --filter=blob:none "$Q8_REPO" "$SRC"
  git -C "$SRC" checkout "$Q8_COMMIT"
  git -C "$SRC" submodule update --init --recursive
+   python -m pip wheel -v --no-build-isolation --no-deps "$SRC" -w "$APP_WHEELS" || true
+   python -m pip install -v --no-deps "$APP_WHEELS"/q8_kernels-*.whl || true
+ }
+
+ build_flash_attention_full() {
+   echo "=== FlashAttention (full GitHub) ==="
+   SRC="$SRC_DIR/flash-attention-full"
+   rm -rf "$SRC"
+   git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
+   pushd "$SRC" >/dev/null
+   export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"
+   python -m pip wheel -v --no-build-isolation --no-deps . -w "$APP_WHEELS" || true
+   popd >/dev/null
+   W=$(ls -t "$APP_WHEELS"/flash_attn-*.whl 2>/dev/null | head -n1)
+   if [ -n "$W" ]; then
+     python -m pip install -v --no-deps "$W"
  else
+     python -m pip install -v --no-deps git+https://github.com/Dao-AILab/flash-attention
  fi
  }

+ # ===== Main execution =====

+ build_apex
+ build_q8
+ build_flash_attention_full
+ build_flash_layer_norm

+ # ===== Wheel upload =====
  python - <<'PY'
  import os
  from huggingface_hub import HfApi, HfFolder
+ repo = os.getenv("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
  token = os.getenv("HF_TOKEN") or HfFolder.get_token()
+ if not token:
+     raise SystemExit(0)
  api = HfApi(token=token)
  api.upload_folder(
+     folder_path="/app/wheels",  # literal path; the quoted heredoc does not expand $APP_WHEELS
      repo_id=repo,
      repo_type="model",
      allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
+     ignore_patterns=["**/src/**",".git/**"],
  )
+ print("Upload complete.")
  PY

+ chmod -R 777 "$APP_WHEELS" || true
  echo "✅ Builder finished."
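
For reference, a minimal invocation sketch (not part of the commit; the token value is a placeholder and the script is assumed to be run from its own directory, with only the environment variables builder.sh actually reads):

  export HF_TOKEN=hf_xxx                     # token for wheel download/upload on the Hub (upload is skipped without it)
  export SELF_HF_REPO_ID=euIaxs22/Aduc-sdr   # repo used as the wheel cache (script default)
  export TORCH_CUDA_ARCH_LIST=8.9            # optional; otherwise detected from the active GPU, falling back to 8.9
  bash builder.sh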