megablocks-hip / build.sh
leonardlin's picture
Add ROCm build debugging utilities
2d8a802
raw
history blame
1.36 kB
#!/usr/bin/env bash
# MegaBlocks build wrapper. A full build takes roughly 3-4 minutes; leftovers
# from an interrupted build are cleared first so the new build does not hang
# on a stale lock file.

# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

printf '%s\n' "=== MegaBlocks Build Script ==="
printf '%s\n' "Cleaning up any previous build processes and lock files..."
# Signal build helpers left running by an earlier attempt. pkill -f matches each
# pattern against the full command line, so the patterns are intentionally
# broad. stderr is discarded and "|| true" keeps pkill's non-zero "nothing
# matched" status from aborting the script.
echo "Killing any running build.py processes..."
for stale_pattern in "python.*build\.py" "ninja" "hipcc"; do
  pkill -f "$stale_pattern" 2>/dev/null || true
done

# Fixed grace period so the signalled processes have time to exit.
sleep 2
#######################################
# Delete stale lock files left behind by an interrupted extension build.
# Arguments: $1 - extension build directory to scan recursively
# Outputs:   nothing (find diagnostics are discarded)
# Returns:   0 always; the cleanup is best-effort
#######################################
remove_stale_locks() {
  local ext_dir=$1
  # Nothing to clean when the directory does not exist yet (e.g. first build).
  [[ -d "$ext_dir" ]] || return 0
  # Only regular files named "lock" or ".ninja_lock" are removed; failures are
  # ignored on purpose so a cleanup hiccup never blocks the build.
  find "$ext_dir" -type f \( -name "lock" -o -name ".ninja_lock" \) -delete 2>/dev/null || true
}

# Clean up lock files that cause infinite loops.
echo "Removing stale lock files..."
# Scan the directory the build will actually use: a caller-provided
# TORCH_EXTENSIONS_DIR wins, otherwise fall back to ./.torch_extensions under
# the current working directory.
remove_stale_locks "${TORCH_EXTENSIONS_DIR:-$PWD/.torch_extensions}"
# Toolchain environment for the ROCm/HIP build. Apart from PATH and
# LD_LIBRARY_PATH (which are always prepended to), every value below is only a
# default: a variable already set by the caller is left as-is.

# Default to the ROCm 7.0.1 install unless the caller overrides it.
export ROCM_PATH="${ROCM_PATH:-/opt/rocm-7.0.1}"
export ROCM_HOME="${ROCM_HOME:-$ROCM_PATH}"
export HIP_PATH="${HIP_PATH:-$ROCM_PATH}"
export HIP_HOME="${HIP_HOME:-$ROCM_PATH}"

# Put the ROCm binaries and libraries ahead of anything already configured.
export PATH="$ROCM_HOME/bin:$PATH"
# Append the previous LD_LIBRARY_PATH only when it is non-empty: a trailing ":"
# would create an empty entry, which the dynamic linker treats as the current
# working directory.
export LD_LIBRARY_PATH="$ROCM_HOME/lib:$ROCM_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Default GPU target (presumably read by build.py -- confirm against that script).
export TORCH_HIP_ARCH_LIST="${TORCH_HIP_ARCH_LIST:-gfx942}"
# HSA_OVERRIDE_GFX_VERSION takes a dotted major.minor.stepping version, not a
# gfx target name; 9.4.2 is the dotted form of gfx942.
export HSA_OVERRIDE_GFX_VERSION="${HSA_OVERRIDE_GFX_VERSION:-9.4.2}"

# Keep extension build output under the current working directory by default.
export TORCH_EXTENSIONS_DIR="${TORCH_EXTENSIONS_DIR:-$PWD/.torch_extensions}"
# Announce the hand-off, then run build.py from the current directory.
# -u makes Python's stdout/stderr unbuffered so build output appears as it is produced.
printf '%s\n' "Environment configured. Starting build..."
python -u build.py