#!/usr/bin/env bash
#
# ROCm Environment Debug Script 1
#
# Prints a series of diagnostic sections about the local ROCm/HIP setup to
# stdout. Individual probes are written so that a failing probe prints a
# message and the script carries on.

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

echo "=== ROCm Environment Debug Script 1 ==="
echo "Testing basic ROCm/HIP environment setup"
echo
# ROCm/HIP locations and GPU target settings. Each value may be pre-set by the
# caller; the default is applied only when the variable is unset or empty.
export ROCM_PATH="${ROCM_PATH:-/opt/rocm-7.0.1}"
export ROCM_HOME="${ROCM_HOME:-$ROCM_PATH}"
export HIP_PATH="${HIP_PATH:-$ROCM_PATH}"
export HIP_HOME="${HIP_HOME:-$ROCM_PATH}"

# Prepend the ROCm bin directory so its tools are found before any other copy.
export PATH="$ROCM_HOME/bin:$PATH"

export TORCH_HIP_ARCH_LIST="${TORCH_HIP_ARCH_LIST:-gfx942}"

# HSA_OVERRIDE_GFX_VERSION must be a dotted major.minor.stepping version, not
# a "gfxNNN" target name; 9.4.2 is the dotted form of gfx942.
export HSA_OVERRIDE_GFX_VERSION="${HSA_OVERRIDE_GFX_VERSION:-9.4.2}"
#######################################
# Print the entries of a colon-separated search path that contain "rocm",
# one per line, or the text "No ROCm in PATH" when none match.
# Arguments: $1 - search path to inspect (optional; defaults to $PATH)
# Outputs:   matching entries, or the fallback message, on stdout
#######################################
rocm_path_entries() {
  local search_path=${1-$PATH}
  # The value is passed quoted so spaces and glob characters survive intact.
  tr ':' '\n' <<<"$search_path" | grep rocm || echo 'No ROCm in PATH'
}

echo "Environment Variables:"
for var_name in ROCM_PATH ROCM_HOME HIP_PATH HIP_HOME \
  TORCH_HIP_ARCH_LIST HSA_OVERRIDE_GFX_VERSION; do
  # Indirect expansion with an empty fallback, so an unset variable prints as
  # "NAME=" instead of aborting under `set -u`.
  printf '%s=%s\n' "$var_name" "${!var_name-}"
done
echo "PATH (ROCm portion): $(rocm_path_entries)"
echo
#######################################
# Report whether a path is an existing directory.
# Arguments: $1 - path to test
# Outputs:   "YES" or "NO" on stdout
#######################################
dir_status() {
  if [[ -d "$1" ]]; then
    echo 'YES'
  else
    echo 'NO'
  fi
}

echo "=== Directory Checks ==="
echo "ROCm installation directory exists: $(dir_status "$ROCM_PATH")"
for subdir in bin include lib; do
  echo "ROCm $subdir directory exists: $(dir_status "$ROCM_PATH/$subdir")"
done
echo
#######################################
# Report whether a command name can be resolved by the shell.
# Uses the `command -v` builtin instead of the external `which`.
# Arguments: $1 - command name to look up
# Outputs:   "YES" or "NO" on stdout
#######################################
tool_status() {
  if command -v "$1" >/dev/null 2>&1; then
    echo 'YES'
  else
    echo 'NO'
  fi
}

echo "=== Tool Availability ==="
for tool in hipcc hip-clang rocm-smi hipconfig; do
  echo "$tool available: $(tool_status "$tool")"
done
echo
#######################################
# Run a tool and print its output under a heading, if the tool is installed.
# Prints nothing at all when the tool cannot be found.
# Arguments: $1   - heading line printed before the tool output
#            $2   - message printed if the tool exits non-zero
#            $3.. - the command to run, followed by its arguments
# Outputs:   heading, tool output (or failure message), then a blank line
# Returns:   0 in every case, so a broken tool never aborts the script
#######################################
report_tool() {
  local heading=$1 failure_msg=$2
  shift 2
  command -v "$1" >/dev/null 2>&1 || return 0
  echo "$heading"
  "$@" || echo "$failure_msg"
  echo
}

echo "=== Tool Versions ==="
report_tool "hipcc version:" "Failed to get hipcc version" hipcc --version
report_tool "HIP config:" "Failed to get hipconfig" hipconfig --full
report_tool "ROCm SMI:" "Failed to get ROCm SMI info" rocm-smi --showproductname
# Python / PyTorch probes. Each probe is a separate interpreter run with its
# own fallback message, so one failing probe does not hide the others.
# The Python snippets are single-quoted so the shell passes them through
# verbatim and no quote escaping is needed inside them.
echo "=== Python Environment ==="
python3 --version || echo "Python3 not available"
python3 -c 'import torch; print(f"PyTorch version: {torch.__version__}")' \
  || echo "PyTorch not available"
python3 -c 'import torch; print(f"CUDA available: {torch.cuda.is_available()}")' \
  || echo "Failed to check CUDA availability"
# HIP counts as available when torch.version has a non-None "hip" attribute.
python3 -c 'import torch; hip = getattr(torch.version, "hip", None); print(f"HIP available: {hip is not None}")' \
  || echo "Failed to check HIP availability"
#######################################
# Run the HIP device-listing tool if it is installed, otherwise say so.
# Arguments: $1 - name of the tool to run (optional; defaults to "hipinfo")
# Outputs:   "HIP devices:" plus the tool output (or "<tool> failed"),
#            or "<tool> not available" when the tool cannot be found
#######################################
hip_device_check() {
  local probe=${1:-hipinfo}
  if command -v "$probe" >/dev/null 2>&1; then
    echo "HIP devices:"
    "$probe" || echo "$probe failed"
  else
    echo "$probe not available"
  fi
}

echo
echo "=== Basic HIP Device Check ==="
hip_device_check

echo
echo "=== Debug Script 1 Complete ==="