File size: 2,872 Bytes
2d8a802
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env bash

# Debug script 1: Basic ROCm environment and tool availability check
#
# Exports the ROCm/HIP environment variables used by this script and prints
# them. Every variable can be preset by the caller; the defaults are:
#   ROCM_PATH                        /opt/rocm-7.0.1
#   ROCM_HOME, HIP_PATH, HIP_HOME    $ROCM_PATH
#   TORCH_HIP_ARCH_LIST              gfx942
#   HSA_OVERRIDE_GFX_VERSION         9.4.2

set -euo pipefail

echo "=== ROCm Environment Debug Script 1 ==="
echo "Testing basic ROCm/HIP environment setup"
echo

# Set ROCm environment variables
export ROCM_PATH="${ROCM_PATH:-/opt/rocm-7.0.1}"
export ROCM_HOME="${ROCM_HOME:-$ROCM_PATH}"
export HIP_PATH="${HIP_PATH:-$ROCM_PATH}"
export HIP_HOME="${HIP_HOME:-$ROCM_PATH}"
export PATH="$ROCM_HOME/bin:$PATH"
export TORCH_HIP_ARCH_LIST="${TORCH_HIP_ARCH_LIST:-gfx942}"
# The override takes a dotted major.minor.stepping version, not the gfx target
# name, so the gfx942 target is written as 9.4.2 here.
export HSA_OVERRIDE_GFX_VERSION="${HSA_OVERRIDE_GFX_VERSION:-9.4.2}"

echo "Environment Variables:"
echo "ROCM_PATH=$ROCM_PATH"
echo "ROCM_HOME=$ROCM_HOME"
echo "HIP_PATH=$HIP_PATH"
echo "HIP_HOME=$HIP_HOME"
echo "TORCH_HIP_ARCH_LIST=$TORCH_HIP_ARCH_LIST"
echo "HSA_OVERRIDE_GFX_VERSION=$HSA_OVERRIDE_GFX_VERSION"
# PATH is quoted so entries containing spaces or glob characters are listed
# verbatim; grep exits non-zero when nothing matches, which selects the fallback.
echo "PATH (ROCm portion): $(printf '%s\n' "$PATH" | tr ':' '\n' | grep rocm || echo 'No ROCm in PATH')"
echo

# Print YES when $1 is an existing directory, NO otherwise.
# A real if/else is used instead of `test && echo || echo`, where the NO branch
# would also run if the YES echo itself failed.
dir_status() {
    if [[ -d "$1" ]]; then
        echo 'YES'
    else
        echo 'NO'
    fi
}

echo "=== Directory Checks ==="
echo "ROCm installation directory exists: $(dir_status "$ROCM_PATH")"
echo "ROCm bin directory exists: $(dir_status "$ROCM_PATH/bin")"
echo "ROCm include directory exists: $(dir_status "$ROCM_PATH/include")"
echo "ROCm lib directory exists: $(dir_status "$ROCM_PATH/lib")"
echo

# Succeed when the command named by $1 can be resolved by the shell.
# Uses the `command -v` builtin rather than the external `which` program, so
# the result does not depend on `which` being installed.
have_tool() {
    command -v "$1" >/dev/null 2>&1
}

# Print YES when the command named by $1 is available, NO otherwise.
tool_status() {
    if have_tool "$1"; then
        echo 'YES'
    else
        echo 'NO'
    fi
}

echo "=== Tool Availability ==="
for tool in hipcc hip-clang rocm-smi hipconfig; do
    echo "$tool available: $(tool_status "$tool")"
done
echo

echo "=== Tool Versions ==="
# Each tool is only invoked when present; a failing invocation prints a
# message instead of aborting the script under `set -e`.
if have_tool hipcc; then
    echo "hipcc version:"
    hipcc --version || echo "Failed to get hipcc version"
    echo
fi

if have_tool hipconfig; then
    echo "HIP config:"
    hipconfig --full || echo "Failed to get hipconfig"
    echo
fi

if have_tool rocm-smi; then
    echo "ROCm SMI:"
    rocm-smi --showproductname || echo "Failed to get ROCm SMI info"
    echo
fi

echo "=== Python Environment ==="
# Check for the interpreter first: without python3 the torch probes below can
# only fail, and their messages would wrongly point at PyTorch.
if command -v python3 >/dev/null 2>&1; then
    python3 --version || echo "Python3 not available"
    # Each probe runs in its own interpreter so one failing check does not
    # prevent the others from reporting.
    python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')" || echo "PyTorch not available"
    python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')" || echo "Failed to check CUDA availability"
    python3 -c "import torch; print(f'HIP available: {hasattr(torch.version, \"hip\") and torch.version.hip is not None}')" || echo "Failed to check HIP availability"
else
    echo "Python3 not available"
fi

echo
echo "=== Basic HIP Device Check ==="
# `command -v` is a shell builtin, so this lookup does not depend on the
# external `which` program being installed.
if command -v hipinfo >/dev/null 2>&1; then
    echo "HIP devices:"
    hipinfo || echo "hipinfo failed"
else
    echo "hipinfo not available"
fi

echo
echo "=== Debug Script 1 Complete ==="