File size: 1,912 Bytes
f24563f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash

# setup_environment.sh - Script to set up the environment for training a powerful LLM on TPU v4-32
# This script will clone MaxText, install dependencies, and configure the environment

# Stop the script as soon as any command exits with a non-zero status.
set -o errexit

printf '%s\n' "Setting up environment for LLM training on TPU v4-32..."

# Create the logs/ and checkpoints/ directories under the directory the script
# was started from; -p makes this a no-op when they already exist.
for workdir in logs checkpoints; do
    mkdir -p "$workdir"
done

# Clone the MaxText repository, or update an existing checkout, then change
# into it; the following steps run from inside maxtext/.
if [[ -e maxtext/.git ]]; then
    echo "MaxText repository already exists, updating..."
    git -C maxtext pull
elif [[ -e maxtext ]]; then
    # A leftover path that is not a git checkout: running `git pull` inside it
    # would either fail with an unrelated error or operate on whatever
    # repository happens to enclose the current directory, so stop early.
    echo "Error: ./maxtext exists but is not a git checkout; remove or rename it and re-run." >&2
    exit 1
else
    echo "Cloning MaxText repository..."
    git clone https://github.com/AI-Hypercomputer/maxtext.git maxtext
fi
cd maxtext

# Install dependencies
# setup.sh is looked up in the current directory, which is expected to be the
# MaxText checkout at this point.
echo "Installing dependencies..."
bash setup.sh

# `pre-commit install` registers git hooks in the checkout. Nothing later in
# this script uses them, so a missing pre-commit binary is reported as a
# warning instead of aborting the whole setup under `set -e`. A failure of the
# command itself still stops the script.
if command -v pre-commit > /dev/null 2>&1; then
    pre-commit install
else
    echo "Warning: pre-commit not found on PATH; skipping git hook installation." >&2
fi

# Set up environment variables
# NOTE: when this script is executed (rather than sourced), these exports are
# seen only by this process and the commands it starts; they do not persist in
# the shell that launched it.
echo "Setting up environment variables..."

# Append the current directory to PYTHONPATH. The ${VAR:+...} form emits the
# ':' separator only when PYTHONPATH already has a value, so an unset or empty
# PYTHONPATH no longer produces a stray empty leading entry.
PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}"
export PYTHONPATH
export JAX_PLATFORMS="tpu"

# Check TPU configuration
# Both values are reported from a single interpreter launch so JAX is imported
# and its backend initialised once rather than once per printed line.
# The "TPU type" line prints device_kind (the device model string) because
# .platform only names the backend and carries no information about the chip.
echo "Checking TPU configuration..."
python3 -c '
import jax

print(f"TPU devices: {jax.device_count()}")
print(f"TPU type: {jax.devices()[0].device_kind}")
'

# Make sure the GCS bucket for checkpoints and logs is reachable, creating it
# when the listing fails.
# Required environment:
#   GCS_BUCKET  - bucket URL, e.g. gs://your-bucket-name
#   PROJECT_ID  - GCP project that will own the bucket; only needed when the
#                 bucket has to be created
if [[ -z "${GCS_BUCKET:-}" ]]; then
    echo "Please set the GCS_BUCKET environment variable" >&2
    echo "Example: export GCS_BUCKET=gs://your-bucket-name" >&2
    exit 1
fi

echo "Checking GCS bucket access..."
if ! gsutil ls "$GCS_BUCKET" > /dev/null; then
    # With PROJECT_ID unset, `gsutil mb -p` would consume the bucket URL as the
    # project id and fail with a confusing error, so check it explicitly.
    if [[ -z "${PROJECT_ID:-}" ]]; then
        echo "Please set the PROJECT_ID environment variable to create $GCS_BUCKET" >&2
        echo "Example: export PROJECT_ID=your-gcp-project-id" >&2
        exit 1
    fi
    echo "Creating GCS bucket $GCS_BUCKET..."
    gsutil mb -p "$PROJECT_ID" "$GCS_BUCKET"
fi

# Set up Cloud Storage FUSE for efficient data access
echo "Setting up Cloud Storage FUSE..."

# setup_gcsfuse.sh is passed the bucket name without the gs:// scheme; a
# parameter expansion strips the prefix without spawning echo and sed.
BUCKET_NAME="${GCS_BUCKET#gs://}"
MOUNT_PATH="/tmp/gcsfuse"

# Each KEY=value pair is quoted so it always reaches the helper script as a
# single argument, regardless of the characters in the values.
bash setup_gcsfuse.sh "DATASET_GCS_BUCKET=${BUCKET_NAME}" "MOUNT_PATH=${MOUNT_PATH}"

printf '%s\n' "Environment setup complete!"

# Leave the current directory for its parent (undoing the earlier cd into
# maxtext) before printing the final hint.
cd ..

printf '%s\n' "You can now run the training script with: bash tpu_train.sh"