109 lines
3.4 KiB
Bash
Executable File
109 lines
3.4 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
set -euox pipefail
|
|
|
|
if [[ $# -lt 4 ]]; then
|
|
echo "Usage: .buildkite/run-multi-node-test.sh WORKING_DIR NUM_NODES NUM_GPUS DOCKER_IMAGE COMMAND1 COMMAND2 ... COMMANDN"
|
|
exit 1
|
|
fi
|
|
|
|
WORKING_DIR=$1
|
|
NUM_NODES=$2
|
|
NUM_GPUS=$3
|
|
DOCKER_IMAGE=$4
|
|
|
|
shift 4
|
|
COMMANDS=("$@")
|
|
if [ ${#COMMANDS[@]} -ne "$NUM_NODES" ]; then
|
|
echo "The number of commands must be equal to the number of nodes."
|
|
echo "Number of nodes: $NUM_NODES"
|
|
echo "Number of commands: ${#COMMANDS[@]}"
|
|
exit 1
|
|
fi
|
|
|
|
echo "List of commands"
|
|
for command in "${COMMANDS[@]}"; do
|
|
echo "$command"
|
|
done
|
|
|
|
start_network() {
|
|
docker network create --subnet=192.168.10.0/24 docker-net
|
|
}
|
|
|
|
start_nodes() {
|
|
for node in $(seq 0 $(($NUM_NODES-1))); do
|
|
GPU_DEVICES='"device='
|
|
for node_gpu in $(seq 0 $(($NUM_GPUS - 1))); do
|
|
DEVICE_NUM=$(($node * $NUM_GPUS + $node_gpu))
|
|
GPU_DEVICES+=$(($DEVICE_NUM))
|
|
if [ "$node_gpu" -lt $(($NUM_GPUS - 1)) ]; then
|
|
GPU_DEVICES+=','
|
|
fi
|
|
done
|
|
GPU_DEVICES+='"'
|
|
|
|
# start the container in detached mode
|
|
# things to note:
|
|
# 1. --shm-size=10.24gb is required. don't use --ipc=host
|
|
# 2. pass HF_TOKEN to the container
|
|
# 3. map the huggingface cache directory to the container
|
|
# 3. assign ip addresses to the containers (head node: 192.168.10.10, worker nodes:
|
|
# starting from 192.168.10.11)
|
|
docker run -d --gpus "$GPU_DEVICES" --shm-size=10.24gb -e HF_TOKEN \
|
|
-v ~/.cache/huggingface:/root/.cache/huggingface --name "node$node" \
|
|
--network docker-net --ip 192.168.10.$((10 + $node)) --rm "$DOCKER_IMAGE" \
|
|
/bin/bash -c "tail -f /dev/null"
|
|
|
|
# organize containers into a ray cluster
|
|
if [ "$node" -eq 0 ]; then
|
|
# start the ray head node
|
|
docker exec -d "node$node" /bin/bash -c "ray start --head --port=6379 --block"
|
|
# wait for the head node to be ready
|
|
sleep 10
|
|
else
|
|
# start the ray worker nodes, and connect them to the head node
|
|
docker exec -d "node$node" /bin/bash -c "ray start --address=192.168.10.10:6379 --block"
|
|
fi
|
|
done
|
|
|
|
# wait for the cluster to be ready
|
|
sleep 10
|
|
|
|
# print the cluster status
|
|
docker exec node0 /bin/bash -c "ray status"
|
|
}
|
|
|
|
run_nodes() {
|
|
# important: iterate in reverse order to start the head node last
|
|
# we start the worker nodes first, in detached mode, and then start the head node
|
|
# in the foreground, so that the output of the head node is visible in the buildkite logs
|
|
for node in $(seq $(($NUM_NODES - 1)) -1 0); do
|
|
GPU_DEVICES='"device='
|
|
for node_gpu in $(seq 0 $(($NUM_GPUS - 1))); do
|
|
DEVICE_NUM=$(($node * $NUM_GPUS + $node_gpu))
|
|
GPU_DEVICES+=$(($DEVICE_NUM))
|
|
if [ "$node_gpu" -lt $(($NUM_GPUS - 1)) ]; then
|
|
GPU_DEVICES+=','
|
|
fi
|
|
done
|
|
GPU_DEVICES+='"'
|
|
echo "Running node$node with GPU devices: $GPU_DEVICES"
|
|
if [ "$node" -ne 0 ]; then
|
|
docker exec -d "node$node" /bin/bash -c "cd $WORKING_DIR ; ${COMMANDS[$node]}"
|
|
else
|
|
docker exec "node$node" /bin/bash -c "cd $WORKING_DIR ; ${COMMANDS[$node]}"
|
|
fi
|
|
done
|
|
}
|
|
cleanup() {
|
|
for node in $(seq 0 $(($NUM_NODES-1))); do
|
|
docker stop "node$node"
|
|
done
|
|
docker network rm docker-net
|
|
}
|
|
trap cleanup EXIT
|
|
start_network
|
|
start_nodes
|
|
run_nodes
|
|
|