Benchmark: add H100 suite (#6047)

This commit is contained in:
Simon Mo 2024-07-11 09:17:07 -07:00 committed by GitHub
parent b675069d74
commit 52b7fcb35a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 40 additions and 23 deletions

View File

@ -11,7 +11,7 @@ steps:
- sh - sh
- .buildkite/nightly-benchmarks/scripts/wait-for-image.sh - .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
- wait - wait
- label: "A100 Benchmark" - label: "A100"
agents: agents:
queue: A100 queue: A100
plugins: plugins:
@ -42,21 +42,20 @@ steps:
- name: devshm - name: devshm
emptyDir: emptyDir:
medium: Memory medium: Memory
# - label: "H100: NVIDIA SMI" - label: "H100"
# agents: agents:
# queue: H100 queue: H100
# plugins: plugins:
# - docker#v5.11.0: - docker#v5.11.0:
# image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
# command: command:
# - bash - bash
# - .buildkite/nightly-benchmarks/run-benchmarks-suite.sh - .buildkite/nightly-benchmarks/run-benchmarks-suite.sh
# mount-buildkite-agent: true mount-buildkite-agent: true
# propagate-environment: true propagate-environment: true
# propagate-uid-gid: false ipc: host
# ipc: host gpus: all
# gpus: all environment:
# environment: - VLLM_USAGE_SOURCE
# - VLLM_USAGE_SOURCE - HF_TOKEN
# - HF_TOKEN

View File

@ -54,7 +54,7 @@ wait_for_server() {
# wait for vllm server to start # wait for vllm server to start
# return 1 if vllm server crashes # return 1 if vllm server crashes
timeout 1200 bash -c ' timeout 1200 bash -c '
until curl localhost:8000/v1/completions; do until curl -X POST localhost:8000/v1/completions; do
sleep 1 sleep 1
done' && return 0 || return 1 done' && return 0 || return 1
} }
@ -73,8 +73,17 @@ kill_gpu_processes() {
echo "All GPU processes have been killed." echo "All GPU processes have been killed."
fi fi
# Sometimes killing by PID doesn't work properly, so we also kill all processes running python or python3;
# this is safe since we are in a container anyway
pkill -9 -f python
pkill -9 -f python3
# waiting for GPU processes to be fully killed # waiting for GPU processes to be fully killed
sleep 10 # loop while nvidia-smi returns any processes
while [ -n "$(nvidia-smi --query-compute-apps=pid --format=csv,noheader)" ]; do
sleep 1
echo "Waiting for GPU processes to be killed"
done
# remove vllm config file # remove vllm config file
rm -rf ~/.config/vllm rm -rf ~/.config/vllm
@ -90,12 +99,19 @@ upload_to_buildkite() {
# upload the benchmarking results to buildkite # upload the benchmarking results to buildkite
# if the agent binary is not found, skip uploading the results, exit 0 # if the agent binary is not found, skip uploading the results, exit 0
if [ ! -f /workspace/buildkite-agent ]; then # Check if buildkite-agent is available in the PATH or at /workspace/buildkite-agent
if command -v buildkite-agent >/dev/null 2>&1; then
BUILDKITE_AGENT_COMMAND="buildkite-agent"
elif [ -f /workspace/buildkite-agent ]; then
BUILDKITE_AGENT_COMMAND="/workspace/buildkite-agent"
else
echo "buildkite-agent binary not found. Skip uploading the results." echo "buildkite-agent binary not found. Skip uploading the results."
return 0 return 0
fi fi
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < $RESULTS_FOLDER/benchmark_results.md
/workspace/buildkite-agent artifact upload "$RESULTS_FOLDER/*" # Use the determined command to annotate and upload artifacts
$BUILDKITE_AGENT_COMMAND annotate --style "info" --context "$BUILDKITE_LABEL-benchmark-results" < $RESULTS_FOLDER/benchmark_results.md
$BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
} }
run_latency_tests() { run_latency_tests() {
@ -269,6 +285,7 @@ run_serving_tests() {
echo "Running test case $test_name" echo "Running test case $test_name"
echo "Server command: $server_command" echo "Server command: $server_command"
eval "$server_command" & eval "$server_command" &
server_pid=$!
# wait until the server is alive # wait until the server is alive
wait_for_server wait_for_server
@ -318,6 +335,7 @@ run_serving_tests() {
done done
# clean up # clean up
kill -9 $server_pid
kill_gpu_processes kill_gpu_processes
done done
} }