[perf bench] H200 development (#9768)

Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
Simon Mo 2024-11-20 11:06:56 -08:00 committed by GitHub
parent 772a66732d
commit 5f1d6af2b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 7 deletions

View File

@@ -9,7 +9,9 @@ steps:
- image: badouralix/curl-jq - image: badouralix/curl-jq
command: command:
- sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh - sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
- wait - wait
- label: "A100" - label: "A100"
agents: agents:
queue: A100 queue: A100
@@ -41,6 +43,27 @@ steps:
- name: devshm - name: devshm
emptyDir: emptyDir:
medium: Memory medium: Memory
- label: "H200"
agents:
queue: H200
plugins:
- docker#v5.12.0:
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
command:
- bash
- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
mount-buildkite-agent: true
propagate-environment: true
ipc: host
gpus: 4,5,6,7
volumes:
- /data/benchmark-hf-cache:/root/.cache/huggingface
environment:
- VLLM_USAGE_SOURCE
- HF_TOKEN
# - label: "H100" # - label: "H100"
# agents: # agents:
# queue: H100 # queue: H100

View File

@@ -157,6 +157,11 @@ if __name__ == "__main__":
throughput_results, throughput_results,
serving_results) serving_results)
# Sort all dataframes by their respective "Test name" columns
for df in [latency_results, serving_results, throughput_results]:
if not df.empty:
df.sort_values(by="Test name", inplace=True)
# get markdown tables # get markdown tables
latency_md_table = tabulate(latency_results, latency_md_table = tabulate(latency_results,
headers='keys', headers='keys',

View File

@@ -6,6 +6,7 @@
# Do not set -e, as the mixtral 8x22B model tends to crash occasionally # Do not set -e, as the mixtral 8x22B model tends to crash occasionally
# and we still want to see other benchmarking results even when mixtral crashes. # and we still want to see other benchmarking results even when mixtral crashes.
set -x
set -o pipefail set -o pipefail
check_gpus() { check_gpus() {
@@ -85,11 +86,7 @@ kill_gpu_processes() {
ps -aux ps -aux
lsof -t -i:8000 | xargs -r kill -9 lsof -t -i:8000 | xargs -r kill -9
pkill -f pt_main_thread pgrep python3 | xargs -r kill -9
# this line doesn't work now
# ps aux | grep python | grep openai | awk '{print $2}' | xargs -r kill -9
pkill -f python3
pkill -f /usr/bin/python3
# wait until GPU memory usage smaller than 1GB # wait until GPU memory usage smaller than 1GB
@@ -289,7 +286,7 @@ run_serving_tests() {
# run the server # run the server
echo "Running test case $test_name" echo "Running test case $test_name"
echo "Server command: $server_command" echo "Server command: $server_command"
eval "$server_command" & bash -c "$server_command" &
server_pid=$! server_pid=$!
# wait until the server is alive # wait until the server is alive
@@ -322,7 +319,7 @@ run_serving_tests() {
echo "Running test case $test_name with qps $qps" echo "Running test case $test_name with qps $qps"
echo "Client command: $client_command" echo "Client command: $client_command"
eval "$client_command" bash -c "$client_command"
# record the benchmarking commands # record the benchmarking commands
jq_output=$(jq -n \ jq_output=$(jq -n \