[perf bench] H200 development (#9768)

Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
Simon Mo 2024-11-20 11:06:56 -08:00 committed by GitHub
parent 772a66732d
commit 5f1d6af2b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 7 deletions

View File

@ -9,7 +9,9 @@ steps:
- image: badouralix/curl-jq
command:
- sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
- wait
- label: "A100"
agents:
queue: A100
@ -41,6 +43,27 @@ steps:
- name: devshm
emptyDir:
medium: Memory
- label: "H200"
agents:
queue: H200
plugins:
- docker#v5.12.0:
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
command:
- bash
- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
mount-buildkite-agent: true
propagate-environment: true
ipc: host
gpus: 4,5,6,7
volumes:
- /data/benchmark-hf-cache:/root/.cache/huggingface
environment:
- VLLM_USAGE_SOURCE
- HF_TOKEN
# - label: "H100"
# agents:
# queue: H100

View File

@ -157,6 +157,11 @@ if __name__ == "__main__":
throughput_results,
serving_results)
# Sort all dataframes by their respective "Test name" columns
for df in [latency_results, serving_results, throughput_results]:
if not df.empty:
df.sort_values(by="Test name", inplace=True)
# get markdown tables
latency_md_table = tabulate(latency_results,
headers='keys',

View File

@ -6,6 +6,7 @@
# Do not set -e, as the mixtral 8x22B model tends to crash occasionally
# and we still want to see other benchmarking results even when mixtral crashes.
set -x
set -o pipefail
check_gpus() {
@ -85,11 +86,7 @@ kill_gpu_processes() {
ps -aux
lsof -t -i:8000 | xargs -r kill -9
pkill -f pt_main_thread
# this line doesn't work now
# ps aux | grep python | grep openai | awk '{print $2}' | xargs -r kill -9
pkill -f python3
pkill -f /usr/bin/python3
pgrep python3 | xargs -r kill -9
# wait until GPU memory usage smaller than 1GB
@ -289,7 +286,7 @@ run_serving_tests() {
# run the server
echo "Running test case $test_name"
echo "Server command: $server_command"
eval "$server_command" &
bash -c "$server_command" &
server_pid=$!
# wait until the server is alive
@ -322,7 +319,7 @@ run_serving_tests() {
echo "Running test case $test_name with qps $qps"
echo "Client command: $client_command"
eval "$client_command"
bash -c "$client_command"
# record the benchmarking commands
jq_output=$(jq -n \