[perf bench] H200 development (#9768)
Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
parent
772a66732d
commit
5f1d6af2b6
@ -9,7 +9,9 @@ steps:
|
|||||||
- image: badouralix/curl-jq
|
- image: badouralix/curl-jq
|
||||||
command:
|
command:
|
||||||
- sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
|
- sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
|
||||||
|
|
||||||
- wait
|
- wait
|
||||||
|
|
||||||
- label: "A100"
|
- label: "A100"
|
||||||
agents:
|
agents:
|
||||||
queue: A100
|
queue: A100
|
||||||
@ -41,6 +43,27 @@ steps:
|
|||||||
- name: devshm
|
- name: devshm
|
||||||
emptyDir:
|
emptyDir:
|
||||||
medium: Memory
|
medium: Memory
|
||||||
|
|
||||||
|
- label: "H200"
|
||||||
|
agents:
|
||||||
|
queue: H200
|
||||||
|
plugins:
|
||||||
|
- docker#v5.12.0:
|
||||||
|
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
|
||||||
|
command:
|
||||||
|
- bash
|
||||||
|
- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
|
||||||
|
mount-buildkite-agent: true
|
||||||
|
propagate-environment: true
|
||||||
|
ipc: host
|
||||||
|
gpus: 4,5,6,7
|
||||||
|
volumes:
|
||||||
|
- /data/benchmark-hf-cache:/root/.cache/huggingface
|
||||||
|
environment:
|
||||||
|
- VLLM_USAGE_SOURCE
|
||||||
|
- HF_TOKEN
|
||||||
|
|
||||||
|
|
||||||
# - label: "H100"
|
# - label: "H100"
|
||||||
# agents:
|
# agents:
|
||||||
# queue: H100
|
# queue: H100
|
||||||
|
@ -157,6 +157,11 @@ if __name__ == "__main__":
|
|||||||
throughput_results,
|
throughput_results,
|
||||||
serving_results)
|
serving_results)
|
||||||
|
|
||||||
|
# Sort all dataframes by their respective "Test name" columns
|
||||||
|
for df in [latency_results, serving_results, throughput_results]:
|
||||||
|
if not df.empty:
|
||||||
|
df.sort_values(by="Test name", inplace=True)
|
||||||
|
|
||||||
# get markdown tables
|
# get markdown tables
|
||||||
latency_md_table = tabulate(latency_results,
|
latency_md_table = tabulate(latency_results,
|
||||||
headers='keys',
|
headers='keys',
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
# Do not set -e, as the mixtral 8x22B model tends to crash occasionally
|
# Do not set -e, as the mixtral 8x22B model tends to crash occasionally
|
||||||
# and we still want to see other benchmarking results even when mixtral crashes.
|
# and we still want to see other benchmarking results even when mixtral crashes.
|
||||||
|
set -x
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
|
|
||||||
check_gpus() {
|
check_gpus() {
|
||||||
@ -85,11 +86,7 @@ kill_gpu_processes() {
|
|||||||
|
|
||||||
ps -aux
|
ps -aux
|
||||||
lsof -t -i:8000 | xargs -r kill -9
|
lsof -t -i:8000 | xargs -r kill -9
|
||||||
pkill -f pt_main_thread
|
pgrep python3 | xargs -r kill -9
|
||||||
# this line doesn't work now
|
|
||||||
# ps aux | grep python | grep openai | awk '{print $2}' | xargs -r kill -9
|
|
||||||
pkill -f python3
|
|
||||||
pkill -f /usr/bin/python3
|
|
||||||
|
|
||||||
|
|
||||||
# wait until GPU memory usage smaller than 1GB
|
# wait until GPU memory usage smaller than 1GB
|
||||||
@ -289,7 +286,7 @@ run_serving_tests() {
|
|||||||
# run the server
|
# run the server
|
||||||
echo "Running test case $test_name"
|
echo "Running test case $test_name"
|
||||||
echo "Server command: $server_command"
|
echo "Server command: $server_command"
|
||||||
eval "$server_command" &
|
bash -c "$server_command" &
|
||||||
server_pid=$!
|
server_pid=$!
|
||||||
|
|
||||||
# wait until the server is alive
|
# wait until the server is alive
|
||||||
@ -322,7 +319,7 @@ run_serving_tests() {
|
|||||||
echo "Running test case $test_name with qps $qps"
|
echo "Running test case $test_name with qps $qps"
|
||||||
echo "Client command: $client_command"
|
echo "Client command: $client_command"
|
||||||
|
|
||||||
eval "$client_command"
|
bash -c "$client_command"
|
||||||
|
|
||||||
# record the benchmarking commands
|
# record the benchmarking commands
|
||||||
jq_output=$(jq -n \
|
jq_output=$(jq -n \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user