[Benchmark] Add new H100 machine (#10547)
This commit is contained in:
parent
9afa014552
commit
aed074860a
@ -13,6 +13,7 @@ steps:
|
|||||||
- wait
|
- wait
|
||||||
|
|
||||||
- label: "A100"
|
- label: "A100"
|
||||||
|
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
|
||||||
agents:
|
agents:
|
||||||
queue: A100
|
queue: A100
|
||||||
plugins:
|
plugins:
|
||||||
@ -45,6 +46,7 @@ steps:
|
|||||||
medium: Memory
|
medium: Memory
|
||||||
|
|
||||||
- label: "H200"
|
- label: "H200"
|
||||||
|
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
|
||||||
agents:
|
agents:
|
||||||
queue: H200
|
queue: H200
|
||||||
plugins:
|
plugins:
|
||||||
@ -63,21 +65,22 @@ steps:
|
|||||||
- VLLM_USAGE_SOURCE
|
- VLLM_USAGE_SOURCE
|
||||||
- HF_TOKEN
|
- HF_TOKEN
|
||||||
|
|
||||||
|
- label: "H100"
|
||||||
# - label: "H100"
|
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
|
||||||
# agents:
|
agents:
|
||||||
# queue: H100
|
queue: H100
|
||||||
# plugins:
|
plugins:
|
||||||
# - docker#v5.11.0:
|
- docker#v5.12.0:
|
||||||
# image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
|
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
|
||||||
# command:
|
command:
|
||||||
# - bash
|
- bash
|
||||||
# - .buildkite/nightly-benchmarks/run-benchmarks-suite.sh
|
- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
|
||||||
# mount-buildkite-agent: true
|
mount-buildkite-agent: true
|
||||||
# propagate-environment: true
|
propagate-environment: true
|
||||||
# ipc: host
|
ipc: host
|
||||||
# gpus: all
|
gpus: all # see CUDA_VISIBLE_DEVICES for actual GPUs used
|
||||||
# environment:
|
volumes:
|
||||||
# - VLLM_USAGE_SOURCE
|
- /data/benchmark-hf-cache:/root/.cache/huggingface
|
||||||
# - HF_TOKEN
|
environment:
|
||||||
|
- VLLM_USAGE_SOURCE
|
||||||
|
- HF_TOKEN
|
||||||
|
@ -157,11 +157,18 @@ if __name__ == "__main__":
|
|||||||
throughput_results,
|
throughput_results,
|
||||||
serving_results)
|
serving_results)
|
||||||
|
|
||||||
# Sort all dataframes by their respective "Test name" columns
|
|
||||||
for df in [latency_results, serving_results, throughput_results]:
|
for df in [latency_results, serving_results, throughput_results]:
|
||||||
if not df.empty:
|
if df.empty:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Sort all dataframes by their respective "Test name" columns
|
||||||
df.sort_values(by="Test name", inplace=True)
|
df.sort_values(by="Test name", inplace=True)
|
||||||
|
|
||||||
|
# The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",
|
||||||
|
# we want to turn it into "8xGPUTYPE"
|
||||||
|
df["GPU"] = df["GPU"].apply(
|
||||||
|
lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}")
|
||||||
|
|
||||||
# get markdown tables
|
# get markdown tables
|
||||||
latency_md_table = tabulate(latency_results,
|
latency_md_table = tabulate(latency_results,
|
||||||
headers='keys',
|
headers='keys',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user