---
# Buildkite pipeline: benchmarks vLLM against alternative serving engines
# (SGLang, LMDeploy, TensorRT-LLM) on the A100 agent queue.

# Pod-level settings shared by the steps below; each step pulls them in under
# `podSpec` with the merge key `<<: *common_pod_spec`.
common_pod_spec: &common_pod_spec
  priorityClassName: perf-benchmark
  # Schedule only onto nodes labelled as 80GB A100 SXM4 machines.
  nodeSelector:
    nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
  volumes:
  # Memory-backed scratch volume; containers mount it at /dev/shm.
  - name: devshm
    emptyDir:
      medium: Memory
  # Host directory that holds the Hugging Face cache.
  - name: hf-cache
    hostPath:
      path: /root/.cache/huggingface
      type: Directory

# Container-level settings shared by the benchmark steps; merged into each
# container entry with `<<: *common_container_settings`.
common_container_settings: &common_container_settings
  # NOTE(review): the whole command line is a single list entry; presumably the
  # Buildkite Kubernetes plugin hands it to a shell -- confirm before splitting
  # it into separate argv items.
  command:
  - bash .buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
  resources:
    limits:
      nvidia.com/gpu: 8
  volumeMounts:
  - name: devshm
    mountPath: /dev/shm
  - name: hf-cache
    mountPath: /root/.cache/huggingface
  env:
  - name: VLLM_USAGE_SOURCE
    value: ci-test
  # Same path as the hf-cache mount above.
  - name: HF_HOME
    value: /root/.cache/huggingface
  - name: VLLM_SOURCE_CODE_LOC
    value: /workspace/build/buildkite/vllm/performance-benchmark
  # The token comes from a Kubernetes secret instead of being stored here.
  - name: HF_TOKEN
    valueFrom:
      secretKeyRef:
        name: hf-token-secret
        key: token

steps:
# Manual gate shown before any benchmark step is listed.
- block: ":rocket: Ready for comparing vllm against alternatives? This will take 4 hours."

# vLLM benchmark: shared pod/container settings with the vLLM image.
- label: "A100 vllm step 10"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        <<: *common_pod_spec
        containers:
        - image: vllm/vllm-openai:v0.6.2
          <<: *common_container_settings

# SGLang benchmark: shared pod/container settings with the SGLang image.
- label: "A100 sglang benchmark"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        <<: *common_pod_spec
        containers:
        - image: lmsysorg/sglang:v0.3.2-cu121
          <<: *common_container_settings

# LMDeploy benchmark: shared pod/container settings with the LMDeploy image.
- label: "A100 lmdeploy benchmark"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        <<: *common_pod_spec
        containers:
        - image: openmmlab/lmdeploy:v0.6.1-cu12
          <<: *common_container_settings

# TensorRT-LLM benchmark on the Triton server image, with TEST_SELECTOR set to
# "llama8B".
# NOTE(review): presumably the benchmark script reads TEST_SELECTOR to choose
# which model to run -- confirm against the script.
- label: "A100 trt llama-8B"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        <<: *common_pod_spec
        containers:
        - image: nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3
          <<: *common_container_settings
          # `<<` is a shallow merge: this explicit `env` replaces the merged
          # list wholesale, so the shared entries are repeated here and
          # TEST_SELECTOR is appended.
          env:
          - name: VLLM_USAGE_SOURCE
            value: ci-test
          - name: HF_HOME
            value: /root/.cache/huggingface
          - name: VLLM_SOURCE_CODE_LOC
            value: /workspace/build/buildkite/vllm/performance-benchmark
          - name: HF_TOKEN
            valueFrom:
              secretKeyRef:
                name: hf-token-secret
                key: token
          - name: TEST_SELECTOR
            value: "llama8B"

# TensorRT-LLM benchmark on the Triton server image, with TEST_SELECTOR set to
# "llama70B".
# NOTE(review): presumably the benchmark script reads TEST_SELECTOR to choose
# which model to run -- confirm against the script.
- label: "A100 trt llama-70B"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        <<: *common_pod_spec
        containers:
        - image: nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3
          <<: *common_container_settings
          # `<<` is a shallow merge: this explicit `env` replaces the merged
          # list wholesale, so the shared entries are repeated here and
          # TEST_SELECTOR is appended.
          env:
          - name: VLLM_USAGE_SOURCE
            value: ci-test
          - name: HF_HOME
            value: /root/.cache/huggingface
          - name: VLLM_SOURCE_CODE_LOC
            value: /workspace/build/buildkite/vllm/performance-benchmark
          - name: HF_TOKEN
            valueFrom:
              secretKeyRef:
                name: hf-token-secret
                key: token
          - name: TEST_SELECTOR
            value: "llama70B"

# FIXME(Kuntai): uncomment this step once NVIDIA gives us their test Docker
# image.
# - label: "A100 trt benchmark"
#   priority: 100
#   agents:
#     queue: A100
#   plugins:
#   - kubernetes:
#       podSpec:
#         <<: *common_pod_spec
#         containers:
#         - image: nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3
#           <<: *common_container_settings

# FIXME(Kuntai): uncomment this step once TGI supports `--ignore-eos`.
# - label: "A100 tgi benchmark"
#   priority: 100
#   agents:
#     queue: A100
#   plugins:
#   - kubernetes:
#       podSpec:
#         <<: *common_pod_spec
#         containers:
#         - image: ghcr.io/huggingface/text-generation-inference:2.2.0
#           <<: *common_container_settings

# Barrier between the benchmark steps above and the result collection below.
- wait

# Runs the annotate script instead of the benchmark script. The container is
# spelled out in full rather than merging common_container_settings: it has its
# own command, mounts only devshm, and sets no HF_HOME.
- label: "Collect the results"
  priority: 100
  agents:
    queue: A100
  plugins:
  - kubernetes:
      podSpec:
        <<: *common_pod_spec
        containers:
        # NOTE(review): this tag (v0.5.0.post1) is older than the one used by
        # the vLLM benchmark step (v0.6.2) -- confirm the pin is intentional.
        - image: vllm/vllm-openai:v0.5.0.post1
          command:
          - bash .buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
          # NOTE(review): 8 GPUs are requested for the annotate script as
          # well -- confirm this step really needs them.
          resources:
            limits:
              nvidia.com/gpu: 8
          # NOTE(review): the pod spec also defines the hf-cache volume, but it
          # is not mounted here -- confirm that is intended.
          volumeMounts:
          - name: devshm
            mountPath: /dev/shm
          env:
          - name: VLLM_USAGE_SOURCE
            value: ci-test
          - name: VLLM_SOURCE_CODE_LOC
            value: /workspace/build/buildkite/vllm/performance-benchmark
          - name: HF_TOKEN
            valueFrom:
              secretKeyRef:
                name: hf-token-secret
                key: token

# Final manual gate, listed after the results have been collected.
- block: ":rocket: check the results!"