[CI] Add nightly benchmarks (#5260)
This commit is contained in:
parent
f0a500545f
commit
d5b1eb081e
26
.buildkite/nightly-benchmarks/kickoff-pipeline.sh
Executable file
26
.buildkite/nightly-benchmarks/kickoff-pipeline.sh
Executable file
@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Kick-off script for the nightly benchmark pipeline.
#
# Steps performed, in order:
#   1. Install curl and jq with the system package manager.
#   2. Install the minijinja CLI via its upstream installer script.
#   3. When the build belongs to a pull request, query the GitHub API for the
#      PR's labels and exit successfully (without uploading anything) unless
#      the PR carries the 'perf-benchmarks' label.
#   4. Upload .buildkite/nightly-benchmarks/sample.yaml as pipeline steps.
#
# Reads BUILDKITE_PULL_REQUEST from the environment; because of `set -u` the
# script aborts if that variable is unset.

set -euo pipefail

# Install system packages.
# apt-get is used instead of apt: apt's command-line interface is intended for
# interactive use and warns when driven from a script.
apt-get update
apt-get install -y curl jq

# Install minijinja for templating.
# NOTE(review): minijinja is not invoked anywhere in this script; presumably it
# is needed by a later templating step -- confirm.
curl -sSfL https://github.com/mitsuhiko/minijinja/releases/latest/download/minijinja-cli-installer.sh | sh
# NOTE(review): assumes the installer creates $HOME/.cargo/env -- confirm.
source "$HOME/.cargo/env"

# If BUILDKITE_PULL_REQUEST != "false", then we check the PR labels using curl and jq
if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then
    # -f makes curl exit non-zero on an HTTP error and -S prints the reason, so
    # an API failure is reported directly (pipefail + errexit then stop the
    # script) rather than surfacing as a jq error on an unexpected payload.
    PR_LABELS=$(curl -sSf "https://api.github.com/repos/vllm-project/vllm/pulls/$BUILDKITE_PULL_REQUEST" | jq -r '.labels[].name')

    # jq prints one label per line; -F -x requires a whole line to equal the
    # label exactly, so labels that merely contain "perf-benchmarks" as a
    # substring do not enable the run.
    if grep -Fxq "perf-benchmarks" <<< "$PR_LABELS"; then
        echo "This PR has the 'perf-benchmarks' label. Proceeding with the nightly benchmarks."
    else
        echo "This PR does not have the 'perf-benchmarks' label. Skipping the nightly benchmarks."
        exit 0
    fi
fi

# Upload sample.yaml
buildkite-agent pipeline upload .buildkite/nightly-benchmarks/sample.yaml
|
39
.buildkite/nightly-benchmarks/sample.yaml
Normal file
39
.buildkite/nightly-benchmarks/sample.yaml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# Buildkite pipeline steps for the nightly benchmarks.
steps:
  # NOTE(simon): You can create separate blocks for different jobs
  - label: "A100: NVIDIA SMI"
    agents:
      queue: A100
    plugins:
      - kubernetes:
          podSpec:
            containers:
              # - image: us-central1-docker.pkg.dev/vllm-405802/vllm-ci-test-repo/vllm-test:$BUILDKITE_COMMIT
              # TODO(simon): check latest main branch or use the PR image.
              - image: us-central1-docker.pkg.dev/vllm-405802/vllm-ci-test-repo/vllm-test:45c35f0d58f4508bf43bd6af1d3d0d0ec0c915e6
                command:
                  - bash -c 'nvidia-smi && nvidia-smi topo -m && pwd && ls'
                resources:
                  limits:
                    # Limit the container to 8 GPUs.
                    nvidia.com/gpu: 8
                volumeMounts:
                  # Mount the memory-backed "devshm" volume at /dev/shm.
                  - name: devshm
                    mountPath: /dev/shm
            # Schedule only on nodes labelled with this GPU product.
            nodeSelector:
              nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
            volumes:
              - name: devshm
                emptyDir:
                  medium: Memory
  # TODO(simon): bring H100 online
  # - label: "H100: NVIDIA SMI"
  #   agents:
  #     queue: H100
  #   plugins:
  #     - docker#v5.11.0:
  #         image: us-central1-docker.pkg.dev/vllm-405802/vllm-ci-test-repo/vllm-test:45c35f0d58f4508bf43bd6af1d3d0d0ec0c915e6
  #         command:
  #           - bash -c 'nvidia-smi && nvidia-smi topo -m'
  #         propagate-environment: true
  #         ipc: host
  #         gpus: all
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user