vllm/.buildkite/nightly-benchmarks/benchmark-pipeline.yaml

steps:
  # Gate step: the benchmark steps in this pipeline name this step's key
  # (wait-for-container-image) in their depends_on.
  - label: 'Wait for container to be ready'
    key: wait-for-container-image
    agents:
      queue: A100
    plugins:
      - kubernetes:
          podSpec:
            containers:
              # Runs the repository's wait-for-image.sh script with sh.
              - image: badouralix/curl-jq
                command:
                  - sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
  # Housekeeping on the H100 queue: prunes Docker data, including all unused
  # images (-a) and volumes (--volumes), without prompting (--force).
  - label: 'Cleanup H100'
    agents:
      queue: H100
    # Explicit null: this step declares no dependencies on other steps.
    depends_on: null
    command: docker system prune -a --volumes --force
  
  # Post-merge A100 benchmark: included only when build.branch is "main", and
  # uses the postmerge image tagged with $BUILDKITE_COMMIT.
  - label: 'A100'
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: A100
    depends_on: wait-for-container-image
    if: 'build.branch == "main"'
    plugins:
      - kubernetes:
          podSpec:
            priorityClassName: perf-benchmark
            containers:
              - image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
                command:
                  - bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
                resources:
                  limits:
                    nvidia.com/gpu: 8
                volumeMounts:
                  # Mounts the "devshm" volume declared under volumes below.
                  - name: devshm
                    mountPath: /dev/shm
                env:
                  - name: VLLM_USAGE_SOURCE
                    value: ci-test
                  # Read from the hf-token-secret secret rather than inlined.
                  - name: HF_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token-secret
                        key: token
            nodeSelector:
              nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
            volumes:
              # Memory-backed emptyDir used for /dev/shm in the container.
              - name: devshm
                emptyDir:
                  medium: Memory

  # Post-merge H200 benchmark: included only when build.branch is "main".
  # Runs the benchmark script through the Docker plugin using the postmerge
  # image tagged with $BUILDKITE_COMMIT.
  - label: 'H200'
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: H200
    depends_on: wait-for-container-image
    if: 'build.branch == "main"'
    plugins:
      - docker#v5.12.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
          command:
            - bash
            - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
          mount-buildkite-agent: true
          propagate-environment: true
          ipc: host
          # Quoted so the comma-separated value stays a string on any parser.
          gpus: '4,5,6,7'
          volumes:
            - /data/benchmark-hf-cache:/root/.cache/huggingface
          environment:
            # Listed by name only; no values are set in this file.
            - VLLM_USAGE_SOURCE
            - HF_TOKEN

  #- block: "Run H100 Benchmark"
    #key: block-h100
    #depends_on: ~

  # Post-merge H100 benchmark: included only when build.branch is "main".
  # Runs the benchmark script through the Docker plugin using the postmerge
  # image tagged with $BUILDKITE_COMMIT.
  - label: 'H100'
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: H100
    depends_on: wait-for-container-image
    if: 'build.branch == "main"'
    plugins:
      - docker#v5.12.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
          command:
            - bash
            - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
          mount-buildkite-agent: true
          propagate-environment: true
          ipc: host
          gpus: all  # see CUDA_VISIBLE_DEVICES for actual GPUs used
          volumes:
            - /data/benchmark-hf-cache:/root/.cache/huggingface
          environment:
            # Listed by name only; no values are set in this file.
            - VLLM_USAGE_SOURCE
            - HF_TOKEN

  # Premerge benchmark
  # Premerge A100 benchmark: included only when build.branch is not "main",
  # and uses the test-repo image tagged with $BUILDKITE_COMMIT.
  - label: 'A100'
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: A100
    depends_on: wait-for-container-image
    if: 'build.branch != "main"'
    plugins:
      - kubernetes:
          podSpec:
            priorityClassName: perf-benchmark
            containers:
              - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
                command:
                  - bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
                resources:
                  limits:
                    nvidia.com/gpu: 8
                volumeMounts:
                  # Mounts the "devshm" volume declared under volumes below.
                  - name: devshm
                    mountPath: /dev/shm
                env:
                  - name: VLLM_USAGE_SOURCE
                    value: ci-test
                  # Read from the hf-token-secret secret rather than inlined.
                  - name: HF_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token-secret
                        key: token
            nodeSelector:
              nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
            volumes:
              # Memory-backed emptyDir used for /dev/shm in the container.
              - name: devshm
                emptyDir:
                  medium: Memory

  # Premerge H200 benchmark: included only when build.branch is not "main".
  # Runs the benchmark script through the Docker plugin using the test-repo
  # image tagged with $BUILDKITE_COMMIT.
  - label: 'H200'
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: H200
    depends_on: wait-for-container-image
    if: 'build.branch != "main"'
    plugins:
      - docker#v5.12.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          command:
            - bash
            - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
          mount-buildkite-agent: true
          propagate-environment: true
          ipc: host
          # Quoted so the comma-separated value stays a string on any parser.
          gpus: '4,5,6,7'
          volumes:
            - /data/benchmark-hf-cache:/root/.cache/huggingface
          environment:
            # Listed by name only; no values are set in this file.
            - VLLM_USAGE_SOURCE
            - HF_TOKEN

  #- block: "Run H100 Benchmark"
    #key: block-h100
    #depends_on: ~

  # Premerge H100 benchmark: included only when build.branch is not "main".
  # Runs the benchmark script through the Docker plugin using the test-repo
  # image tagged with $BUILDKITE_COMMIT.
  - label: 'H100'
    # skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
    agents:
      queue: H100
    depends_on: wait-for-container-image
    if: 'build.branch != "main"'
    plugins:
      - docker#v5.12.0:
          image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
          command:
            - bash
            - .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
          mount-buildkite-agent: true
          propagate-environment: true
          ipc: host
          gpus: all  # see CUDA_VISIBLE_DEVICES for actual GPUs used
          volumes:
            - /data/benchmark-hf-cache:/root/.cache/huggingface
          environment:
            # Listed by name only; no values are set in this file.
            - VLLM_USAGE_SOURCE
            - HF_TOKEN
[CI/Build][Misc] Add CI that benchmarks vllm performance on those PRs with `perf-benchmarks` label (#5073) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-06-13 22:36:20 -07:00			`steps:`
			`- label: "Wait for container to be ready"`
[perf-benchmark] Fix dependency for steps in benchmark pipeline (#11710) 2025-01-03 13:38:37 +07:00			`key: wait-for-container-image`
[CI/Build][Misc] Add CI that benchmarks vllm performance on those PRs with `perf-benchmarks` label (#5073) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-06-13 22:36:20 -07:00			`agents:`
			`queue: A100`
			`plugins:`
			`- kubernetes:`
			`podSpec:`
			`containers:`
			`- image: badouralix/curl-jq`
			`command:`
[perf bench] set timeout to debug hanging (#8516) 2024-09-16 14:30:02 -07:00			`- sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh`
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`- label: "Cleanup H100"`
			`agents:`
			`queue: H100`
			`depends_on: ~`
			`command: docker system prune -a --volumes --force`

Benchmark: add H100 suite (#6047) 2024-07-11 09:17:07 -07:00			`- label: "A100"`
[Benchmark] Add new H100 machine (#10547) 2024-11-21 18:27:20 -08:00			`# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"`
[CI/Build][Misc] Add CI that benchmarks vllm performance on those PRs with `perf-benchmarks` label (#5073) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-06-13 22:36:20 -07:00			`agents:`
			`queue: A100`
[perf-benchmark] Fix dependency for steps in benchmark pipeline (#11710) 2025-01-03 13:38:37 +07:00			`depends_on: wait-for-container-image`
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`if: build.branch == "main"`
[CI/Build][Misc] Add CI that benchmarks vllm performance on those PRs with `perf-benchmarks` label (#5073) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-06-13 22:36:20 -07:00			`plugins:`
			`- kubernetes:`
			`podSpec:`
[ci] Add A100 queue into AWS CI template (#5648) Signed-off-by: kevin <kevin@anyscale.com> 2024-06-19 07:42:13 -07:00			`priorityClassName: perf-benchmark`
[CI/Build][Misc] Add CI that benchmarks vllm performance on those PRs with `perf-benchmarks` label (#5073) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-06-13 22:36:20 -07:00			`containers:`
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`- image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT`
[CI/Build][Misc] Add CI that benchmarks vllm performance on those PRs with `perf-benchmarks` label (#5073) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-06-13 22:36:20 -07:00			`command:`
[CI] Organizing performance benchmark files (#7616) 2024-08-19 22:43:54 -07:00			`- bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh`
[CI/Build][Misc] Add CI that benchmarks vllm performance on those PRs with `perf-benchmarks` label (#5073) Co-authored-by: simon-mo <simon.mo@hey.com> 2024-06-13 22:36:20 -07:00			`resources:`
			`limits:`
			`nvidia.com/gpu: 8`
			`volumeMounts:`
			`- name: devshm`
			`mountPath: /dev/shm`
			`env:`
			`- name: VLLM_USAGE_SOURCE`
			`value: ci-test`
			`- name: HF_TOKEN`
			`valueFrom:`
			`secretKeyRef:`
			`name: hf-token-secret`
			`key: token`
			`nodeSelector:`
			`nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB`
			`volumes:`
			`- name: devshm`
			`emptyDir:`
			`medium: Memory`
[perf bench] H200 development (#9768) Signed-off-by: simon-mo <simon.mo@hey.com> 2024-11-20 11:06:56 -08:00
			`- label: "H200"`
[Benchmark] Add new H100 machine (#10547) 2024-11-21 18:27:20 -08:00			`# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"`
[perf bench] H200 development (#9768) Signed-off-by: simon-mo <simon.mo@hey.com> 2024-11-20 11:06:56 -08:00			`agents:`
			`queue: H200`
[perf-benchmark] Fix dependency for steps in benchmark pipeline (#11710) 2025-01-03 13:38:37 +07:00			`depends_on: wait-for-container-image`
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`if: build.branch == "main"`
[perf bench] H200 development (#9768) Signed-off-by: simon-mo <simon.mo@hey.com> 2024-11-20 11:06:56 -08:00			`plugins:`
			`- docker#v5.12.0:`
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT`
[perf bench] H200 development (#9768) Signed-off-by: simon-mo <simon.mo@hey.com> 2024-11-20 11:06:56 -08:00			`command:`
			`- bash`
			`- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh`
			`mount-buildkite-agent: true`
			`propagate-environment: true`
			`ipc: host`
			`gpus: 4,5,6,7`
			`volumes:`
			`- /data/benchmark-hf-cache:/root/.cache/huggingface`
			`environment:`
			`- VLLM_USAGE_SOURCE`
			`- HF_TOKEN`

[CI] Unboock H100 Benchmark (#11419) Signed-off-by: simon-mo <simon.mo@hey.com> 2024-12-22 14:17:43 -08:00			`#- block: "Run H100 Benchmark"`
			`#key: block-h100`
			`#depends_on: ~`
[benchmark] Make H100 benchmark optional (#10908) 2024-12-04 17:02:17 -08:00
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`- label: "H100"`
			`# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"`
[perf-benchmark] cleanup unused Docker images and volumes in H100 benchmark instance (#12706) 2025-02-12 19:51:33 -08:00			`agents:`
			`queue: H100`
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`depends_on: wait-for-container-image`
			`if: build.branch == "main"`
			`plugins:`
			`- docker#v5.12.0:`
			`image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT`
			`command:`
			`- bash`
			`- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh`
			`mount-buildkite-agent: true`
			`propagate-environment: true`
			`ipc: host`
			`gpus: all # see CUDA_VISIBLE_DEVICES for actual GPUs used`
			`volumes:`
			`- /data/benchmark-hf-cache:/root/.cache/huggingface`
			`environment:`
			`- VLLM_USAGE_SOURCE`
			`- HF_TOKEN`

			`# Premerge benchmark`
			`- label: "A100"`
			`# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"`
			`agents:`
			`queue: A100`
			`depends_on: wait-for-container-image`
			`if: build.branch != "main"`
			`plugins:`
			`- kubernetes:`
			`podSpec:`
			`priorityClassName: perf-benchmark`
			`containers:`
			`- image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT`
			`command:`
			`- bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh`
			`resources:`
			`limits:`
			`nvidia.com/gpu: 8`
			`volumeMounts:`
			`- name: devshm`
			`mountPath: /dev/shm`
			`env:`
			`- name: VLLM_USAGE_SOURCE`
			`value: ci-test`
			`- name: HF_TOKEN`
			`valueFrom:`
			`secretKeyRef:`
			`name: hf-token-secret`
			`key: token`
			`nodeSelector:`
			`nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB`
			`volumes:`
			`- name: devshm`
			`emptyDir:`
			`medium: Memory`

			`- label: "H200"`
			`# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"`
			`agents:`
			`queue: H200`
			`depends_on: wait-for-container-image`
			`if: build.branch != "main"`
			`plugins:`
			`- docker#v5.12.0:`
			`image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT`
			`command:`
			`- bash`
			`- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh`
			`mount-buildkite-agent: true`
			`propagate-environment: true`
			`ipc: host`
			`gpus: 4,5,6,7`
			`volumes:`
			`- /data/benchmark-hf-cache:/root/.cache/huggingface`
			`environment:`
			`- VLLM_USAGE_SOURCE`
			`- HF_TOKEN`

			`#- block: "Run H100 Benchmark"`
			`#key: block-h100`
			`#depends_on: ~`
[perf-benchmark] cleanup unused Docker images and volumes in H100 benchmark instance (#12706) 2025-02-12 19:51:33 -08:00
[Benchmark] Add new H100 machine (#10547) 2024-11-21 18:27:20 -08:00			`- label: "H100"`
			`# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"`
			`agents:`
			`queue: H100`
[perf-benchmark] Fix dependency for steps in benchmark pipeline (#11710) 2025-01-03 13:38:37 +07:00			`depends_on: wait-for-container-image`
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`if: build.branch != "main"`
[Benchmark] Add new H100 machine (#10547) 2024-11-21 18:27:20 -08:00			`plugins:`
			`- docker#v5.12.0:`
[perf-benchmark] Fix ECR path for premerge benchmark (#13512) Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal> 2025-02-18 23:56:11 -08:00			`image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT`
[Benchmark] Add new H100 machine (#10547) 2024-11-21 18:27:20 -08:00			`command:`
			`- bash`
			`- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh`
			`mount-buildkite-agent: true`
			`propagate-environment: true`
			`ipc: host`
			`gpus: all # see CUDA_VISIBLE_DEVICES for actual GPUs used`
			`volumes:`
			`- /data/benchmark-hf-cache:/root/.cache/huggingface`
			`environment:`
			`- VLLM_USAGE_SOURCE`
			`- HF_TOKEN`