# vllm/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

#!/bin/bash

# Shell options for the whole script: echo each command before running it
# (xtrace), treat unset variables as errors (nounset), and stop at the first
# failing command (errexit).
set -o xtrace -o nounset -o errexit

# Build the TPU image from docker/Dockerfile.tpu, using the current directory
# as the build context, and tag it vllm-tpu.
docker build --file docker/Dockerfile.tpu --tag vllm-tpu .

# Container cleanup.
#
# remove_docker_container force-removes the container named tpu-test. A failed
# removal is deliberately ignored, so the function always returns 0.
remove_docker_container() {
  docker rm -f tpu-test || return 0
}

# Run the cleanup whenever the script exits.
trap remove_docker_container EXIT

# Also run it once up front, in case a previous run left the container behind.
remove_docker_container

# For HF_TOKEN.
# Tracing is paused while the token is loaded and checked; otherwise `set -x`
# would print the secret value into the CI log. It is switched back on
# immediately afterwards.
set +x
source /etc/environment
# Keep the previous fail-fast behaviour: abort if HF_TOKEN is not set at all.
: "${HF_TOKEN?HF_TOKEN must be set (expected from /etc/environment)}"
# Exported so that `docker run -e HF_TOKEN` can forward it by name, which keeps
# the value out of the traced command line and out of the docker argv.
export HF_TOKEN
set -x

# Run the TPU v1 test suite inside a container named tpu-test.
# The container command first installs the extra test dependencies, then
# enables VLLM_USE_V1 and VLLM_XLA_CHECK_RECOMPILATION, then runs each test.
# All steps are chained with `&&`, so the first failure stops the chain and
# the non-zero exit status of `docker run` aborts this script (errexit).
# The `echo TEST_n` lines are markers that show in the log which step ran.
# The pip requirement with brackets is single-quoted so the shell inside the
# container cannot treat `[api]` as a glob pattern.
docker run --privileged --net host --shm-size=16G -it \
    -e HF_TOKEN --name tpu-test \
    vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
    && python3 -m pip install pytest \
    && python3 -m pip install 'lm_eval[api]==0.4.4' \
    && export VLLM_USE_V1=1 \
    && export VLLM_XLA_CHECK_RECOMPILATION=1 \
    && echo TEST_0 \
    && pytest -v -s /workspace/vllm/tests/v1/tpu/test_perf.py \
    && echo TEST_1 \
    && pytest -v -s /workspace/vllm/tests/tpu/test_compilation.py \
    && echo TEST_2 \
    && pytest -v -s /workspace/vllm/tests/v1/tpu/test_basic.py \
    && echo TEST_3 \
    && pytest -v -s /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine \
    && echo TEST_4 \
    && pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
    && echo TEST_5 \
    && python3 /workspace/vllm/examples/offline_inference/tpu.py \
    && echo TEST_6 \
    && pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py \
    && echo TEST_7 \
    && pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py \
    && echo TEST_8 \
    && pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py \
    && echo TEST_9 \
    && pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py"


# TODO: This test fails because it uses RANDOM_SEED sampling
# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
[CI] Add TPU v1 test (#14834) Signed-off-by: Richard Liu <ricliu@google.com> 2025-03-14 14:13:30 -07:00			`#!/bin/bash`

[TPU] Support sliding window and logit soft capping in the paged attention kernel for TPU. (#15732) Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com> 2025-04-03 14:23:28 -07:00			`set -xue`
[CI] Add TPU v1 test (#14834) Signed-off-by: Richard Liu <ricliu@google.com> 2025-03-14 14:13:30 -07:00
			`# Build the docker image.`
Move dockerfiles into their own directory (#14549) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> 2025-03-31 21:47:32 +01:00			`docker build -f docker/Dockerfile.tpu -t vllm-tpu .`
[CI] Add TPU v1 test (#14834) Signed-off-by: Richard Liu <ricliu@google.com> 2025-03-14 14:13:30 -07:00
			`# Set up cleanup.`
			`remove_docker_container() { docker rm -f tpu-test \|\| true; }`
			`trap remove_docker_container EXIT`
			`# Remove the container that might not be cleaned up in the previous run.`
			`remove_docker_container`

			`# For HF_TOKEN.`
			`source /etc/environment`
			`# Run a simple end-to-end example.`
			`docker run --privileged --net host --shm-size=16G -it \`
[V1] TPU - Fix CI/CD runner (#14974) 2025-03-17 17:07:07 -04:00			`-e "HF_TOKEN=$HF_TOKEN" --name tpu-test \`
[CI] Add TPU v1 test (#14834) Signed-off-by: Richard Liu <ricliu@google.com> 2025-03-14 14:13:30 -07:00			`vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \`
			`&& python3 -m pip install pytest \`
			`&& python3 -m pip install lm_eval[api]==0.4.4 \`
[Hardware][TPU] Add check for no additional graph compilation during runtime (#14710) Signed-off-by: Siyuan Liu <lsiyuan@google.com> 2025-03-20 20:05:28 -07:00			`&& export VLLM_USE_V1=1 \`
			`&& export VLLM_XLA_CHECK_RECOMPILATION=1 \`
[V1] TPU CI - Add basic perf regression test (#15414) Signed-off-by: Alexander Matveev <amatveev@redhat.com> 2025-03-31 13:25:20 -04:00			`&& echo TEST_0 \`
			`&& pytest -v -s /workspace/vllm/tests/v1/tpu/test_perf.py \`
[V1] TPU - Fix CI/CD runner (#14974) 2025-03-17 17:07:07 -04:00			`&& echo TEST_1 \`
[V1] TPU CI - Fix test_compilation.py (#15570) Signed-off-by: Alexander Matveev <amatveev@redhat.com> 2025-03-26 17:51:54 -04:00			`&& pytest -v -s /workspace/vllm/tests/tpu/test_compilation.py \`
[V1] TPU - Fix CI/CD runner (#14974) 2025-03-17 17:07:07 -04:00			`&& echo TEST_2 \`
[Hardware][TPU] Add check for no additional graph compilation during runtime (#14710) Signed-off-by: Siyuan Liu <lsiyuan@google.com> 2025-03-20 20:05:28 -07:00			`&& pytest -v -s /workspace/vllm/tests/v1/tpu/test_basic.py \`
[V1] TPU - Fix CI/CD runner (#14974) 2025-03-17 17:07:07 -04:00			`&& echo TEST_3 \`
[Hardware][TPU] Add check for no additional graph compilation during runtime (#14710) Signed-off-by: Siyuan Liu <lsiyuan@google.com> 2025-03-20 20:05:28 -07:00			`&& pytest -v -s /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine \`
[V1] TPU - Fix CI/CD runner (#14974) 2025-03-17 17:07:07 -04:00			`&& echo TEST_4 \`
[TPU][V1][Bugfix] Fix w8a8 recompiilation with GSM8K (#15714) Signed-off-by: NickLucche <nlucches@redhat.com> 2025-03-29 05:13:06 +01:00			`&& pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py \`
[CI][TPU] Temporarily Disable Quant Test on TPU (#15649) Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com> 2025-03-27 22:45:05 -04:00			`&& echo TEST_5 \`
[TPU][V1][Bugfix] Fix w8a8 recompiilation with GSM8K (#15714) Signed-off-by: NickLucche <nlucches@redhat.com> 2025-03-29 05:13:06 +01:00			`&& python3 /workspace/vllm/examples/offline_inference/tpu.py \`
[CI][TPU] Temporarily Disable Quant Test on TPU (#15649) Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com> 2025-03-27 22:45:05 -04:00			`&& echo TEST_6 \`
[TPU][V1][Bugfix] Fix w8a8 recompiilation with GSM8K (#15714) Signed-off-by: NickLucche <nlucches@redhat.com> 2025-03-29 05:13:06 +01:00			`&& pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py \`
			`&& echo TEST_7 \`
[V1][TPU] TPU-optimized top-p implementation (avoids scattering). (#15736) Signed-off-by: Hyesoo Yang <hyeygit@gmail.com> Co-authored-by: root <root@t1v-n-822696b7-w-0.us-central2-b.c.tpu-prod-env-large-adhoc.internal> 2025-04-02 17:18:08 -07:00			`&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py \`
			`&& echo TEST_8 \`
[TPU] Support sliding window and logit soft capping in the paged attention kernel for TPU. (#15732) Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com> 2025-04-03 14:23:28 -07:00			`&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py \`
			`&& echo TEST_9 \`
			`&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py" \`
[Hardware][TPU] Add check for no additional graph compilation during runtime (#14710) Signed-off-by: Siyuan Liu <lsiyuan@google.com> 2025-03-20 20:05:28 -07:00
[V1] TPU - Fix CI/CD runner (#14974) 2025-03-17 17:07:07 -04:00
[V1] TPU - CI/CD use smaller model (#15054) Signed-off-by: Alexander Matveev <amatveev@redhat.com> 2025-03-18 17:39:21 -04:00			`# TODO: This test fails because it uses RANDOM_SEED sampling`
[V1] TPU - Fix CI/CD runner (#14974) 2025-03-17 17:07:07 -04:00			`# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \`