# vllm/.buildkite/run-cpu-test.sh

# This script builds the CPU docker image and runs the offline inference inside the container.
# It serves as a sanity check for compilation and basic model usage.
# Abort on the first failing command and echo every command as it runs.
set -o errexit -o xtrace

# Try building the docker image
# Both images are built from the same Dockerfile.cpu in the current directory.
# The second build passes VLLM_CPU_DISABLE_AVX512=true as a build argument and
# is tagged cpu-test-avx2 (presumably a build without AVX512 -- the effect of
# the argument is defined in Dockerfile.cpu, which is not shown here).
docker build \
  -t cpu-test \
  -f Dockerfile.cpu \
  .
docker build \
  --build-arg VLLM_CPU_DISABLE_AVX512=true \
  -t cpu-test-avx2 \
  -f Dockerfile.cpu \
  .

# Setup cleanup
# Force-remove both test containers. Any failure of `docker rm` is ignored on
# purpose so that this function always returns success and never aborts the
# script under `set -e`.
remove_docker_container() {
  local -a containers=(cpu-test cpu-test-avx2)
  docker rm -f "${containers[@]}" || true
}

# Remove the containers whenever the script exits, on any exit path.
trap remove_docker_container EXIT

# Also remove them right now, before new containers with these names are
# started.
remove_docker_container

# Run the image
# Start one detached container per image; each container is named after its
# image. Both containers get the same settings:
#   -v ...                 mounts the host Hugging Face cache at
#                          /root/.cache/huggingface inside the container
#   --cpuset-cpus=48-95    restricts the container to CPUs 48-95
#   --cpuset-mems=1        restricts memory allocation to memory node 1
#   --network host         uses the host network
#   -e HF_TOKEN            forwards HF_TOKEN from the calling environment
#                          (the value never appears on the command line)
#   --env VLLM_CPU_KVCACHE_SPACE=4
for image in cpu-test cpu-test-avx2; do
  docker run -itd \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --cpuset-cpus=48-95 \
    --cpuset-mems=1 \
    --network host \
    -e HF_TOKEN \
    --env VLLM_CPU_KVCACHE_SPACE=4 \
    --name "$image" \
    "$image"
done

# offline inference
# Run the offline inference example inside each running container, first in
# cpu-test and then in cpu-test-avx2.
for container in cpu-test cpu-test-avx2; do
  docker exec "$container" bash -c 'python3 examples/offline_inference.py'
done

# Run basic model test
# Installs the test dependencies inside the cpu-test container and runs the
# model tests that are not marked "vlm".
# `set -e` makes the in-container shell stop at the first failing command, so a
# failed `pip install` fails this step directly instead of being ignored while
# pytest is attempted anyway.
# tests/models/test_jamba.py is ignored because Mamba on CPU is not supported.
docker exec cpu-test bash -c '
  set -e
  pip install pytest Pillow protobuf
  pytest -v -s tests/models -m "not vlm" \
    --ignore=tests/models/test_embedding.py \
    --ignore=tests/models/test_registry.py \
    --ignore=tests/models/test_jamba.py'
[Hardware][Intel] Add CPU inference backend (#3634) Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Yuan Zhou <yuan.zhou@intel.com> 2024-04-02 13:07:30 +08:00			`# This script build the CPU docker image and run the offline inference inside the container.`
			`# It serves a sanity check for compilation and basic model usage.`
			`set -ex`

			`# Try building the docker image`
			`docker build -t cpu-test -f Dockerfile.cpu .`
[CI/BUILD] Support non-AVX512 vLLM building and testing (#5574) 2024-06-18 02:36:10 +08:00			`docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-avx2 -f Dockerfile.cpu .`
[Hardware][Intel] Add CPU inference backend (#3634) Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Yuan Zhou <yuan.zhou@intel.com> 2024-04-02 13:07:30 +08:00
			`# Setup cleanup`
[CI/BUILD] Support non-AVX512 vLLM building and testing (#5574) 2024-06-18 02:36:10 +08:00			`remove_docker_container() { docker rm -f cpu-test cpu-test-avx2 \|\| true; }`
[Hardware][Intel] Add CPU inference backend (#3634) Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Yuan Zhou <yuan.zhou@intel.com> 2024-04-02 13:07:30 +08:00			`trap remove_docker_container EXIT`
			`remove_docker_container`

[CI/BUILD] enable intel queue for longer CPU tests (#4113) 2024-06-04 01:39:50 +08:00			`# Run the image`
[CI/Build] Reducing CPU CI execution time (#5241) 2024-06-05 01:26:40 +08:00			`docker run -itd -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus=48-95 --cpuset-mems=1 --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --name cpu-test cpu-test`
[CI/BUILD] Support non-AVX512 vLLM building and testing (#5574) 2024-06-18 02:36:10 +08:00			`docker run -itd -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus=48-95 --cpuset-mems=1 --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --name cpu-test-avx2 cpu-test-avx2`
[CI/BUILD] enable intel queue for longer CPU tests (#4113) 2024-06-04 01:39:50 +08:00
			`# offline inference`
			`docker exec cpu-test bash -c "python3 examples/offline_inference.py"`
[CI/BUILD] Support non-AVX512 vLLM building and testing (#5574) 2024-06-18 02:36:10 +08:00			`docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"`
[CI/BUILD] enable intel queue for longer CPU tests (#4113) 2024-06-04 01:39:50 +08:00
			`# Run basic model test`
			`docker exec cpu-test bash -c "cd tests;`
			`pip install pytest Pillow protobuf`
			`cd ../`
[Model] Jamba support (#4115) Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai> Co-authored-by: Erez Schwartz <erezs@ai21.com> Co-authored-by: Mor Zusman <morz@ai21.com> Co-authored-by: tomeras91 <57313761+tomeras91@users.noreply.github.com> Co-authored-by: Tomer Asida <tomera@ai21.com> Co-authored-by: Zhuohan Li <zhuohan123@gmail.com> Co-authored-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai> 2024-07-03 02:11:29 +03:00			`pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py" # Mamba on CPU is not supported`