[CI/BUILD] Support non-AVX512 vLLM building and testing (#5574)
This commit is contained in:
parent
728c4c8a06
commit
ab66536dbf
@ -4,17 +4,20 @@ set -ex
|
|||||||
|
|
||||||
# Try building the docker image
|
# Try building the docker image
|
||||||
docker build -t cpu-test -f Dockerfile.cpu .
|
docker build -t cpu-test -f Dockerfile.cpu .
|
||||||
|
docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-avx2 -f Dockerfile.cpu .
|
||||||
|
|
||||||
# Setup cleanup
|
# Setup cleanup
|
||||||
# Force-remove the cpu-test container; `|| true` makes a failed removal non-fatal.
remove_docker_container() { docker rm -f cpu-test || true; }
|
# Force-remove both test containers (cpu-test and cpu-test-avx2); `|| true` makes a failed removal non-fatal.
remove_docker_container() { docker rm -f cpu-test cpu-test-avx2 || true; }
|
||||||
trap remove_docker_container EXIT
|
trap remove_docker_container EXIT
|
||||||
remove_docker_container
|
remove_docker_container
|
||||||
|
|
||||||
# Run the image
|
# Run the image
|
||||||
docker run -itd -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus=48-95 --cpuset-mems=1 --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --name cpu-test cpu-test
|
docker run -itd -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus=48-95 --cpuset-mems=1 --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --name cpu-test cpu-test
|
||||||
|
docker run -itd -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus=48-95 --cpuset-mems=1 --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --name cpu-test-avx2 cpu-test-avx2
|
||||||
|
|
||||||
# offline inference
|
# offline inference
|
||||||
docker exec cpu-test bash -c "python3 examples/offline_inference.py"
|
docker exec cpu-test bash -c "python3 examples/offline_inference.py"
|
||||||
|
docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
|
||||||
|
|
||||||
# Run basic model test
|
# Run basic model test
|
||||||
docker exec cpu-test bash -c "cd tests;
|
docker exec cpu-test bash -c "cd tests;
|
||||||
|
@ -21,6 +21,10 @@ WORKDIR /workspace/vllm
|
|||||||
|
|
||||||
RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
|
||||||
|
|
||||||
|
# Support building vLLM without AVX512: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
|
||||||
|
ARG VLLM_CPU_DISABLE_AVX512
|
||||||
|
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
|
||||||
|
|
||||||
RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install
|
RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install
|
||||||
|
|
||||||
WORKDIR /workspace/
|
WORKDIR /workspace/
|
||||||
|
@ -33,10 +33,21 @@ function (find_isa CPUINFO TARGET OUT)
|
|||||||
endif()
|
endif()
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
|
# is_avx512_disabled(<OUT>)
#
# Reports whether AVX512 has been switched off through the environment.
#
#   OUT - name of the variable to set in the caller's scope. It receives ON
#         when the environment variable VLLM_CPU_DISABLE_AVX512 is exactly
#         "true", and OFF in every other case (unset, empty, or any other
#         value).
#
# Example:
#   is_avx512_disabled(AVX512_DISABLED)
function (is_avx512_disabled OUT)
|
||||||
|
# $ENV{} is read when CMake configures, so the variable must be set in the
# environment of the configure step.
set(DISABLE_AVX512 $ENV{VLLM_CPU_DISABLE_AVX512})
|
||||||
|
# STREQUAL is case-sensitive: only the lowercase string "true" disables
# AVX512; values such as "TRUE", "ON" or "1" fall through to the else() branch.
if(DISABLE_AVX512 AND DISABLE_AVX512 STREQUAL "true")
|
||||||
|
set(${OUT} ON PARENT_SCOPE)
|
||||||
|
else()
|
||||||
|
set(${OUT} OFF PARENT_SCOPE)
|
||||||
|
endif()
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
is_avx512_disabled(AVX512_DISABLED)
|
||||||
|
|
||||||
find_isa(${CPUINFO} "avx2" AVX2_FOUND)
|
find_isa(${CPUINFO} "avx2" AVX2_FOUND)
|
||||||
find_isa(${CPUINFO} "avx512f" AVX512_FOUND)
|
find_isa(${CPUINFO} "avx512f" AVX512_FOUND)
|
||||||
|
|
||||||
if (AVX512_FOUND)
|
if (AVX512_FOUND AND NOT AVX512_DISABLED)
|
||||||
list(APPEND CXX_COMPILE_FLAGS
|
list(APPEND CXX_COMPILE_FLAGS
|
||||||
"-mavx512f"
|
"-mavx512f"
|
||||||
"-mavx512vl"
|
"-mavx512vl"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user