---
# CI workflow: lints the vLLM Helm chart and then deploys it end-to-end into a
# kind cluster backed by a local MinIO (S3-compatible) store, finishing with a
# smoke-test request against the served model.
name: Lint and Deploy Charts

on: pull_request

jobs:
  lint-and-deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # Full history is needed so chart-testing can diff against the target branch.
          fetch-depth: 0

      - name: Set up Helm
        uses: azure/setup-helm@b9e51907a09c216f16ebe8536097933489208112 # v4.3.0
        with:
          version: v3.14.4

      # Python is required because ct lint runs Yamale and yamllint which require Python.
      - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
        with:
          python-version: '3.13'

      - name: Set up chart-testing
        uses: helm/chart-testing-action@0d28d3144d3a25ea2cc349d6e59901c4ff469b3b # v2.7.0
        with:
          version: v3.10.1

      - name: Run chart-testing (lint)
        run: ct lint --target-branch ${{ github.event.repository.default_branch }} --chart-dirs examples/online_serving/chart-helm --charts examples/online_serving/chart-helm

      # Stand up a local S3-compatible store and seed it with the opt-125m model
      # files so the chart's init step can pull the model from "S3".
      - name: Setup minio
        run: |
          docker network create vllm-net
          docker run -d -p 9000:9000 --name minio --net vllm-net \
            -e "MINIO_ACCESS_KEY=minioadmin" \
            -e "MINIO_SECRET_KEY=minioadmin" \
            -v /tmp/data:/data \
            -v /tmp/config:/root/.minio \
            minio/minio server /data
          export AWS_ACCESS_KEY_ID=minioadmin
          export AWS_SECRET_ACCESS_KEY=minioadmin
          export AWS_EC2_METADATA_DISABLED=true
          mkdir opt-125m
          cd opt-125m && curl -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,special_tokens_map.json,tokenizer_config.json,vocab.json}" && cd ..
          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive

      - name: Create kind cluster
        uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0

      - name: Build the Docker image vllm cpu
        run: docker buildx build -f docker/Dockerfile.cpu -t vllm-cpu-env .

      # Preload the images into the kind node, bridge it onto the MinIO network,
      # and create the namespace the chart will be installed into.
      - name: Configuration of docker images, network and namespace for the kind cluster
        run: |
          docker pull amazon/aws-cli:2.6.4
          kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing
          kind load docker-image vllm-cpu-env:latest --name chart-testing
          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
          kubectl create ns ns-vllm

      - name: Run chart-testing (install)
        run: |
          export AWS_ACCESS_KEY_ID=minioadmin
          export AWS_SECRET_ACCESS_KEY=minioadmin
          # Background log tail so deployment pod output is visible if the install hangs.
          sleep 30 && kubectl -n ns-vllm logs -f "$(kubectl -n ns-vllm get pods | awk '/deployment/ {print $1;exit}')" &
          helm install --wait --wait-for-jobs --timeout 5m0s --debug --create-namespace --namespace=ns-vllm test-vllm examples/online_serving/chart-helm -f examples/online_serving/chart-helm/values.yaml --set secrets.s3endpoint=http://minio:9000 --set secrets.s3bucketname=testbucket --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY --set resources.requests.cpu=1 --set resources.requests.memory=4Gi --set resources.limits.cpu=2 --set resources.limits.memory=5Gi --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE --set image.env[1].name=VLLM_LOGGING_LEVEL --set-string image.env[0].value="1" --set-string image.env[1].value="DEBUG" --set-string extraInit.s3modelpath="opt-125m/" --set-string 'resources.limits.nvidia\.com/gpu=0' --set-string 'resources.requests.nvidia\.com/gpu=0' --set-string image.repository="vllm-cpu-env"

      # Smoke test: port-forward the service and issue a completion request.
      # curl -f makes a non-2xx HTTP status fail the step.
      - name: curl test
        run: |
          kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 &
          sleep 10
          CODE="$(curl -v -f --location http://localhost:8001/v1/completions \
            --header "Content-Type: application/json" \
            --data '{
              "model": "opt-125m",
              "prompt": "San Francisco is a",
              "max_tokens": 7,
              "temperature": 0
            }'):$CODE"
          echo "$CODE"