2024-04-18 15:26:01 -07:00
|
|
|
# Default base image: the Neuron PyTorch inference image from the public ECR gallery.
# https://gallery.ecr.aws/neuron/pytorch-inference-neuronx
# Override at build time with `--build-arg BASE_IMAGE=...`.
ARG BASE_IMAGE="public.ecr.aws/neuron/pytorch-inference-neuronx:2.5.1-neuronx-py310-sdk2.22.0-ubuntu22.04"

FROM $BASE_IMAGE

# An ARG declared before FROM is only in scope for FROM lines. Redeclare it
# (without a value, so the default/--build-arg above is inherited) to make it
# visible inside this stage; otherwise the echo below prints an empty string.
ARG BASE_IMAGE
RUN echo "Base image is $BASE_IMAGE"
|
|
|
|
|
|
|
|
# Install some basic utilities.
# `apt-get update` and `apt-get install` share one layer so the package index
# is never stale, and the index is removed in the same layer so it does not
# add to the image size.
RUN apt-get update && \
    apt-get install -y \
        git \
        python3 \
        python3-pip \
        ffmpeg libsm6 libxext6 libgl1 && \
    rm -rf /var/lib/apt/lists/*
|
2024-04-18 15:26:01 -07:00
|
|
|
|
|
|
|
### Mount Point ###
# When launching the container, mount the code directory to /workspace
# (or to whatever APP_MOUNT is overridden to via --build-arg).
ARG APP_MOUNT=/workspace

# Shell form is used deliberately: the JSON-array form requires double-quoted
# strings, and an unquoted `[ ${APP_MOUNT} ]` is not valid JSON, so it would be
# parsed as three whitespace-separated paths: "[", the mount point, and "]".
VOLUME ${APP_MOUNT}

# All following relative paths (COPY, pip install -e, ...) resolve from here.
WORKDIR ${APP_MOUNT}/vllm
|
2024-04-18 15:26:01 -07:00
|
|
|
|
|
|
|
# Python tooling and pinned runtime packages.
# --no-cache-dir is applied to every install so pip's download cache is not
# stored in the image layers.

# Upgrade pip itself first so the installs below use the newer version.
RUN python3 -m pip install --no-cache-dir --upgrade pip
RUN python3 -m pip install --no-cache-dir fastapi ninja tokenizers pandas tenacity
RUN python3 -m pip install --no-cache-dir sentencepiece transformers==4.48.0 -U
# neuronx-cc is resolved with the Neuron pip repository as an extra index.
RUN python3 -m pip install --no-cache-dir neuronx-cc==2.17.194.0 --extra-index-url=https://pip.repos.neuron.amazonaws.com -U
RUN python3 -m pip install --no-cache-dir pytest
|
2024-04-18 15:26:01 -07:00
|
|
|
|
2025-02-08 01:41:35 -08:00
|
|
|
# Explicitly remove any existing transformers-neuronx package to avoid a
# version conflict; a specific version is installed again later in this file.
RUN python3 -m pip uninstall --yes transformers-neuronx
|
|
|
|
|
2024-10-17 19:25:06 +02:00
|
|
|
# Copy the build context into the current WORKDIR.
COPY . .

# Optional repository check: runs tools/check_repo.sh only when the build is
# given a GIT_REPO_CHECK value other than 0 (the default skips it).
# .git is bind-mounted from the build context for this step only.
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
    if [ "$GIT_REPO_CHECK" != "0" ]; then \
        bash tools/check_repo.sh; \
    fi
|
2024-04-18 15:26:01 -07:00
|
|
|
|
2024-10-17 19:25:06 +02:00
|
|
|
# Build-time tooling plus the requirements listed in requirements/neuron.txt.
# --no-cache-dir keeps pip's download cache out of the image layer, matching
# the other pip installs in this file that already pass it.
RUN python3 -m pip install --no-cache-dir -U \
    'cmake>=3.26' ninja packaging 'setuptools-scm>=8' wheel jinja2 \
    -r requirements/neuron.txt
|
2024-04-18 15:26:01 -07:00
|
|
|
|
2024-06-11 00:37:56 -07:00
|
|
|
# key=value form; the space-separated `ENV key value` syntax is legacy.
# NOTE(review): presumably read by the vLLM build/install step — confirm.
ENV VLLM_TARGET_DEVICE=neuron
|
2024-09-23 18:44:26 +02:00
|
|
|
# Editable, verbose install of the project in the current directory, without
# build isolation (so the build tooling installed earlier is used as-is).
# .git is bind-mounted from the build context for this step only.
# NOTE(review): presumably needed so setuptools-scm can derive the version — confirm.
RUN --mount=type=bind,source=.git,target=.git \
    pip install --verbose --no-build-isolation --editable .
|
2024-04-18 15:26:01 -07:00
|
|
|
|
2024-11-26 00:20:04 -08:00
|
|
|
# Development dependencies (for testing): editable install of the helper
# package located at tests/vllm_test_utils.
RUN python3 -m pip install --editable tests/vllm_test_utils
|
|
|
|
|
2025-02-08 01:41:35 -08:00
|
|
|
# Install the transformers-neuronx package as an optional dependency (for V0).
# FIXME: the `--no-deps` argument is temporarily added to resolve a
# transformers package version conflict.
# The requirement is quoted so the shell cannot glob-expand the `*` in `0.13.*`.
RUN python3 -m pip install --no-cache-dir 'transformers-neuronx==0.13.*' --extra-index-url=https://pip.repos.neuron.amazonaws.com -U --no-deps
|
|
|
|
|
2025-01-06 21:36:10 -08:00
|
|
|
# Overwrite /usr/local/bin/dockerd-entrypoint.py with a one-line Python script
# that runs its command-line arguments as a subprocess and fails if that
# command exits non-zero.
# NOTE(review): presumably the base image's ENTRYPOINT invokes this script — confirm.
RUN printf '%s\n' 'import subprocess; import sys; subprocess.check_call(sys.argv[1:])' \
        > /usr/local/bin/dockerd-entrypoint.py

# Default command: an interactive bash shell.
CMD ["/bin/bash"]
|