[Build] Cython compilation support fix (#14296)
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
This commit is contained in:
parent
23fdab00a8
commit
8279201ce6
@ -40,7 +40,7 @@ ARG USE_CYTHON
|
|||||||
RUN cd vllm \
|
RUN cd vllm \
|
||||||
&& python3 -m pip install -r requirements/rocm.txt \
|
&& python3 -m pip install -r requirements/rocm.txt \
|
||||||
&& python3 setup.py clean --all \
|
&& python3 setup.py clean --all \
|
||||||
&& if [ ${USE_CYTHON} -eq "1" ]; then python3 setup_cython.py build_ext --inplace; fi \
|
&& if [ ${USE_CYTHON} -eq "1" ]; then python3 tests/build_cython.py build_ext --inplace; fi \
|
||||||
&& python3 setup.py bdist_wheel --dist-dir=dist
|
&& python3 setup.py bdist_wheel --dist-dir=dist
|
||||||
FROM scratch AS export_vllm
|
FROM scratch AS export_vllm
|
||||||
ARG COMMON_WORKDIR
|
ARG COMMON_WORKDIR
|
||||||
|
@ -86,6 +86,7 @@ exclude = [
|
|||||||
"vllm/triton_utils/**/*.py" = ["UP006", "UP035"]
|
"vllm/triton_utils/**/*.py" = ["UP006", "UP035"]
|
||||||
"vllm/vllm_flash_attn/**/*.py" = ["UP006", "UP035"]
|
"vllm/vllm_flash_attn/**/*.py" = ["UP006", "UP035"]
|
||||||
"vllm/worker/**/*.py" = ["UP006", "UP035"]
|
"vllm/worker/**/*.py" = ["UP006", "UP035"]
|
||||||
|
"vllm/utils.py" = ["UP006", "UP035"]
|
||||||
|
|
||||||
[tool.ruff.lint]
|
[tool.ruff.lint]
|
||||||
select = [
|
select = [
|
||||||
|
38
tests/build_cython.py
Normal file
38
tests/build_cython.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
import Cython.Compiler.Options
|
||||||
|
from Cython.Build import cythonize
|
||||||
|
from setuptools import setup
|
||||||
|
|
||||||
|
Cython.Compiler.Options.annotate = True
|
||||||
|
|
||||||
|
infiles = []
|
||||||
|
|
||||||
|
infiles += [
|
||||||
|
"vllm/engine/llm_engine.py",
|
||||||
|
"vllm/transformers_utils/detokenizer.py",
|
||||||
|
"vllm/engine/output_processor/single_step.py",
|
||||||
|
"vllm/outputs.py",
|
||||||
|
"vllm/engine/output_processor/stop_checker.py",
|
||||||
|
]
|
||||||
|
|
||||||
|
infiles += [
|
||||||
|
"vllm/core/scheduler.py",
|
||||||
|
"vllm/sequence.py",
|
||||||
|
"vllm/core/block_manager.py",
|
||||||
|
]
|
||||||
|
|
||||||
|
infiles += [
|
||||||
|
"vllm/model_executor/layers/sampler.py",
|
||||||
|
"vllm/sampling_params.py",
|
||||||
|
"vllm/utils.py",
|
||||||
|
]
|
||||||
|
|
||||||
|
setup(ext_modules=cythonize(infiles,
|
||||||
|
annotate=False,
|
||||||
|
force=True,
|
||||||
|
compiler_directives={
|
||||||
|
'language_level': "3",
|
||||||
|
'infer_types': True
|
||||||
|
}))
|
||||||
|
|
||||||
|
# example usage (run from the repository root): python3 tests/build_cython.py build_ext --inplace
|
@ -1249,7 +1249,7 @@ class LLMEngine:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def _advance_to_next_step(
|
def _advance_to_next_step(
|
||||||
self, output: List[SamplerOutput],
|
self, output: SamplerOutput,
|
||||||
seq_group_metadata_list: List[SequenceGroupMetadata],
|
seq_group_metadata_list: List[SequenceGroupMetadata],
|
||||||
scheduled_seq_groups: List[ScheduledSequenceGroup]) -> None:
|
scheduled_seq_groups: List[ScheduledSequenceGroup]) -> None:
|
||||||
"""Given model output from a single run, append the tokens to the
|
"""Given model output from a single run, append the tokens to the
|
||||||
|
@ -1187,7 +1187,8 @@ def _build_sampler_output(
|
|||||||
deferred_sample_results_args=deferred_sample_results_args)
|
deferred_sample_results_args=deferred_sample_results_args)
|
||||||
|
|
||||||
|
|
||||||
def _get_next_prompt_tokens(seq_group: SequenceGroupToSample) -> List[int]:
|
def _get_next_prompt_tokens(
|
||||||
|
seq_group: SequenceGroupToSample) -> tuple[int, ...]:
|
||||||
"""Get a list of next prompt tokens to compute logprob from a
|
"""Get a list of next prompt tokens to compute logprob from a
|
||||||
given sequence group.
|
given sequence group.
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ from collections.abc import (AsyncGenerator, Awaitable, Generator, Hashable,
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from functools import cache, lru_cache, partial, wraps
|
from functools import cache, lru_cache, partial, wraps
|
||||||
from typing import (TYPE_CHECKING, Any, Callable, Generic, Literal, NamedTuple,
|
from typing import (TYPE_CHECKING, Any, Callable, Generic, Literal, NamedTuple,
|
||||||
Optional, TypeVar, Union)
|
Optional, Type, TypeVar, Union)
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
import cloudpickle
|
import cloudpickle
|
||||||
@ -1544,9 +1544,9 @@ class LazyDict(Mapping[str, T], Generic[T]):
|
|||||||
return len(self._factory)
|
return len(self._factory)
|
||||||
|
|
||||||
|
|
||||||
class ClassRegistry(UserDict[type[T], _V]):
|
class ClassRegistry(UserDict[Type[T], _V]):
|
||||||
|
|
||||||
def __getitem__(self, key: type[T]) -> _V:
|
def __getitem__(self, key: Type[T]) -> _V:
|
||||||
for cls in key.mro():
|
for cls in key.mro():
|
||||||
if cls in self.data:
|
if cls in self.data:
|
||||||
return self.data[cls]
|
return self.data[cls]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user