[mypy] Enable mypy type checking for vllm/core (#7229)
This commit is contained in:
parent 5340a2dccf
commit 9c71c97ae2
.github/workflows/mypy.yaml
@@ -35,7 +35,6 @@ jobs:
         mypy
         mypy tests --follow-imports skip
         mypy vllm/attention --follow-imports skip
-        mypy vllm/core --follow-imports skip
         mypy vllm/distributed --follow-imports skip
         mypy vllm/engine --follow-imports skip
         mypy vllm/executor --follow-imports skip
@@ -99,7 +99,6 @@ echo 'vLLM mypy:'
 mypy --follow-imports skip # Note that this is less strict than CI
 mypy tests --follow-imports skip
 mypy vllm/attention --follow-imports skip
-mypy vllm/core --follow-imports skip
 mypy vllm/distributed --follow-imports skip
 mypy vllm/engine --follow-imports skip
 mypy vllm/executor --follow-imports skip
@@ -58,6 +58,7 @@ files = [
     "vllm/adapter_commons",
     "vllm/assets",
     "vllm/entrypoints",
+    "vllm/core",
     "vllm/inputs",
     "vllm/logging",
     "vllm/multimodal",
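The pyproject.toml change is what actually enables the checking: with "vllm/core" in the `[tool.mypy]` `files` list, the bare `mypy` invocation covers the package, so the per-directory `mypy vllm/core --follow-imports skip` lines removed above become redundant. As a rough sketch, both modes can be driven from mypy's programmatic API (`mypy.api.run`); the paths and flags below are illustrative only, not copied from the vLLM configuration.

# Requires mypy installed; run from the repository root so the bare
# invocation picks up the `files` list in pyproject.toml.
from mypy import api

# Old style: check one package leniently. --follow-imports skip treats
# unfollowed imports as Any, which hides errors at cross-module call sites.
stdout, stderr, status = api.run(["vllm/core", "--follow-imports", "skip"])
print(status)

# New style: no explicit targets; mypy reads the `files` list (which now
# includes "vllm/core") from pyproject.toml and checks everything in it.
stdout, stderr, status = api.run([])
print(status)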
@@ -1,9 +1,9 @@
 """Token blocks."""
-from typing import List, Optional
+from typing import TYPE_CHECKING, Iterator, List, Optional

 from vllm.utils import Device

-DEFAULT_LAST_ACCESSED_TIME = -1
+DEFAULT_LAST_ACCESSED_TIME: float = -1


 class PhysicalTokenBlock:
@@ -59,6 +59,11 @@ class BlockTable:
     def __getitem__(self, key):
         return self._blocks[key]

+    if TYPE_CHECKING:
+
+        def __iter__(self) -> Iterator[PhysicalTokenBlock]:
+            raise RuntimeError("Method should be automatically generated")
+
     def __setitem__(self, key, value):
         if isinstance(key, slice):
             blocks = value
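The `if TYPE_CHECKING:` stub added above exists because BlockTable is iterable at runtime only through `__getitem__` (Python's legacy iteration protocol), which mypy does not recognize; declaring `__iter__` for the type checker alone makes `for block in block_table` pass without adding a real method. A minimal, self-contained sketch of the pattern; the class and names here are illustrative, not vLLM's.

from typing import TYPE_CHECKING, Iterator, List


class IntTable:
    def __init__(self) -> None:
        self._items: List[int] = []

    def append(self, value: int) -> None:
        self._items.append(value)

    def __len__(self) -> int:
        return len(self._items)

    def __getitem__(self, key: int) -> int:
        # Raises IndexError past the end, which is what drives the
        # legacy iteration protocol at runtime.
        return self._items[key]

    if TYPE_CHECKING:
        # Never executed; exists only so mypy treats the class as Iterable.
        def __iter__(self) -> Iterator[int]:
            raise RuntimeError("Method should be automatically generated")


table = IntTable()
table.append(1)
table.append(2)
# Works at runtime via __getitem__; without the TYPE_CHECKING stub,
# mypy would reject this as iterating over a non-iterable.
print(list(table))  # [1, 2]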
@@ -132,7 +132,7 @@ class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):

     def allocate_immutable_blocks(self, prev_block: Optional[Block],
                                   block_token_ids: List[List[int]],
-                                  device: Optional[Device]) -> List[Block]:
+                                  device: Device) -> List[Block]:
         """Allocates a new group of immutable blocks with the provided block
         token IDs on the specified device.

@@ -278,7 +278,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):
         # request ID
         self.cross_block_tables: Dict[str, BlockTable] = {}

-    def _get_seq_num_required_blocks(self, seq: Sequence) -> int:
+    def _get_seq_num_required_blocks(self, seq: Optional[Sequence]) -> int:
         return 0 if seq is None else seq.n_blocks

     def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
@@ -310,13 +310,14 @@ class BlockSpaceManagerV1(BlockSpaceManager):
             return AllocStatus.LATER

     def _allocate_sequence(self, \
-                           seq: Sequence, \
+                           seq: Optional[Sequence], \
                            ref_count: int, \
                            is_encoder_decoder: bool = True) -> BlockTable:
         # Allocate new physical token blocks that will store the prompt tokens.
-        num_prompt_blocks = seq.n_blocks
+        num_prompt_blocks = self._get_seq_num_required_blocks(seq)

         block_table: BlockTable = BlockTable()
+        assert seq is not None
         for logical_idx in range(num_prompt_blocks):
             if (self.block_sliding_window is not None
                     and logical_idx >= self.block_sliding_window):
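The two BlockSpaceManagerV1 hunks above work together: the helper accepts `Optional[Sequence]` and handles `None` itself, while the caller's `assert seq is not None` narrows the Optional so the attribute accesses that follow type-check. A small self-contained sketch of the same pattern; `Seq` and the block size are stand-ins, not vLLM's classes.

from typing import List, Optional


class Seq:
    """Stand-in sequence with only what the sketch needs."""

    def __init__(self, token_ids: List[int]) -> None:
        self.token_ids = token_ids

    @property
    def n_blocks(self) -> int:
        # Assume a block size of 16 tokens for this sketch.
        return (len(self.token_ids) + 15) // 16


def get_seq_num_required_blocks(seq: Optional[Seq]) -> int:
    # The helper owns the None case, so it can honestly accept Optional.
    return 0 if seq is None else seq.n_blocks


def allocate_sequence(seq: Optional[Seq]) -> List[int]:
    num_prompt_blocks = get_seq_num_required_blocks(seq)
    # From here on the sequence must exist; the assert documents that and
    # narrows `seq` from Optional[Seq] to Seq for mypy.
    assert seq is not None
    return list(range(num_prompt_blocks))


print(allocate_sequence(Seq(list(range(40)))))  # [0, 1, 2]
print(get_seq_num_required_blocks(None))        # 0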
@@ -120,8 +120,10 @@ class BlockSpaceManagerV2(BlockSpaceManager):
         )

         if seq_group.is_encoder_decoder():
+            encoder_seq = seq_group.get_encoder_seq()
+            assert encoder_seq is not None
             num_required_blocks += BlockTable.get_num_required_blocks(
-                seq_group.get_encoder_seq().get_token_ids(),
+                encoder_seq.get_token_ids(),
                 block_size=self.block_size,
             )

@@ -189,7 +191,9 @@ class BlockSpaceManagerV2(BlockSpaceManager):
         check_no_caching_or_swa_for_blockmgr_encdec(self, seq_group)

         if seq_group.is_encoder_decoder():
-            block_table = self._allocate_sequence(seq_group.get_encoder_seq())
+            encoder_seq = seq_group.get_encoder_seq()
+            assert encoder_seq is not None
+            block_table = self._allocate_sequence(encoder_seq)
             self.cross_block_tables[request_id] = block_table

     def can_append_slots(self, seq_group: SequenceGroup,
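Both BlockSpaceManagerV2 hunks use the same idiom: the Optional result of `get_encoder_seq()` is first bound to a local and then asserted non-None. mypy does not narrow the result of a method call (a later call could return something else), so binding to a local variable is what lets the assert take effect. A minimal sketch with hypothetical stand-in classes:

from typing import List, Optional


class EncoderSeq:
    def get_token_ids(self) -> List[int]:
        return [1, 2, 3]


class SeqGroup:
    def __init__(self, encoder_seq: Optional[EncoderSeq]) -> None:
        self._encoder_seq = encoder_seq

    def get_encoder_seq(self) -> Optional[EncoderSeq]:
        return self._encoder_seq


def encoder_token_ids(group: SeqGroup) -> List[int]:
    # Chaining group.get_encoder_seq().get_token_ids() directly fails mypy
    # (None has no such attribute), and asserting on one call does not
    # narrow the next call's result. Binding to a local first makes the
    # narrowing stick.
    encoder_seq = group.get_encoder_seq()
    assert encoder_seq is not None
    return encoder_seq.get_token_ids()


print(encoder_token_ids(SeqGroup(EncoderSeq())))  # [1, 2, 3]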
@@ -77,8 +77,8 @@ class EmbeddingModelBlockSpaceManager(BlockSpaceManager):
         pass

     def get_common_computed_block_ids(self,
-                                      seq_group: SequenceGroup) -> List[int]:
-        return None  # type: ignore
+                                      seq_group: List[Sequence]) -> List[int]:
+        return []

     def mark_blocks_as_computed(self, seq_group: SequenceGroup):
         pass
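The embedding block manager is a no-op implementation; it previously returned None and silenced the resulting error with `# type: ignore`, whereas returning an empty list satisfies the declared `List[int]` return type directly. A trivial sketch of the same fix, with an illustrative class name:

from typing import List


class NoOpManager:
    def get_common_computed_block_ids(self, seqs: List[object]) -> List[int]:
        # An empty list keeps the annotation truthful for a no-op,
        # with no "# type: ignore" needed.
        return []


print(NoOpManager().get_common_computed_block_ids([]))  # []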
@@ -221,10 +221,10 @@ class SchedulerSwappedInOutputs:
     """
     # Selected sequences that are going to be swapped in and is in a
     # decoding phase.
-    decode_seq_groups: List[SequenceGroup]
+    decode_seq_groups: List[ScheduledSequenceGroup]
     # Selected sequences that are going to be swapped in and in a prefill
     # phase. I.e., it means the prefill has been chunked.
-    prefill_seq_groups: List[SequenceGroup]
+    prefill_seq_groups: List[ScheduledSequenceGroup]
     # The blocks to swap in.
     blocks_to_swap_in: List[Tuple[int, int]]
     # The blocks to copy.
@@ -254,7 +254,7 @@ class SchedulerPrefillOutputs:
     to be recomputed from scratch.
     """
     # Selected sequences for prefill.
-    seq_groups: List[SequenceGroup]
+    seq_groups: List[ScheduledSequenceGroup]
     # Ignored sequence groups.
     ignored_seq_groups: List[SequenceGroup]
     num_lookahead_slots: int
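These scheduler output containers actually hold ScheduledSequenceGroup entries (a sequence group plus its token_chunk_size), so annotating the fields with the precise element type lets mypy verify accesses like `.seq_group` and `.token_chunk_size` on the items. A rough sketch of the idea with simplified stand-in dataclasses; the field shapes here are illustrative, not vLLM's definitions.

from dataclasses import dataclass, field
from typing import List


@dataclass
class SequenceGroup:
    request_id: str


@dataclass
class ScheduledSequenceGroup:
    seq_group: SequenceGroup
    token_chunk_size: int


@dataclass
class PrefillOutputs:
    # The precise element type (not the looser SequenceGroup) is what lets
    # mypy check the attribute accesses below.
    seq_groups: List[ScheduledSequenceGroup] = field(default_factory=list)


outputs = PrefillOutputs()
outputs.seq_groups.append(
    ScheduledSequenceGroup(SequenceGroup("req-0"), token_chunk_size=16))
print(sum(s.token_chunk_size for s in outputs.seq_groups))  # 16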
@@ -289,7 +289,9 @@ def scheduler_running_outputs_builder():


 def scheduled_seq_group_builder():
-    return ScheduledSequenceGroup(seq_group=None, token_chunk_size=0)
+    return ScheduledSequenceGroup(SequenceGroup("", [], -1),
+                                  token_chunk_size=0)
+    # return ScheduledSequenceGroup(seq_group=None, token_chunk_size=0)


 class Scheduler:
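Because `ScheduledSequenceGroup.seq_group` is not Optional, the builder can no longer pass None; it constructs a throwaway `SequenceGroup("", [], -1)` (empty request id, no sequences, dummy arrival time) that is expected to be overwritten before the cached entry is read. A small sketch of the same idea, assuming an object-pool style factory; the pool class and names below are illustrative, not vLLM's cache implementation.

from dataclasses import dataclass
from typing import Callable, List


@dataclass
class SequenceGroup:
    request_id: str
    seqs: List[object]
    arrival_time: float


@dataclass
class ScheduledSequenceGroup:
    seq_group: SequenceGroup  # not Optional, so the factory cannot use None
    token_chunk_size: int


def scheduled_seq_group_builder() -> ScheduledSequenceGroup:
    # Placeholder values; callers overwrite both fields before reading them,
    # so the object only has to satisfy the declared types.
    return ScheduledSequenceGroup(SequenceGroup("", [], -1), token_chunk_size=0)


class ObjectPool:
    """Tiny stand-in for an object cache built around a factory."""

    def __init__(self, factory: Callable[[], ScheduledSequenceGroup]) -> None:
        self._factory = factory
        self._free: List[ScheduledSequenceGroup] = []

    def get(self) -> ScheduledSequenceGroup:
        return self._free.pop() if self._free else self._factory()

    def put(self, obj: ScheduledSequenceGroup) -> None:
        self._free.append(obj)


pool = ObjectPool(scheduled_seq_group_builder)
entry = pool.get()
entry.seq_group = SequenceGroup("req-0", [], 0.0)
entry.token_chunk_size = 8
print(entry.token_chunk_size)  # 8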
@@ -791,7 +793,7 @@ class Scheduler:
             SchedulerPrefillOutputs.
         """
         ignored_seq_groups: List[SequenceGroup] = []
-        seq_groups: List[SequenceGroup] = []
+        seq_groups: List[ScheduledSequenceGroup] = []

         waiting_queue = self.waiting

@@ -1130,7 +1132,9 @@ class Scheduler:

         if seq_group.is_encoder_decoder():
             # Encoder associated with SequenceGroup
-            encoder_seq_data = seq_group.get_encoder_seq().data
+            encoder_seq = seq_group.get_encoder_seq()
+            assert encoder_seq is not None
+            encoder_seq_data = encoder_seq.data
             # Block table for cross-attention
             # Also managed at SequenceGroup level
             cross_block_table = self.block_manager.get_cross_block_table(