[Core][Optimization] change copy-on-write from dict[int, list] to list (#4648)
This commit is contained in:
parent
10760da800
commit
469f85c782
@ -410,8 +410,7 @@ def test_cow(block_size: int, sequence_len: int, append_len: int,
|
||||
expected_src = static_block_table.physical_block_ids[cow_block_id]
|
||||
expected_dst = appender_block_table.physical_block_ids[cow_block_id]
|
||||
|
||||
assert expected_src in cows
|
||||
assert expected_dst in cows[expected_src]
|
||||
assert (expected_src, expected_dst) in cows
|
||||
else:
|
||||
# Otherwise, there should be no copy-on-write.
|
||||
assert not cows
|
||||
@ -490,8 +489,7 @@ def test_cow_lookahead_simple(block_size: int, sequence_len: int,
|
||||
expected_src = static_block_table.physical_block_ids[cow_block_id]
|
||||
expected_dst = appender_block_table.physical_block_ids[cow_block_id]
|
||||
|
||||
assert expected_src in cows
|
||||
assert expected_dst in cows[expected_src]
|
||||
assert (expected_src, expected_dst) in cows
|
||||
|
||||
static_block_table.free()
|
||||
appender_block_table.free()
|
||||
|
@ -1,4 +1,5 @@
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
@ -155,7 +156,10 @@ def test_append_slot_cow():
|
||||
|
||||
cows = block_manager.append_slots(child)
|
||||
assert cows
|
||||
for src_block, dst_blocks in cows.items():
|
||||
dict_cows = defaultdict(list)
|
||||
for src_block, dst_block in cows:
|
||||
dict_cows[src_block].append(dst_block)
|
||||
for src_block, dst_blocks in dict_cows.items():
|
||||
assert src_block not in dst_blocks
|
||||
|
||||
after_blocks = block_manager.get_num_free_gpu_blocks()
|
||||
|
@ -636,7 +636,7 @@ def test_schedule_decode_blocks_to_copy_update():
|
||||
|
||||
# The last request should be swapped out.
|
||||
scheduler.block_manager.append_slots = MagicMock()
|
||||
scheduler.block_manager.append_slots.return_value = {2: [3]}
|
||||
scheduler.block_manager.append_slots.return_value = [(2, 3)]
|
||||
|
||||
budget = create_token_budget()
|
||||
remaining_running, output = scheduler._schedule_running(
|
||||
@ -845,7 +845,7 @@ def test_schedule_swapped_blocks_to_copy():
|
||||
|
||||
# The last request should be swapped out.
|
||||
scheduler.block_manager.append_slots = MagicMock()
|
||||
scheduler.block_manager.append_slots.return_value = {2: [3]}
|
||||
scheduler.block_manager.append_slots.return_value = [(2, 3)]
|
||||
|
||||
budget = create_token_budget()
|
||||
remaining_swapped, output = scheduler._schedule_swapped(
|
||||
|
@ -1,5 +1,4 @@
|
||||
from collections import defaultdict
|
||||
from typing import Dict, Iterable, List, Optional, Protocol
|
||||
from typing import Dict, Iterable, List, Optional, Protocol, Tuple
|
||||
|
||||
from vllm.core.block.interfaces import Block, BlockAllocator
|
||||
|
||||
@ -111,7 +110,7 @@ class CopyOnWriteTracker:
|
||||
refcounter: RefCounterProtocol,
|
||||
allocator: BlockAllocator,
|
||||
):
|
||||
self._copy_on_writes: Dict[BlockId, List[BlockId]] = defaultdict(list)
|
||||
self._copy_on_writes: List[Tuple[BlockId, BlockId]] = []
|
||||
self._refcounter = refcounter
|
||||
self._allocator = allocator
|
||||
|
||||
@ -152,25 +151,25 @@ class CopyOnWriteTracker:
|
||||
# Track src/dst copy.
|
||||
assert src_block_id is not None
|
||||
assert block_id is not None
|
||||
self._copy_on_writes[src_block_id].append(block_id)
|
||||
self._copy_on_writes.append((src_block_id, block_id))
|
||||
|
||||
return block_id
|
||||
|
||||
def clear_cows(self) -> Dict[BlockId, List[BlockId]]:
|
||||
def clear_cows(self) -> List[Tuple[BlockId, BlockId]]:
|
||||
"""Clears the copy-on-write tracking information and returns the current
|
||||
state.
|
||||
|
||||
This method returns a dictionary mapping source block indices to lists
|
||||
of destination block indices for the current copy-on-write operations.
|
||||
This method returns a list of (source, destination) block index pairs
|
||||
for the current copy-on-write operations.
|
||||
It then clears the internal tracking information.
|
||||
|
||||
Returns:
|
||||
Dict[BlockId, List[BlockId]]: A dictionary mapping source
|
||||
block indices to lists of destination block indices for the
|
||||
List[Tuple[BlockId, BlockId]]: A list of (source, destination)
|
||||
block index pairs for the
|
||||
current copy-on-write operations.
|
||||
"""
|
||||
cows = dict(self._copy_on_writes)
|
||||
self._copy_on_writes.clear()
|
||||
cows = self._copy_on_writes
|
||||
self._copy_on_writes = []
|
||||
return cows
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import Dict, FrozenSet, List, Optional
|
||||
from typing import Dict, FrozenSet, List, Optional, Tuple
|
||||
|
||||
from vllm.core.block.interfaces import (Block, BlockAllocator, BlockId,
|
||||
DeviceAwareBlockAllocator)
|
||||
@ -185,13 +185,13 @@ class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):
|
||||
def get_num_total_blocks(self, device: Device) -> int:
|
||||
return self._allocators[device].get_num_total_blocks()
|
||||
|
||||
def clear_copy_on_writes(self) -> Dict[int, List[int]]:
|
||||
def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
|
||||
"""Clears the copy-on-write (CoW) state and returns the mapping of
|
||||
source to destination block IDs.
|
||||
|
||||
Returns:
|
||||
Dict[int, List[int]]: A dictionary mapping source block IDs to lists
|
||||
of destination block IDs.
|
||||
List[Tuple[int, int]]: A list of (source, destination)
|
||||
block ID pairs.
|
||||
"""
|
||||
# CoW only supported on GPU
|
||||
device = Device.GPU
|
||||
|
@ -1,5 +1,5 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, FrozenSet, List, Optional, Protocol
|
||||
from typing import FrozenSet, List, Optional, Protocol, Tuple
|
||||
|
||||
from vllm.utils import Device
|
||||
|
||||
@ -122,7 +122,7 @@ class BlockAllocator(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def clear_copy_on_writes(self) -> Dict[int, List[int]]:
|
||||
def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@ -187,7 +187,7 @@ class DeviceAwareBlockAllocator(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def clear_copy_on_writes(self) -> Dict[int, List[int]]:
|
||||
def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
|
@ -1,4 +1,4 @@
|
||||
from typing import Dict, FrozenSet, Iterable, List, Optional, Set
|
||||
from typing import FrozenSet, Iterable, List, Optional, Set, Tuple
|
||||
|
||||
from vllm.core.block.common import (CopyOnWriteTracker, RefCounter,
|
||||
get_all_blocks_recursively)
|
||||
@ -175,12 +175,12 @@ class NaiveBlockAllocator(BlockAllocator):
|
||||
"""
|
||||
return self._cow_tracker.cow_block_if_not_appendable(block)
|
||||
|
||||
def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
|
||||
def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
|
||||
"""Returns the copy-on-write source->destination mapping and clears it.
|
||||
|
||||
Returns:
|
||||
Dict[BlockId, List[BlockId]]: A dictionary mapping source
|
||||
block indices to lists of destination block indices.
|
||||
List[Tuple[BlockId, BlockId]]: A list of (source, destination)
|
||||
block index pairs.
|
||||
"""
|
||||
return self._cow_tracker.clear_cows()
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
"""Token blocks."""
|
||||
from itertools import takewhile
|
||||
from os.path import commonprefix
|
||||
from typing import Dict, FrozenSet, Iterable, List, Optional
|
||||
from typing import Dict, FrozenSet, Iterable, List, Optional, Tuple
|
||||
|
||||
from vllm.core.block.common import (CopyOnWriteTracker,
|
||||
get_all_blocks_recursively)
|
||||
@ -337,12 +337,12 @@ class PrefixCachingBlockAllocator(BlockAllocator):
|
||||
"""
|
||||
return self._cow_tracker.cow_block_if_not_appendable(block)
|
||||
|
||||
def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
|
||||
def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
|
||||
"""Returns the copy-on-write source->destination mapping and clears it.
|
||||
|
||||
Returns:
|
||||
Dict[BlockId, List[BlockId]]: A dictionary mapping source
|
||||
block indices to lists of destination block indices.
|
||||
List[Tuple[BlockId, BlockId]]: A list of (source, destination)
|
||||
block index pairs.
|
||||
"""
|
||||
return self._cow_tracker.clear_cows()
|
||||
|
||||
|
@ -5,7 +5,7 @@ from itertools import count, takewhile
|
||||
from os.path import commonprefix
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Sequence as GenericSequence
|
||||
from typing import Set
|
||||
from typing import Set, Tuple
|
||||
|
||||
from vllm.block import BlockTable, PhysicalTokenBlock
|
||||
from vllm.core.evictor_v1 import EvictionPolicy, Evictor, make_evictor
|
||||
@ -386,7 +386,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):
|
||||
self,
|
||||
seq: Sequence,
|
||||
num_lookahead_slots: int = 0,
|
||||
) -> Dict[int, List[int]]:
|
||||
) -> List[Tuple[int, int]]:
|
||||
"""Allocate a physical slot for a new token."""
|
||||
logical_blocks = seq.logical_token_blocks
|
||||
block_table = self.block_tables[seq.seq_id]
|
||||
@ -405,7 +405,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):
|
||||
# Allocate a new physical block.
|
||||
new_block = self._allocate_last_physical_block(seq)
|
||||
block_table.append(new_block)
|
||||
return {}
|
||||
return []
|
||||
|
||||
# We want to append the token to the last physical block.
|
||||
last_block = block_table[-1]
|
||||
@ -418,7 +418,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):
|
||||
maybe_new_block = self._maybe_promote_last_block(
|
||||
seq, last_block)
|
||||
block_table[-1] = maybe_new_block
|
||||
return {}
|
||||
return []
|
||||
else:
|
||||
# The last block is shared with other sequences.
|
||||
# Copy on Write: Allocate a new block and copy the tokens.
|
||||
@ -426,7 +426,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):
|
||||
|
||||
block_table[-1] = new_block
|
||||
self.gpu_allocator.free(last_block)
|
||||
return {last_block.block_number: [new_block.block_number]}
|
||||
return [(last_block.block_number, new_block.block_number)]
|
||||
|
||||
def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
|
||||
# NOTE: fork does not allocate a new physical block.
|
||||
|
@ -1,6 +1,7 @@
|
||||
"""A block manager that manages token blocks."""
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Sequence as GenericSequence
|
||||
from typing import Tuple
|
||||
|
||||
from vllm.core.block.block_table import BlockTable
|
||||
from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator
|
||||
@ -166,7 +167,7 @@ class BlockSpaceManagerV2(BlockSpaceManager):
|
||||
self,
|
||||
seq: Sequence,
|
||||
num_lookahead_slots: int,
|
||||
) -> Dict[int, List[int]]:
|
||||
) -> List[Tuple[int, int]]:
|
||||
|
||||
block_table = self.block_tables[seq.seq_id]
|
||||
|
||||
|
@ -2,6 +2,7 @@ import enum
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List
|
||||
from typing import Sequence as GenericSequence
|
||||
from typing import Tuple
|
||||
|
||||
from vllm.sequence import Sequence, SequenceGroup
|
||||
|
||||
@ -54,7 +55,7 @@ class BlockSpaceManager(ABC):
|
||||
self,
|
||||
seq: Sequence,
|
||||
num_lookahead_slots: int,
|
||||
) -> Dict[int, List[int]]:
|
||||
) -> List[Tuple[int, int]]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
|
@ -1027,10 +1027,7 @@ class Scheduler:
|
||||
|
||||
for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING):
|
||||
cows = self.block_manager.append_slots(seq, num_lookahead_slots)
|
||||
|
||||
for src, dests in cows.items():
|
||||
for dest in dests:
|
||||
blocks_to_copy.append((src, dest))
|
||||
blocks_to_copy.extend(cows)
|
||||
|
||||
def _preempt(
|
||||
self,
|
||||
|
Loading…
x
Reference in New Issue
Block a user