[Core][Optimization] change copy-on-write from dict[int, list] to list (#4648)

2024-05-07 11:06:32 -07:00 · 2024-05-07 11:06:32 -07:00 · 469f85c782
commit 469f85c782
parent 10760da800
12 changed files with 44 additions and 44 deletions
--- a/tests/core/block/test_block_table.py
+++ b/tests/core/block/test_block_table.py
@ -410,8 +410,7 @@ def test_cow(block_size: int, sequence_len: int, append_len: int,
        expected_src = static_block_table.physical_block_ids[cow_block_id]
        expected_dst = appender_block_table.physical_block_ids[cow_block_id]

-        assert expected_src in cows
-        assert expected_dst in cows[expected_src]
+        assert (expected_src, expected_dst) in cows
    else:
        # Otherwise, there should be no copy-on-write.
        assert not cows
@ -490,8 +489,7 @@ def test_cow_lookahead_simple(block_size: int, sequence_len: int,
        expected_src = static_block_table.physical_block_ids[cow_block_id]
        expected_dst = appender_block_table.physical_block_ids[cow_block_id]

-        assert expected_src in cows
-        assert expected_dst in cows[expected_src]
+        assert (expected_src, expected_dst) in cows

    static_block_table.free()
    appender_block_table.free()
--- a/tests/core/test_block_manager.py
+++ b/tests/core/test_block_manager.py
@ -1,4 +1,5 @@
 import time
+from collections import defaultdict
 from typing import List

 import pytest
@ -155,7 +156,10 @@ def test_append_slot_cow():

    cows = block_manager.append_slots(child)
    assert cows
-    for src_block, dst_blocks in cows.items():
+    dict_cows = defaultdict(list)
+    for src_block, dst_block in cows:
+        dict_cows[src_block].append(dst_block)
+    for src_block, dst_blocks in dict_cows.items():
        assert src_block not in dst_blocks

    after_blocks = block_manager.get_num_free_gpu_blocks()
--- a/tests/core/test_scheduler.py
+++ b/tests/core/test_scheduler.py
@ -636,7 +636,7 @@ def test_schedule_decode_blocks_to_copy_update():

    # The last request should be swapped out.
    scheduler.block_manager.append_slots = MagicMock()
-    scheduler.block_manager.append_slots.return_value = {2: [3]}
+    scheduler.block_manager.append_slots.return_value = [(2, 3)]

    budget = create_token_budget()
    remaining_running, output = scheduler._schedule_running(
@ -845,7 +845,7 @@ def test_schedule_swapped_blocks_to_copy():

    # The last request should be swapped out.
    scheduler.block_manager.append_slots = MagicMock()
-    scheduler.block_manager.append_slots.return_value = {2: [3]}
+    scheduler.block_manager.append_slots.return_value = [(2, 3)]

    budget = create_token_budget()
    remaining_swapped, output = scheduler._schedule_swapped(
--- a/vllm/core/block/common.py
+++ b/vllm/core/block/common.py
@ -1,5 +1,4 @@
-from collections import defaultdict
-from typing import Dict, Iterable, List, Optional, Protocol
+from typing import Dict, Iterable, List, Optional, Protocol, Tuple

 from vllm.core.block.interfaces import Block, BlockAllocator

@ -111,7 +110,7 @@ class CopyOnWriteTracker:
        refcounter: RefCounterProtocol,
        allocator: BlockAllocator,
    ):
-        self._copy_on_writes: Dict[BlockId, List[BlockId]] = defaultdict(list)
+        self._copy_on_writes: List[Tuple[BlockId, BlockId]] = []
        self._refcounter = refcounter
        self._allocator = allocator

@ -152,25 +151,25 @@ class CopyOnWriteTracker:
            # Track src/dst copy.
            assert src_block_id is not None
            assert block_id is not None
-            self._copy_on_writes[src_block_id].append(block_id)
+            self._copy_on_writes.append((src_block_id, block_id))

        return block_id

-    def clear_cows(self) -> Dict[BlockId, List[BlockId]]:
+    def clear_cows(self) -> List[Tuple[BlockId, BlockId]]:
        """Clears the copy-on-write tracking information and returns the current
        state.

-        This method returns a dictionary mapping source block indices to lists
-        of destination block indices for the current copy-on-write operations.
+        This method returns a list of (source, destination) block index
+        pairs for the current copy-on-write operations.
        It then clears the internal tracking information.

        Returns:
-            Dict[BlockId, List[BlockId]]: A dictionary mapping source
-                block indices to lists of destination block indices for the
+            List[Tuple[BlockId, BlockId]]: A list of (source,
+                destination) block index pairs for the
                current copy-on-write operations.
        """
-        cows = dict(self._copy_on_writes)
-        self._copy_on_writes.clear()
+        cows = self._copy_on_writes
+        self._copy_on_writes = []
        return cows


--- a/vllm/core/block/cpu_gpu_block_allocator.py
+++ b/vllm/core/block/cpu_gpu_block_allocator.py
@ -1,4 +1,4 @@
-from typing import Dict, FrozenSet, List, Optional
+from typing import Dict, FrozenSet, List, Optional, Tuple

 from vllm.core.block.interfaces import (Block, BlockAllocator, BlockId,
                                        DeviceAwareBlockAllocator)
@ -185,13 +185,13 @@ class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):
    def get_num_total_blocks(self, device: Device) -> int:
        return self._allocators[device].get_num_total_blocks()

-    def clear_copy_on_writes(self) -> Dict[int, List[int]]:
+    def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
        """Clears the copy-on-write (CoW) state and returns the mapping of
            source to destination block IDs.

        Returns:
-            Dict[int, List[int]]: A dictionary mapping source block IDs to lists
-                of destination block IDs.
+            List[Tuple[int, int]]: A list of (source, destination)
+                block ID pairs.
        """
        # CoW only supported on GPU
        device = Device.GPU
--- a/vllm/core/block/interfaces.py
+++ b/vllm/core/block/interfaces.py
@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Dict, FrozenSet, List, Optional, Protocol
+from typing import FrozenSet, List, Optional, Protocol, Tuple

 from vllm.utils import Device

@ -122,7 +122,7 @@ class BlockAllocator(ABC):
        pass

    @abstractmethod
-    def clear_copy_on_writes(self) -> Dict[int, List[int]]:
+    def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
        pass

    @abstractmethod
@ -187,7 +187,7 @@ class DeviceAwareBlockAllocator(ABC):
        pass

    @abstractmethod
-    def clear_copy_on_writes(self) -> Dict[int, List[int]]:
+    def clear_copy_on_writes(self) -> List[Tuple[int, int]]:
        pass

    @abstractmethod
--- a/vllm/core/block/naive_block.py
+++ b/vllm/core/block/naive_block.py
@ -1,4 +1,4 @@
-from typing import Dict, FrozenSet, Iterable, List, Optional, Set
+from typing import FrozenSet, Iterable, List, Optional, Set, Tuple

 from vllm.core.block.common import (CopyOnWriteTracker, RefCounter,
                                    get_all_blocks_recursively)
@ -175,12 +175,12 @@ class NaiveBlockAllocator(BlockAllocator):
        """
        return self._cow_tracker.cow_block_if_not_appendable(block)

-    def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
+    def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
        """Returns the copy-on-write source->destination mapping and clears it.

        Returns:
-            Dict[BlockId, List[BlockId]]: A dictionary mapping source
-                block indices to lists of destination block indices.
+            List[Tuple[BlockId, BlockId]]: A list of (source,
+                destination) block index pairs.
        """
        return self._cow_tracker.clear_cows()

--- a/vllm/core/block/prefix_caching_block.py
+++ b/vllm/core/block/prefix_caching_block.py
@ -1,7 +1,7 @@
 """Token blocks."""
 from itertools import takewhile
 from os.path import commonprefix
-from typing import Dict, FrozenSet, Iterable, List, Optional
+from typing import Dict, FrozenSet, Iterable, List, Optional, Tuple

 from vllm.core.block.common import (CopyOnWriteTracker,
                                    get_all_blocks_recursively)
@ -337,12 +337,12 @@ class PrefixCachingBlockAllocator(BlockAllocator):
        """
        return self._cow_tracker.cow_block_if_not_appendable(block)

-    def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
+    def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]:
        """Returns the copy-on-write source->destination mapping and clears it.

        Returns:
-            Dict[BlockId, List[BlockId]]: A dictionary mapping source
-                block indices to lists of destination block indices.
+            List[Tuple[BlockId, BlockId]]: A list of (source,
+                destination) block index pairs.
        """
        return self._cow_tracker.clear_cows()

--- a/vllm/core/block_manager_v1.py
+++ b/vllm/core/block_manager_v1.py
@ -5,7 +5,7 @@ from itertools import count, takewhile
 from os.path import commonprefix
 from typing import Dict, List, Optional
 from typing import Sequence as GenericSequence
-from typing import Set
+from typing import Set, Tuple

 from vllm.block import BlockTable, PhysicalTokenBlock
 from vllm.core.evictor_v1 import EvictionPolicy, Evictor, make_evictor
@ -386,7 +386,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):
        self,
        seq: Sequence,
        num_lookahead_slots: int = 0,
-    ) -> Dict[int, List[int]]:
+    ) -> List[Tuple[int, int]]:
        """Allocate a physical slot for a new token."""
        logical_blocks = seq.logical_token_blocks
        block_table = self.block_tables[seq.seq_id]
@ -405,7 +405,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):
                # Allocate a new physical block.
                new_block = self._allocate_last_physical_block(seq)
                block_table.append(new_block)
-                return {}
+                return []

        # We want to append the token to the last physical block.
        last_block = block_table[-1]
@ -418,7 +418,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):
                maybe_new_block = self._maybe_promote_last_block(
                    seq, last_block)
                block_table[-1] = maybe_new_block
-            return {}
+            return []
        else:
            # The last block is shared with other sequences.
            # Copy on Write: Allocate a new block and copy the tokens.
@ -426,7 +426,7 @@ class BlockSpaceManagerV1(BlockSpaceManager):

            block_table[-1] = new_block
            self.gpu_allocator.free(last_block)
-            return {last_block.block_number: [new_block.block_number]}
+            return [(last_block.block_number, new_block.block_number)]

    def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
        # NOTE: fork does not allocate a new physical block.
--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@ -1,6 +1,7 @@
 """A block manager that manages token blocks."""
 from typing import Dict, List, Optional
 from typing import Sequence as GenericSequence
+from typing import Tuple

 from vllm.core.block.block_table import BlockTable
 from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator
@ -166,7 +167,7 @@ class BlockSpaceManagerV2(BlockSpaceManager):
        self,
        seq: Sequence,
        num_lookahead_slots: int,
-    ) -> Dict[int, List[int]]:
+    ) -> List[Tuple[int, int]]:

        block_table = self.block_tables[seq.seq_id]

--- a/vllm/core/interfaces.py
+++ b/vllm/core/interfaces.py
@ -2,6 +2,7 @@ import enum
 from abc import ABC, abstractmethod
 from typing import Dict, List
 from typing import Sequence as GenericSequence
+from typing import Tuple

 from vllm.sequence import Sequence, SequenceGroup

@ -54,7 +55,7 @@ class BlockSpaceManager(ABC):
        self,
        seq: Sequence,
        num_lookahead_slots: int,
-    ) -> Dict[int, List[int]]:
+    ) -> List[Tuple[int, int]]:
        pass

    @abstractmethod
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@ -1027,10 +1027,7 @@ class Scheduler:

        for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING):
            cows = self.block_manager.append_slots(seq, num_lookahead_slots)
-
-            for src, dests in cows.items():
-                for dest in dests:
-                    blocks_to_copy.append((src, dest))
+            blocks_to_copy.extend(cows)

    def _preempt(
        self,