vllm/cacheflow/core/block_manager.py

"""A block manager that manages token blocks."""
from typing import Dict, List, Optional, Set, Tuple

from cacheflow.block import PhysicalTokenBlock
from cacheflow.sequence import Sequence, SequenceGroup, SequenceStatus
from cacheflow.utils import Device


class BlockAllocator:
    """Manages free physical token blocks for a device.

    The allocator maintains a list of free blocks and allocates a block when
    requested. When a block is freed, its reference count is decremented. If
    the reference count becomes zero, the block is added back to the free
    list.
    """

    def __init__(
        self,
        device: Device,
        block_size: int,
        num_blocks: int,
    ) -> None:
        self.device = device
        self.block_size = block_size
        self.num_blocks = num_blocks

        # Initialize the free blocks.
        self.free_blocks: List[PhysicalTokenBlock] = []
        for i in range(num_blocks):
            block = PhysicalTokenBlock(
                device=device, block_number=i, block_size=block_size)
            self.free_blocks.append(block)

    def allocate(self) -> PhysicalTokenBlock:
        if not self.free_blocks:
            raise ValueError("Out of memory! No free blocks are available.")
        block = self.free_blocks.pop()
        block.ref_count = 1
        return block

    def free(self, block: PhysicalTokenBlock) -> None:
        if block.ref_count == 0:
            raise ValueError(f"Double free! {block} is already freed.")
        block.ref_count -= 1
        if block.ref_count == 0:
            self.free_blocks.append(block)

    def get_num_free_blocks(self) -> int:
        return len(self.free_blocks)
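
# Illustrative sketch (not part of the original module): how the allocator's
# free list and reference counting interact, assuming PhysicalTokenBlock
# exposes a mutable `ref_count` attribute as used above.
#
#     allocator = BlockAllocator(Device.GPU, block_size=16, num_blocks=4)
#     block = allocator.allocate()      # ref_count == 1, 3 free blocks left
#     block.ref_count += 1              # e.g. a forked sequence shares it
#     allocator.free(block)             # ref_count -> 1, block still in use
#     allocator.free(block)             # ref_count -> 0, back on the free list
#     assert allocator.get_num_free_blocks() == 4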


# Mapping: logical block number -> physical block.
BlockTable = List[PhysicalTokenBlock]


class BlockSpaceManager:
    """Manages the mapping between logical and physical token blocks."""

    def __init__(
        self,
        block_size: int,
        num_gpu_blocks: int,
        num_cpu_blocks: int,
        watermark: float = 0.01,
    ) -> None:
        self.block_size = block_size
        self.num_total_gpu_blocks = num_gpu_blocks
        self.num_total_cpu_blocks = num_cpu_blocks
        self.watermark = watermark
        assert watermark >= 0.0

        self.watermark_blocks = int(watermark * num_gpu_blocks)
        self.gpu_allocator = BlockAllocator(Device.GPU, block_size,
                                            num_gpu_blocks)
        self.cpu_allocator = BlockAllocator(Device.CPU, block_size,
                                            num_cpu_blocks)
        # Mapping: seq_id -> BlockTable.
        self.block_tables: Dict[int, BlockTable] = {}

    def can_allocate(self, seq_group: SequenceGroup) -> bool:
        # FIXME(woosuk): Here we assume that all sequences in the group share
        # the same prompt. This may not be true for preempted sequences.
        seq = seq_group.get_seqs()[0]
        num_required_blocks = len(seq.logical_token_blocks)
        num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()
        # Use watermark to avoid frequent cache eviction.
        return (num_free_gpu_blocks - num_required_blocks >=
                self.watermark_blocks)
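
    # Illustrative numbers (not from the original source): with
    # num_gpu_blocks=1000 and watermark=0.01, watermark_blocks == 10, so a
    # prompt that needs 995 blocks is rejected even though enough blocks are
    # technically free (1000 - 995 = 5 < 10), keeping headroom that avoids
    # immediately evicting other sequences.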

    def allocate(self, seq_group: SequenceGroup) -> None:
        # NOTE: Here we assume that all sequences in the group have the same
        # prompt.
        seq = seq_group.get_seqs()[0]

        # Allocate new physical token blocks that will store the prompt
        # tokens.
        block_table: BlockTable = []
        for _ in range(len(seq.logical_token_blocks)):
            block = self.gpu_allocator.allocate()
            # Set the reference counts of the token blocks.
            block.ref_count = seq_group.num_seqs()
            block_table.append(block)

        # Assign the block table for each sequence.
        for seq in seq_group.get_seqs():
            self.block_tables[seq.seq_id] = block_table.copy()

    def can_append_slot(self, seq_group: SequenceGroup) -> bool:
        # Simple heuristic: If there is at least one free block
        # for each sequence, we can append.
        num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()
        num_seqs = seq_group.num_seqs(status=SequenceStatus.RUNNING)
        return num_seqs <= num_free_gpu_blocks
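
    # For example (illustrative, not from the original source): a group with
    # three RUNNING sequences can append only if at least three GPU blocks
    # are free, since in the worst case every sequence starts a new block or
    # triggers a copy-on-write in the same step.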

    def append_slot(self, seq: Sequence) -> Optional[Tuple[int, int]]:
        """Allocate a physical slot for a new token."""
        logical_blocks = seq.logical_token_blocks
        block_table = self.block_tables[seq.seq_id]

        if len(block_table) < len(logical_blocks):
            # The sequence has a new logical block.
            # Allocate a new physical block.
            block = self.gpu_allocator.allocate()
            block_table.append(block)
            return None

        # We want to append the token to the last physical block.
        last_block = block_table[-1]
        assert last_block.device == Device.GPU
        if last_block.ref_count == 1:
            # Not shared with other sequences. Appendable.
            return None
        else:
            # The last block is shared with other sequences.
            # Copy on Write: Allocate a new block and copy the tokens.
            new_block = self.gpu_allocator.allocate()
            block_table[-1] = new_block
            self.gpu_allocator.free(last_block)
            return last_block.block_number, new_block.block_number
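
    # Sketch of how a caller might consume append_slot's return value
    # (hypothetical caller code, not part of this module): a None result
    # means the token can be written in place, while a (src, dst) pair asks
    # the cache engine to copy the shared block before writing.
    #
    #     ret = block_manager.append_slot(seq)
    #     if ret is not None:
    #         src_block, dst_block = ret
    #         blocks_to_copy.setdefault(src_block, []).append(dst_block)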

    def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
        # NOTE: fork does not allocate a new physical block.
        # Thus, it is always safe from OOM.
        src_block_table = self.block_tables[parent_seq.seq_id]
        self.block_tables[child_seq.seq_id] = src_block_table.copy()
        for block in src_block_table:
            block.ref_count += 1
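
    # Illustrative effect of fork (not from the original source): if the
    # parent's blocks each had ref_count == 1, they now have ref_count == 2,
    # and both sequences read the same physical blocks until append_slot
    # triggers copy-on-write for the last (shared) block.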

    def _get_physical_blocks(
            self, seq_group: SequenceGroup) -> List[PhysicalTokenBlock]:
        # NOTE: Here, we assume that the physical blocks are only shared by
        # the sequences in the same group.
        blocks: Set[PhysicalTokenBlock] = set()
        for seq in seq_group.get_seqs():
            if SequenceStatus.is_finished(seq.status):
                continue
            block_table = self.block_tables[seq.seq_id]
            for block in block_table:
                blocks.add(block)
        return list(blocks)

    def can_swap_in(self, seq_group: SequenceGroup) -> bool:
        blocks = self._get_physical_blocks(seq_group)
        num_swapped_seqs = seq_group.num_seqs(status=SequenceStatus.SWAPPED)
        num_free_blocks = self.gpu_allocator.get_num_free_blocks()
        # NOTE: Conservatively, we assume that every sequence will allocate
        # at least one free block right after the swap-in.
        # NOTE: This should match the logic in can_append_slot().
        num_required_blocks = len(blocks) + num_swapped_seqs
        return num_free_blocks - num_required_blocks >= self.watermark_blocks

    def swap_in(self, seq_group: SequenceGroup) -> Dict[int, int]:
        # CPU block -> GPU block.
        mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
        for seq in seq_group.get_seqs():
            if SequenceStatus.is_finished(seq.status):
                continue
            new_block_table: BlockTable = []
            block_table = self.block_tables[seq.seq_id]

            for cpu_block in block_table:
                if cpu_block in mapping:
                    gpu_block = mapping[cpu_block]
                    gpu_block.ref_count += 1
                else:
                    gpu_block = self.gpu_allocator.allocate()
                    mapping[cpu_block] = gpu_block
                new_block_table.append(gpu_block)
                # Free the CPU block swapped in to GPU.
                self.cpu_allocator.free(cpu_block)
            self.block_tables[seq.seq_id] = new_block_table

        block_number_mapping = {
            cpu_block.block_number: gpu_block.block_number
            for cpu_block, gpu_block in mapping.items()
        }
        return block_number_mapping
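
    # The returned mapping is keyed by block numbers, e.g. {3: 17, 4: 18}
    # (illustrative values), meaning CPU block 3 should be copied into GPU
    # block 17, and so on; presumably the caller forwards it to whatever
    # component performs the actual cache copies.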

    def can_swap_out(self, seq_group: SequenceGroup) -> bool:
        blocks = self._get_physical_blocks(seq_group)
        return len(blocks) <= self.cpu_allocator.get_num_free_blocks()

    def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]:
        # GPU block -> CPU block.
        mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
        for seq in seq_group.get_seqs():
            if SequenceStatus.is_finished(seq.status):
                continue
            new_block_table: BlockTable = []
            block_table = self.block_tables[seq.seq_id]

            for gpu_block in block_table:
                if gpu_block in mapping:
                    cpu_block = mapping[gpu_block]
                    cpu_block.ref_count += 1
                else:
                    cpu_block = self.cpu_allocator.allocate()
                    mapping[gpu_block] = cpu_block
                new_block_table.append(cpu_block)
                # Free the GPU block swapped out to CPU.
                self.gpu_allocator.free(gpu_block)
            self.block_tables[seq.seq_id] = new_block_table

        block_number_mapping = {
            gpu_block.block_number: cpu_block.block_number
            for gpu_block, cpu_block in mapping.items()
        }
        return block_number_mapping

    def _free_block_table(self, block_table: BlockTable) -> None:
        for block in block_table:
            if block.device == Device.GPU:
                self.gpu_allocator.free(block)
            else:
                self.cpu_allocator.free(block)

    def free(self, seq: Sequence) -> None:
        block_table = self.block_tables[seq.seq_id]
        self._free_block_table(block_table)
        del self.block_tables[seq.seq_id]

    def reset(self) -> None:
        for block_table in self.block_tables.values():
            self._free_block_table(block_table)
        self.block_tables.clear()

    def get_block_table(self, seq: Sequence) -> List[int]:
        block_table = self.block_tables[seq.seq_id]
        return [block.block_number for block in block_table]

    def get_num_free_gpu_blocks(self) -> int:
        return self.gpu_allocator.get_num_free_blocks()

    def get_num_free_cpu_blocks(self) -> int:
        return self.cpu_allocator.get_num_free_blocks()