Minor
This commit is contained in:
parent
2f49f15585
commit
d359cda5fa
@@ -7,7 +7,7 @@ from cacheflow.sequence import SequenceStatus
|
||||
from cacheflow.utils import Device
|
||||
|
||||
|
||||
-class BlockManager:
|
||||
+class BlockAllocator:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -65,8 +65,8 @@ class BlockSpaceManager:
|
||||
self.num_total_gpu_blocks = num_gpu_blocks
|
||||
self.num_total_cpu_blocks = num_cpu_blocks
|
||||
|
||||
-self.gpu_allocator = BlockManager(Device.GPU, block_size, num_gpu_blocks)
|
||||
-self.cpu_allocator = BlockManager(Device.CPU, block_size, num_cpu_blocks)
|
||||
+self.gpu_allocator = BlockAllocator(Device.GPU, block_size, num_gpu_blocks)
|
||||
+self.cpu_allocator = BlockAllocator(Device.CPU, block_size, num_cpu_blocks)
|
||||
|
||||
# Mapping: seq_id -> BlockTable.
|
||||
self.block_tables: Dict[int, BlockTable] = {}
|
||||
|
@@ -8,6 +8,7 @@ from cacheflow.sampling_params import SamplingParams
|
||||
from cacheflow.sequence import SequenceOutputs
|
||||
from cacheflow.parallel_utils.tensor_parallel import gather_from_tensor_model_parallel_region
|
||||
|
||||
|
||||
class Sampler(nn.Module):
|
||||
|
||||
def __init__(self) -> None:
|
||||
|
@@ -30,7 +30,7 @@ class Sequence:
|
||||
|
||||
self.status = SequenceStatus.PENDING
|
||||
self.output_logprobs: List[Dict[int, float]] = []
|
||||
-self.cumulative_logprobs = 1.0
|
||||
+self.cumulative_logprobs = 0.0
|
||||
|
||||
def add_block(self) -> None:
|
||||
block = LogicalTokenBlock(
|
||||
|
Loading…
x
Reference in New Issue
Block a user