[Bugfix] fix automatic prefix args and add log info (#3608)
This commit is contained in:
parent
925f3332ca
commit
e67c295b0c
@@ -9,6 +9,9 @@ from vllm.block import BlockTable, PhysicalTokenBlock
|
||||
from vllm.sequence import Sequence, SequenceGroup, SequenceStatus
|
||||
from vllm.utils import Device
|
||||
from vllm.core.evictor import Evictor, EvictionPolicy, make_evictor
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class BlockAllocatorBase(ABC):
|
||||
@@ -241,11 +244,13 @@ class BlockSpaceManager:
|
||||
self.watermark_blocks = int(watermark * num_gpu_blocks)
|
||||
|
||||
if self.enable_caching:
|
||||
logger.info("enable automatic prefix caching")
|
||||
self.gpu_allocator = CachedBlockAllocator(Device.GPU, block_size,
|
||||
num_gpu_blocks)
|
||||
self.cpu_allocator = CachedBlockAllocator(Device.CPU, block_size,
|
||||
num_cpu_blocks)
|
||||
else:
|
||||
logger.info("disable automatic prefix caching")
|
||||
self.gpu_allocator = UncachedBlockAllocator(
|
||||
Device.GPU, block_size, num_gpu_blocks)
|
||||
self.cpu_allocator = UncachedBlockAllocator(
|
||||
|
@@ -337,7 +337,8 @@ class EngineArgs:
|
||||
cache_config = CacheConfig(self.block_size,
|
||||
self.gpu_memory_utilization,
|
||||
self.swap_space, self.kv_cache_dtype,
|
||||
- model_config.get_sliding_window())
|
||||
+ model_config.get_sliding_window(),
|
||||
+ self.enable_prefix_caching)
|
||||
parallel_config = ParallelConfig(
|
||||
self.pipeline_parallel_size, self.tensor_parallel_size,
|
||||
self.worker_use_ray, self.max_parallel_loading_workers,
|
||||
|
Loading…
x
Reference in New Issue
Block a user