[Misc] Warn about v0 in benchmark_paged_attn.py (#15495)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Tyler Michael Smith 2025-03-25 23:31:04 -04:00 committed by GitHub
parent 997c8811d6
commit 23114d3364
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -7,10 +7,13 @@ from typing import Optional
import torch
from vllm import _custom_ops as ops
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser,
create_kv_caches_with_random)
logger = init_logger(__name__)
NUM_BLOCKS = 128 * 1024
PARTITION_SIZE = 512
PARTITION_SIZE_ROCM = 256
@ -193,6 +196,9 @@ def main(
if __name__ == '__main__':
logger.warning("This script benchmarks the paged attention kernel. "
"By default this is no longer used in vLLM inference.")
parser = FlexibleArgumentParser(
description="Benchmark the paged attention kernel.")
parser.add_argument("--version",