[v1][Bugfix] Add extra_keys to block_hash for prefix caching (#12603)

This pr adds extra key to block hash, to generate different hash value
for two blocks with the same token string but different extra_keys in
their parent blocks. For example, it can generate different hash value
for the second block of the following two requests:
```python
request1 = make_request(
        request_id=0,
        prompt_token_ids=[_ for _ in range(6)],
        mm_positions=[{
            "offset": 0,
            "length": 3
        }, {
            "offset": 3,
            "length": 3
        }],
        mm_hashes=["hash1", "hash2"],
    )
    request2 = make_request(
        request_id=1,
        prompt_token_ids=[_ for _ in range(6)],
        mm_positions=[{
            "offset": 0,
            "length": 3
        }, {
            "offset": 3,
            "length": 3
        }],
        mm_hashes=["hash3", "hash2"],
    )
```

---------

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang 2025-02-01 05:13:04 +08:00 committed by GitHub
parent 60bcef000e
commit 89003c4082
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 3 deletions

View File

@ -192,7 +192,7 @@ def test_hash_block_tokens():
extra_keys) extra_keys)
assert isinstance(block_hash, BlockHashType) assert isinstance(block_hash, BlockHashType)
assert block_hash.hash_value == hash( assert block_hash.hash_value == hash(
(parent_block_hash, *curr_block_token_ids)) (parent_block_hash, curr_block_token_ids, extra_keys))
assert block_hash.token_ids == curr_block_token_ids assert block_hash.token_ids == curr_block_token_ids
assert block_hash.extra_keys == extra_keys assert block_hash.extra_keys == extra_keys
@ -227,6 +227,38 @@ def test_hash_request_tokens():
assert block_hashes[1].extra_keys == ("hash2", ) assert block_hashes[1].extra_keys == ("hash2", )
def test_hash_tokens_different_mm_input():
request1 = make_request(
request_id=0,
prompt_token_ids=[_ for _ in range(6)],
mm_positions=[{
"offset": 0,
"length": 3
}, {
"offset": 3,
"length": 3
}],
mm_hashes=["hash1", "hash2"],
)
request2 = make_request(
request_id=1,
prompt_token_ids=[_ for _ in range(6)],
mm_positions=[{
"offset": 0,
"length": 3
}, {
"offset": 3,
"length": 3
}],
mm_hashes=["hash3", "hash2"],
)
block_size = 3
block_hashes1 = hash_request_tokens(block_size, request1)
block_hashes2 = hash_request_tokens(block_size, request2)
assert block_hashes1[0] != block_hashes2[0]
assert block_hashes1[1] != block_hashes2[1]
def test_hash_request_tokens_no_mm_inputs(): def test_hash_request_tokens_no_mm_inputs():
request = make_request( request = make_request(
request_id=0, request_id=0,

View File

@ -262,8 +262,10 @@ def hash_block_tokens(
The hash value of the block and the token ids in the block. The hash value of the block and the token ids in the block.
The entire tuple is used as the hash key of the block. The entire tuple is used as the hash key of the block.
""" """
return BlockHashType(hash((parent_block_hash, *curr_block_token_ids)), curr_block_token_ids_tuple = tuple(curr_block_token_ids)
tuple(curr_block_token_ids), extra_keys) return BlockHashType(
hash((parent_block_hash, curr_block_token_ids_tuple, extra_keys)),
curr_block_token_ids_tuple, extra_keys)
def hash_request_tokens(block_size: int, def hash_request_tokens(block_size: int,