Skip to content

Commit

Permalink
[Core] Improve hash collision avoidance in prefix caching (vllm-proje…
Browse files Browse the repository at this point in the history
…ct#12621)

Signed-off-by: Russell Bryant <[email protected]>
  • Loading branch information
russellb authored Feb 4, 2025
1 parent 5095e96 commit 73b35cc
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 10 deletions.
4 changes: 2 additions & 2 deletions tests/core/block/test_prefix_caching_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def test_nth_block_has_correct_content_hash(seed: int, block_size: int,

previous_block = MagicMock(spec=PrefixCachingBlock)
prev_block_hash = random.randint(0, 1000)
previous_block.content_hash = (prev_block_hash
if prev_block_has_hash else None)
previous_block.content_hash = (prev_block_hash if prev_block_has_hash
else hash('None'))

num_to_fill = block_size if is_curr_block_full else random.randint(
0, block_size - 1)
Expand Down
42 changes: 34 additions & 8 deletions vllm/core/block/prefix_caching_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ class PrefixCachingBlockAllocator(BlockAllocator):
from 0 to num_blocks - 1.
"""

# Note that we use 'None' as a string here instead of None because
# as of Python 3.12, hash(None) returns a constant predictable value.
# This could possibly make it easier to find and exploit hash
# collisions. 'None' as a string will be hashed differently per process,
# but consistently within the same process. This is the same as the
# behavior of None prior to Python 3.12.
_none_hash: int = hash('None')

# Implements Block.Factory.
def __init__(
self,
num_blocks: int,
Expand Down Expand Up @@ -122,7 +131,6 @@ def __init__(

self.metric_data = CacheMetricData()

# Implements Block.Factory.
def _create_block(
self,
prev_block: Optional[Block],
Expand Down Expand Up @@ -737,6 +745,14 @@ class PrefixCachingBlock(Block):
such as adapters that influence the block, apart from the token_ids.
"""

# Note that we use 'None' as a string here instead of None because
# as of Python 3.12, hash(None) returns a constant predictable value.
# This could possibly make it easier to find and exploit hash
# collisions. 'None' as a string will be hashed differently per process,
# but consistently within the same process. This is the same as the
# behavior of None prior to Python 3.12.
_none_hash: int = hash('None')

def __init__(
self,
prev_block: Optional[Block],
Expand Down Expand Up @@ -891,13 +907,13 @@ def content_hash(self) -> Optional[int]:

is_first_block = self._prev_block is None
prev_block_hash = (
None if is_first_block else
self._none_hash if is_first_block else
self._prev_block.content_hash # type: ignore
)

# Previous block exists but does not yet have a hash.
# Return no hash in this case.
if prev_block_hash is None and not is_first_block:
if prev_block_hash == self._none_hash and not is_first_block:
return None

self._cached_content_hash = PrefixCachingBlock.hash_block_tokens(
Expand All @@ -907,8 +923,9 @@ def content_hash(self) -> Optional[int]:
extra_hash=self._extra_hash)
return self._cached_content_hash

@staticmethod
def hash_block_tokens(is_first_block: bool,
@classmethod
def hash_block_tokens(cls,
is_first_block: bool,
prev_block_hash: Optional[int],
cur_block_token_ids: List[int],
extra_hash: Optional[int] = None) -> int:
Expand All @@ -929,7 +946,8 @@ def hash_block_tokens(is_first_block: bool,
Returns:
- int: The computed hash value for the block.
"""
assert (prev_block_hash is None) == is_first_block
if is_first_block and prev_block_hash is None:
prev_block_hash = cls._none_hash
return hash((is_first_block, prev_block_hash, *cur_block_token_ids,
extra_hash))

Expand All @@ -949,6 +967,14 @@ class ComputedBlocksTracker:
cached block hashes in the allocator.
"""

# Note that we use 'None' as a string here instead of None because
# as of Python 3.12, hash(None) returns a constant predictable value.
# This could possibly make it easier to find and exploit hash
# collisions. 'None' as a string will be hashed differently per process,
# but consistently within the same process. This is the same as the
# behavior of None prior to Python 3.12.
_none_hash: int = hash('None')

def __init__(
self,
allocator: DeviceAwareBlockAllocator,
Expand Down Expand Up @@ -994,7 +1020,7 @@ def _update_seq_hashes(self, seq: Sequence) -> None:
# We need to know the hash of the previous block to compute the hash of
# the current block so that blocks could be uniquely identified across
# sequences of prefixes.
prev_block_hash = (None if cur_num_blocks_recorded == 0 else
prev_block_hash = (self._none_hash if cur_num_blocks_recorded == 0 else
block_hashes_recorded[-1])
# Only update the computed block hashes for the new blocks
for i in range(cur_num_blocks_recorded, num_computed_blocks):
Expand All @@ -1009,7 +1035,7 @@ def _update_seq_hashes(self, seq: Sequence) -> None:
# This has to be kept in sync with the allocator's hash
# calculation.
block_hash = PrefixCachingBlock.hash_block_tokens(
is_first_block=prev_block_hash is None,
is_first_block=prev_block_hash == self._none_hash,
prev_block_hash=prev_block_hash,
cur_block_token_ids=block_token_ids,
extra_hash=extra_hash,
Expand Down
9 changes: 9 additions & 0 deletions vllm/v1/core/kv_cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,15 @@ def hash_block_tokens(
The hash value of the block and the token ids in the block.
The entire tuple is used as the hash key of the block.
"""
if not parent_block_hash:
# Note that we use 'None' as a string here instead of None because
# as of Python 3.12, hash(None) returns a constant predictable value.
# This could possibly make it easier to find and exploit hash
# collisions. 'None' as a string will be hashed differently per process,
# but consistently within the same process. This is the same as the
# behavior of None prior to Python 3.12.
parent_block_hash = hash('None')

curr_block_token_ids_tuple = tuple(curr_block_token_ids)
return BlockHashType(
hash((parent_block_hash, curr_block_token_ids_tuple, extra_keys)),
Expand Down

0 comments on commit 73b35cc

Please sign in to comment.