mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
3749457476
numpy tobytes() only serializes raw element bytes without encoding shape
or dtype metadata. This means arrays with identical raw bytes but
different shapes (e.g. (6,4) vs (4,6)) or different dtypes (e.g.
float32 vs uint8 reinterpretation of same memory) produce the same
SHA-256 digest, leading to silent cache collisions in
ProcessorCacheManager / EncoderCacheManager / PrefixCacheManager.
Prepend a "{shape}|{dtype}|" header to the byte payload before hashing
so that shape and dtype participate in the digest.
Added test cases for shape and dtype sensitivity.
61 lines
2.5 KiB
Python
61 lines
2.5 KiB
Python
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
import hashlib
|
|
import pickle
|
|
import unittest
|
|
|
|
import numpy as np
|
|
|
|
from fastdeploy.multimodal.hasher import MultimodalHasher
|
|
|
|
|
|
class TestHashFeatures(unittest.TestCase):
    """Unit tests for ``MultimodalHasher.hash_features``.

    The expectations encoded here are:

    * for a NumPy array, the digest is the SHA-256 hex digest of a
      ``"{shape}|{dtype}|"`` header followed by ``arr.tobytes()``;
    * for any other object, the digest is the SHA-256 hex digest of
      ``pickle.dumps(obj)``.
    """

    def test_hash_features_ndarray(self):
        """The ndarray digest covers the shape/dtype header plus the raw bytes."""
        # Seeded so that a failure is reproducible; ``high`` is exclusive, so 256
        # is needed to cover the full uint8 value range.
        rng = np.random.RandomState(0)
        arr = rng.randint(low=0, high=256, size=(28, 28), dtype=np.uint8)

        arr_hash = MultimodalHasher.hash_features(arr)

        header = f"{arr.shape}|{arr.dtype}|".encode()
        target_hash = hashlib.sha256(header + arr.tobytes()).hexdigest()
        self.assertEqual(arr_hash, target_hash, f"Ndarray hash mismatch: {arr_hash} != {target_hash}")

    def test_hash_features_ndarray_shape_sensitivity(self):
        """Arrays with same bytes but different shapes must produce different hashes"""
        base = np.arange(24, dtype=np.float32)
        a = base.reshape(6, 4)
        b = base.reshape(4, 6)

        # Precondition: only the shape differs, the serialized payload does not.
        self.assertEqual(a.tobytes(), b.tobytes())
        self.assertNotEqual(MultimodalHasher.hash_features(a), MultimodalHasher.hash_features(b))

    def test_hash_features_ndarray_dtype_sensitivity(self):
        """Arrays with same shape but different dtypes must produce different hashes"""
        # Different element widths: the byte payloads already differ in length.
        a = np.zeros((4, 4), dtype=np.float32)
        b = np.zeros((4, 4), dtype=np.float64)
        self.assertNotEqual(MultimodalHasher.hash_features(a), MultimodalHasher.hash_features(b))

        # Same shape AND same raw bytes, reinterpreted under another dtype of equal
        # width. Only the dtype can tell these apart, so this is the case that
        # actually guards against a digest computed from ``tobytes()`` alone.
        as_float = np.arange(16, dtype=np.float32).reshape(4, 4)
        as_int = as_float.view(np.int32)
        self.assertEqual(as_float.shape, as_int.shape)
        self.assertEqual(as_float.tobytes(), as_int.tobytes())
        self.assertNotEqual(as_float.dtype, as_int.dtype)
        self.assertNotEqual(
            MultimodalHasher.hash_features(as_float),
            MultimodalHasher.hash_features(as_int),
        )

    def test_hash_features_object(self):
        """Non-ndarray inputs are hashed via the SHA-256 of their pickle dump."""
        cases = (
            ("Dict", {"key": "value"}),
            ("Str", "test hasher str"),
        )
        for label, obj in cases:
            # subTest keeps the second case running even if the first one fails.
            with self.subTest(kind=label):
                obj_hash = MultimodalHasher.hash_features(obj)
                target_hash = hashlib.sha256(pickle.dumps(obj)).hexdigest()
                self.assertEqual(obj_hash, target_hash, f"{label} hash mismatch: {obj_hash} != {target_hash}")
|
|
|
|
|
|
# Allow running this module directly as a script, in addition to discovery by a
# test runner.
if __name__ == "__main__":
    unittest.main()
|