Files
FastDeploy/tests/engine/test_resource_manager.py
T
cloudforge1 9148562ed0 [CI]【Hackathon 10th Spring No.35】resource_manager 单测补充 (#6734)
* [CI]【Hackathon 10th Spring No.35】resource_manager 单测补充

* [CI]【Hackathon 10th Spring No.35】resource_manager 单测补充

* [CI]【Hackathon 10th Spring No.35】add __main__ block

---------

Co-authored-by: cloudforge1 <cloudforge1@users.noreply.github.com>
Co-authored-by: CSWYF3634076 <wangyafeng@baidu.com>
2026-03-19 17:45:21 +08:00

254 lines
8.8 KiB
Python

# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from types import SimpleNamespace
from unittest.mock import patch
import pytest
# -- Stubs ------------------------------------------------------------------
class _StubCacheManager:
"""Minimal PrefixCacheManager surface for unit-testing ResourceManager."""
def __init__(self, *args, num_blocks=100, **kwargs):
self.num_gpu_blocks = num_blocks
self.gpu_free_block_list = list(range(num_blocks))
self._recycled = []
self._released = []
def allocate_gpu_blocks(self, n):
out = self.gpu_free_block_list[:n]
self.gpu_free_block_list = self.gpu_free_block_list[n:]
return out
def recycle_gpu_blocks(self, blocks):
self._recycled.extend(blocks)
self.gpu_free_block_list.extend(blocks)
def release_block_ids_async(self, task):
self._released.append(task)
def free_block_ids_async(self, n):
return n
def update_cache_config(self, cfg):
pass
def request_block_ids(self, task, block_size, dec_token_num):
total = (len(task.prompt_token_ids) + block_size - 1) // block_size
common = list(range(total // 2))
unique = list(range(100, 100 + total - total // 2))
return common, unique, {"gpu_cache_blocks": len(common), "cpu_cache_blocks": 0}
class _Task:
"""Real task object with all fields ResourceManager touches."""
def __init__(self, request_id="req-1", prompt_len=128, disaggregate_info=None):
self.request_id = request_id
self.prompt_token_ids = list(range(prompt_len))
self.prompt_token_ids_len = prompt_len
self.block_tables = []
self.need_block_tables = []
self.disaggregate_info = disaggregate_info
self.seq_lens_decoder = 0
self.inference_time_cost = -1.0
self.tokens_all_num = 0
self.idx = 0
self.num_cached_tokens = 0
self.gpu_cache_token_num = 0
self.cpu_cache_token_num = 0
self.cache_info = None
self.cache_prepare_time = 0.0
self._seed = None
def get(self, k):
return self._seed if k == "seed" else None
def set(self, k, v):
if k == "seed":
self._seed = v
def _cache_cfg(block_size=64, dec_token_num=128, max_block_num_per_seq=16, enable_prefix_caching=False):
return SimpleNamespace(
block_size=block_size,
dec_token_num=dec_token_num,
max_block_num_per_seq=max_block_num_per_seq,
enable_prefix_caching=enable_prefix_caching,
)
def _config(cache_config=None):
return SimpleNamespace(cache_config=cache_config or _cache_cfg())
def _noop_logger():
return SimpleNamespace(
info=lambda *a, **kw: None,
debug=lambda *a, **kw: None,
error=lambda *a, **kw: None,
warning=lambda *a, **kw: None,
)
def _stub_metrics():
m = SimpleNamespace()
for n in (
"max_batch_size",
"batch_size",
"available_gpu_block_num",
"gpu_cache_usage_perc",
"prefix_cache_token_num",
"prefix_gpu_cache_token_num",
"prefix_cpu_cache_token_num",
):
setattr(m, n, SimpleNamespace(set=lambda v: None, inc=lambda v: None))
return m
@pytest.fixture()
def rm_factory():
    """Provide a builder for ResourceManager instances wired to the local stubs.

    The cache-manager class, metrics object and logger referenced by
    ``fastdeploy.engine.resource_manager`` stay patched for as long as the
    requesting test runs, because the builder is yielded from inside the
    patch context.
    """
    with (
        patch("fastdeploy.engine.resource_manager.PrefixCacheManager", _StubCacheManager),
        patch("fastdeploy.engine.resource_manager.main_process_metrics", _stub_metrics()),
        patch("fastdeploy.engine.resource_manager.llm_logger", _noop_logger()),
    ):
        # Imported only after the patches are active.
        from fastdeploy.engine.resource_manager import ResourceManager

        def build(max_seqs=4, block_size=64, dec_token=128, enable_prefix=False, num_free=100):
            cache_config = _cache_cfg(
                block_size=block_size,
                dec_token_num=dec_token,
                max_block_num_per_seq=16,
                enable_prefix_caching=enable_prefix,
            )
            # NOTE(review): the trailing positional arguments (1, "mixed") are
            # passed through unchanged; their meaning is defined by
            # ResourceManager.__init__, which is not visible from this file.
            manager = ResourceManager(max_seqs, _config(cache_config), 1, "mixed")
            # Replace whatever cache manager the constructor created with a
            # fresh stub holding exactly ``num_free`` free blocks.
            manager.cache_manager = _StubCacheManager(num_blocks=num_free)
            return manager

        yield build
# -- Tests ------------------------------------------------------------------
def test_init_block_math_and_config(rm_factory):
    """Check constructor state, the block-count helpers and reset_cache_config."""
    manager = rm_factory(max_seqs=8, block_size=64, dec_token=128)

    # Constructor state: eight slots, every one flagged as stopped.
    assert manager.max_num_seqs == 8
    assert manager.stop_flags == [True for _ in range(8)]

    # Block counts expected for a 100-token input with block_size=64, dec_token=128.
    required_blocks = manager.get_required_block_number(100)
    assert required_blocks == 4
    encoder_blocks = manager.get_encoder_block_number(100)
    assert encoder_blocks == 2
    decoder_blocks = manager.get_decoder_block_number()
    assert decoder_blocks == 2
    assert manager.total_block_number() == 100

    # Swapping in a new cache config must be visible through ``cfg``.
    replacement = _cache_cfg(block_size=128)
    manager.reset_cache_config(replacement)
    assert manager.cfg.block_size == 128
def test_availability_and_sufficiency(rm_factory):
    """Cover available_batch, available_block_num and is_resource_sufficient."""
    manager = rm_factory(max_seqs=4, dec_token=0, num_free=100)

    # Fresh manager: all four slots and all hundred blocks are available.
    assert manager.available_batch() == 4
    assert manager.available_block_num() == 100
    assert manager.is_resource_sufficient(64)

    # With every slot flagged as not-stopped, even a 1-token request is refused.
    manager.stop_flags = [False, False, False, False]
    assert not manager.is_resource_sufficient(1)

    # A second manager with slots free but no free blocks is refused as well.
    starved = rm_factory(max_seqs=4, num_free=0)
    assert not starved.is_resource_sufficient(64)
def test_allocate_no_prefix(rm_factory):
    """Allocate three tasks with prefix caching disabled (happy path)."""
    manager = rm_factory(max_seqs=4, enable_prefix=False, dec_token=0, num_free=100)
    pending = [_Task(request_id=f"r{i}") for i in range(3)]

    allocated = manager.allocate_resources_for_new_tasks(pending)

    # Three of the four slots are taken; the last one stays stopped.
    assert len(allocated) == 3
    assert manager.stop_flags == [False, False, False, True]
    assert manager.real_bsz == 3

    # Every returned task received a seed and at least one block.
    seeds = [task.get("seed") for task in allocated]
    assert all(seed is not None for seed in seeds)
    block_counts = [len(task.block_tables) for task in allocated]
    assert all(count > 0 for count in block_counts)
def test_allocate_with_prefix(rm_factory):
    """Allocate one 256-token task with prefix caching enabled.

    The original docstring notes this path exercises
    ``_record_request_cache_info``.
    """
    manager = rm_factory(max_seqs=4, enable_prefix=True, dec_token=0, block_size=64, num_free=100)
    task = _Task(prompt_len=256)

    allocated = manager.allocate_resources_for_new_tasks([task])

    assert len(allocated) == 1
    assert len(task.block_tables) > 0
    # Cache bookkeeping must have been filled in on the task itself.
    assert task.num_cached_tokens >= 0
    assert task.cache_info is not None
def test_allocate_disaggregate(rm_factory):
    """Allocate tasks carrying disaggregate_info: prefill with prefix cache, decode without."""
    # Prefill role, prefix caching on.
    prefill_manager = rm_factory(max_seqs=4, enable_prefix=True, dec_token=0, block_size=64, num_free=100)
    prefill_task = _Task(prompt_len=256, disaggregate_info={"role": "prefill"})
    prefill_manager.allocate_resources_for_new_tasks([prefill_task])
    assert "block_tables" in prefill_task.disaggregate_info
    assert prefill_task.request_id in prefill_manager.req_dict

    # Decode role, prefix caching off.
    decode_manager = rm_factory(max_seqs=4, enable_prefix=False, dec_token=0, num_free=100)
    decode_task = _Task(prompt_len=128, disaggregate_info={"role": "decode"})
    decode_manager.allocate_resources_for_new_tasks([decode_task])
    assert decode_task.request_id in decode_manager.req_dict
def test_recycle_free_and_check(rm_factory):
    """Cover _recycle_block_tables, free_block_tables and check_and_free_block_tables."""
    # Without prefix caching the task's blocks reach recycle_gpu_blocks.
    plain_manager = rm_factory(enable_prefix=False, num_free=100)
    plain_task = _Task()
    plain_task.block_tables = [0, 1, 2]
    plain_manager._recycle_block_tables(plain_task)
    assert 0 in plain_manager.cache_manager._recycled

    # With prefix caching the task itself is handed to release_block_ids_async.
    prefix_manager = rm_factory(enable_prefix=True, num_free=100)
    prefix_task = _Task()
    prefix_task.block_tables = [0, 1]
    prefix_manager._recycle_block_tables(prefix_task)
    assert prefix_task in prefix_manager.cache_manager._released

    # free_block_tables returns what the stub echoes back; the two
    # check_and_free_block_tables calls only need to complete without raising.
    assert plain_manager.free_block_tables(10) == 10
    plain_manager.check_and_free_block_tables()
    low_manager = rm_factory(enable_prefix=True, num_free=5)
    low_manager.check_and_free_block_tables()
def test_info_and_cache_usage(rm_factory):
    """Check the info() summary string and get_gpu_cache_usage_perc."""
    manager = rm_factory(num_free=100)
    summary = manager.info()
    assert "ResourceManager info" in summary

    # 80 of 100 blocks free -> 20% usage, compared with a small tolerance.
    cache = manager.cache_manager
    cache.num_gpu_blocks = 100
    cache.gpu_free_block_list = list(range(80))
    usage = manager.get_gpu_cache_usage_perc()
    assert abs(usage - 0.2) < 1e-9

    # Zero total blocks must yield 0.0.
    empty_manager = rm_factory(num_free=0)
    empty_manager.cache_manager.num_gpu_blocks = 0
    assert empty_manager.get_gpu_cache_usage_perc() == 0.0
def test_delete_cached_data(rm_factory):
    """Check _delete_cached_data for a full and a partial cache hit."""
    manager = rm_factory(block_size=64)

    # Cached length equals the 128-token prompt: 64 tokens are expected to
    # remain in the prompt and 64 to be counted as already decoded.
    full_hit = _Task(prompt_len=128)
    manager._delete_cached_data(full_hit, 128)
    assert full_hit.prompt_token_ids_len == 64
    assert full_hit.seq_lens_decoder == 64

    # 64 of 256 tokens cached: 192 remain in the prompt.
    partial_hit = _Task(prompt_len=256)
    manager._delete_cached_data(partial_hit, 64)
    assert partial_hit.prompt_token_ids_len == 192
    assert partial_hit.seq_lens_decoder == 64
if __name__ == "__main__":
    # pytest.main() returns the session's exit code rather than exiting.
    # Raise it so that running this file directly ends with a non-zero
    # status when any test fails, instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))