mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] fix num_cpu_blocks computation (#6438)
* [BugFix] fix num_cpu_blocks computation
* [fix] fix syntax and log
* [fix] pre-commit
* [fix] use getattr
* [fix] ci test
This commit is contained in:
@@ -81,7 +81,7 @@ def _build_manager(
|
||||
model_cfg.max_model_len = max_model_len
|
||||
model_cfg.architectures = architectures or ["test_model"]
|
||||
model_cfg.mm_max_tokens_per_item = None
|
||||
- cache_cfg.bytes_per_layer_per_block = 1
|
||||
+ cache_cfg.bytes_per_token_per_layer = 1
|
||||
cache_cfg.kv_cache_ratio = 1.0
|
||||
parallel_cfg = ParallelConfig(args)
|
||||
scheduler_cfg = SchedulerConfig(args)
|
||||
@@ -142,7 +142,7 @@ class TestResourceManagerV1(unittest.TestCase):
|
||||
model_cfg.max_model_len = 3200
|
||||
model_cfg.architectures = ["test_model"]
|
||||
model_cfg.mm_max_tokens_per_item = None
|
||||
- cache_cfg.bytes_per_layer_per_block = 1
|
||||
+ cache_cfg.bytes_per_token_per_layer = 1
|
||||
cache_cfg.kv_cache_ratio = 1.0
|
||||
parallel_cfg = ParallelConfig(args)
|
||||
scheduler_cfg = SchedulerConfig(args)
|
||||
@@ -304,7 +304,7 @@ class TestRevertChunkedMMInput(unittest.TestCase):
|
||||
model_cfg.max_model_len = 3200
|
||||
model_cfg.architectures = ["test_model"]
|
||||
model_cfg.mm_max_tokens_per_item = None
|
||||
- cache_cfg.bytes_per_layer_per_block = 1
|
||||
+ cache_cfg.bytes_per_token_per_layer = 1
|
||||
cache_cfg.kv_cache_ratio = 1.0
|
||||
cache_cfg.block_size = 64
|
||||
parallel_cfg = ParallelConfig(args)
|
||||
|
||||
Reference in New Issue
Block a user