mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization] 移除 num_blocks 上限限制 (#7241)
This commit is contained in:
@@ -173,6 +173,7 @@ custom_ops/tmp*
 build
 
 .ccls-cache
+.claude
 
 third_party
 
@@ -126,11 +126,6 @@ class IluvatarPaddleDisWorkerProc(PaddleDisWorkerProc):
         # 2. Calculate the appropriate number of blocks
         model_block_memory_used = self.worker.cal_theortical_kvcache()
         num_blocks_local = int(available_kv_cache_memory // model_block_memory_used)
-        # NOTE(liuzichang): Too many block will lead to illegal memory access
-        # We will develop dynamic limits in future.
-        if num_blocks_local > 40000:
-            logger.info(f"------- Reset num_blocks_local {num_blocks_local} to 40000")
-            num_blocks_local = min(40000, num_blocks_local)
         logger.info(f"------- model_block_memory_used:{model_block_memory_used} --------")
         logger.info(f"------- num_blocks_local:{num_blocks_local} --------")
 
@@ -666,11 +666,6 @@ class PaddleDisWorkerProc:
         # 2. Calculate the appropriate number of blocks
         model_block_memory_used = self.worker.cal_theortical_kvcache()
         num_blocks_local = int(available_kv_cache_memory // model_block_memory_used)
-        # NOTE(liuzichang): Too many block will lead to illegal memory access
-        # We will develop dynamic limits in future.
-        if num_blocks_local > 40000:
-            logger.info(f"------- Reset num_blocks_local {num_blocks_local} to 40000")
-            num_blocks_local = min(40000, num_blocks_local)
         logger.info(f"------- model_block_memory_used:{model_block_memory_used / 1024**3} GB --------")
         logger.info(f"------- num_blocks_local:{num_blocks_local} --------")
 
@@ -281,7 +281,7 @@ def test_non_thinking_prompt(api_url, headers):
 def test_profile_reset_block_num():
     """测试profile reset_block_num功能,与baseline diff不能超过5%"""
     log_file = "./log/config.log"
-    baseline = 40000
+    baseline = 74000
 
     if not os.path.exists(log_file):
         pytest.fail(f"Log file not found: {log_file}")
@@ -736,7 +736,7 @@ def test_profile_reset_block_num():
     """测试profile reset_block_num功能,与baseline diff不能超过5%"""
     log_dir = os.getenv("FD_LOG_DIR", "log")
     log_file = os.path.join(log_dir, "config.log")
-    baseline = 40000
+    baseline = 65400
 
     if not os.path.exists(log_file):
         pytest.fail(f"Log file not found: {log_file}")
Reference in New Issue
Block a user