[Optimization] 移除 num_blocks 上限限制 (#7241)

This commit is contained in:
Yuanle Liu
2026-04-13 22:07:41 +08:00
committed by GitHub
parent e83d45833f
commit 0ddb6e461c
5 changed files with 3 additions and 12 deletions
-5
View File
@@ -126,11 +126,6 @@ class IluvatarPaddleDisWorkerProc(PaddleDisWorkerProc):
# 2. Calculate the appropriate number of blocks
model_block_memory_used = self.worker.cal_theortical_kvcache()
num_blocks_local = int(available_kv_cache_memory // model_block_memory_used)
# NOTE(liuzichang): Too many blocks will lead to illegal memory access.
# We will develop dynamic limits in the future.
if num_blocks_local > 40000:
logger.info(f"------- Reset num_blocks_local {num_blocks_local} to 40000")
num_blocks_local = min(40000, num_blocks_local)
logger.info(f"------- model_block_memory_used:{model_block_memory_used} --------")
logger.info(f"------- num_blocks_local:{num_blocks_local} --------")
-5
View File
@@ -666,11 +666,6 @@ class PaddleDisWorkerProc:
# 2. Calculate the appropriate number of blocks
model_block_memory_used = self.worker.cal_theortical_kvcache()
num_blocks_local = int(available_kv_cache_memory // model_block_memory_used)
# NOTE(liuzichang): Too many blocks will lead to illegal memory access.
# We will develop dynamic limits in the future.
if num_blocks_local > 40000:
logger.info(f"------- Reset num_blocks_local {num_blocks_local} to 40000")
num_blocks_local = min(40000, num_blocks_local)
logger.info(f"------- model_block_memory_used:{model_block_memory_used / 1024**3} GB --------")
logger.info(f"------- num_blocks_local:{num_blocks_local} --------")