[BugFix][Scheduler] Fix FD_DISABLE_CHUNKED_PREFILL max_num_batched_tokens limit (#7407)

* fix: with FD_DISABLE_CHUNKED_PREFILL set, default max_num_batched_tokens to max_model_len instead of 8192
AIbin
2026-04-15 15:55:11 +08:00
committed by GitHub
parent 5e54770b2e
commit 8eebbcaf15
2 changed files with 9 additions and 2 deletions
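Before the diffs, a minimal sketch of the defaulting rule this commit establishes. The helper name and plain-bool parameters are hypothetical stand-ins for FastDeploy's envs.* flags and config objects; only the constants and the branch shape come from the patch.

# Hypothetical helper mirroring the patched defaulting rule; in FastDeploy the
# inputs come from envs.* and the scheduler/model config objects shown below.
from typing import Optional

def resolve_max_num_batched_tokens(
    user_value: Optional[int],
    v1_kvcache_scheduler: bool,
    disable_chunked_prefill: bool,
    max_model_len: int,
) -> Optional[int]:
    if user_value is not None:
        return user_value  # an explicit user setting always wins
    if v1_kvcache_scheduler:
        if disable_chunked_prefill:
            # Without chunked prefill, a whole prompt must fit in one batch,
            # so the cap has to be at least max_model_len.
            return max_model_len
        # With chunked prefill, a smaller cap guards against OOM.
        return 8192
    return None  # non-V1 path, handled separately (see the FDConfig hunk)

# Before this fix the disable_chunked_prefill branch was missing, so the 8192
# cap applied and prompts longer than 8192 tokens could not be prefilled.
assert resolve_max_num_batched_tokens(None, True, True, 131072) == 131072
assert resolve_max_num_batched_tokens(None, True, False, 131072) == 8192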
@@ -2064,7 +2064,10 @@ class FDConfig:
         if self.scheduler_config.max_num_batched_tokens is None:
             if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
-                self.scheduler_config.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
+                if int(envs.FD_DISABLE_CHUNKED_PREFILL):
+                    self.scheduler_config.max_num_batched_tokens = self.model_config.max_model_len
+                else:
+                    self.scheduler_config.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
             else:
                 if self.cache_config.enable_chunked_prefill:
                     self.scheduler_config.max_num_batched_tokens = 2048
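A quick sketch of how this branch is reached in practice, assuming FastDeploy's envs module reads these variables from the process environment under the same names used in the diff (engine/config construction is elided):

# Assumed usage: set both flags before the engine builds its FDConfig, and
# leave max_num_batched_tokens unset so the new default applies.
import os

os.environ["ENABLE_V1_KVCACHE_SCHEDULER"] = "1"  # take the V1 scheduler path
os.environ["FD_DISABLE_CHUNKED_PREFILL"] = "1"   # disable chunked prefill

# With both flags set, the branch above yields
#     scheduler_config.max_num_batched_tokens == model_config.max_model_len
# instead of the fixed 8192 cap.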
@@ -1498,7 +1498,11 @@ class EngineArgs:
         if self.max_num_batched_tokens is None:
             if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
-                if current_platform.is_maca() or current_platform.is_iluvatar():
+                if (
+                    int(envs.FD_DISABLE_CHUNKED_PREFILL)
+                    or current_platform.is_maca()
+                    or current_platform.is_iluvatar()
+                ):
                     self.max_num_batched_tokens = self.max_model_len
                 else:
                     self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
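The EngineArgs side widens an existing platform check rather than adding a new branch. A truth-table sketch of the widened condition, with plain booleans standing in for the envs.* and current_platform calls:

# Stand-ins: disable_chunked_prefill for int(envs.FD_DISABLE_CHUNKED_PREFILL),
# is_maca / is_iluvatar for the current_platform checks.
def uses_max_model_len(disable_chunked_prefill: bool, is_maca: bool, is_iluvatar: bool) -> bool:
    return disable_chunked_prefill or is_maca or is_iluvatar

assert uses_max_model_len(True, False, False)       # new: env var alone suffices
assert uses_max_model_len(False, True, False)       # unchanged: MACA
assert uses_max_model_len(False, False, True)       # unchanged: Iluvatar
assert not uses_max_model_len(False, False, False)  # otherwise keep the 8192 default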