mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix][Scheduler] Fix FD_DISABLE_CHUNKED_PREFILL max_num_batched_tokens limit (#7407)
* fix FD_DISABLE_CHUNKED_PREFILL max_num_batched_tokens=max_model_len
* fix FD_DISABLE_CHUNKED_PREFILL max_num_batched_tokens=max_model_len
This commit is contained in:
@@ -2064,7 +2064,10 @@ class FDConfig:
 
         if self.scheduler_config.max_num_batched_tokens is None:
             if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
-                self.scheduler_config.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
+                if int(envs.FD_DISABLE_CHUNKED_PREFILL):
+                    self.scheduler_config.max_num_batched_tokens = self.model_config.max_model_len
+                else:
+                    self.scheduler_config.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
             else:
                 if self.cache_config.enable_chunked_prefill:
                     self.scheduler_config.max_num_batched_tokens = 2048
@@ -1498,7 +1498,11 @@ class EngineArgs:
 
         if self.max_num_batched_tokens is None:
             if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
-                if current_platform.is_maca() or current_platform.is_iluvatar():
+                if (
+                    int(envs.FD_DISABLE_CHUNKED_PREFILL)
+                    or current_platform.is_maca()
+                    or current_platform.is_iluvatar()
+                ):
                     self.max_num_batched_tokens = self.max_model_len
                 else:
                     self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
Reference in New Issue
Block a user