Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2026-04-23 00:17:25 +08:00.
[Metax] fix shape error & output garbled code when reasoning big picture or video (#5965)
Co-authored-by: root <root@lt-wks-10-0-180-15.pub.metax-tech.com>
This commit is contained in:
@@ -1374,7 +1374,7 @@ class EngineArgs:
         speculative_cfg = self.create_speculative_config()

         if not self.enable_chunked_prefill:
-            if current_platform.is_cuda() and self.splitwise_role == "mixed":
+            if (current_platform.is_cuda() or current_platform.is_maca()) and self.splitwise_role == "mixed":
                 # default enable chunked prefill
                 self.enable_chunked_prefill = True

@@ -1384,7 +1384,10 @@ class EngineArgs:

         if self.max_num_batched_tokens is None:
             if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
-                self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
+                if current_platform.is_maca():
+                    self.max_num_batched_tokens = self.max_model_len
+                else:
+                    self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
             else:
                 if self.enable_chunked_prefill:
                     self.max_num_batched_tokens = 2048

Reference in New Issue
Block a user