mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] Fix model loading error for 300B FP8 EP parallel test case (#6382)
* fix fp8 bug
* fix
* fix comment, cn to en
* fix ci
* del else in utils
* fix review
This commit is contained in:
@@ -139,7 +139,7 @@ def limit_thinking_content_length(
|
||||
step_idx,
|
||||
limit_think_status,
|
||||
stop_flags,
|
||||
eos_token_ids, # 处理由于模型效果问题导致思考过程中输出eos token的问题
|
||||
eos_token_ids, # Address the issue where the model outputs EOS tokens during the reasoning process due to model performance problems
|
||||
think_end_id,
|
||||
)
|
||||
elif limit_strategy == "\n</think>\n\n":
|
||||
@@ -179,7 +179,7 @@ def speculate_limit_thinking_content_length(
|
||||
limit_think_status,
|
||||
accept_num,
|
||||
stop_flags,
|
||||
eos_token_ids, # 处理由于模型效果问题导致思考过程中输出eos token的问题
|
||||
eos_token_ids, # Address the issue where the model outputs EOS tokens during the reasoning process due to model performance problems
|
||||
think_end_id,
|
||||
)
|
||||
elif limit_strategy == "\n</think>\n\n":
|
||||
|
||||
Reference in New Issue
Block a user