mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Optimization][DeepSeekV3.2] Compute slot_mapping once in a single pre-processing step instead of twice per layer. (#7367)
This commit is contained in:
@@ -160,7 +160,8 @@ class ForwardMeta:
|
||||
|
||||
# for mla & dsa
|
||||
position_ids: Optional[paddle.Tensor] = None
|
||||
mask_encoder_batch: Optional[paddle.Tensor] = None
|
||||
# for kvcache slot
|
||||
slot_mapping: Optional[paddle.Tensor] = None
|
||||
|
||||
real_bsz: int = 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user