[Optimization][DeepSeekV3.2] Reducing slot_mapping compute frequency from twice per layer to a single pre-processing step. (#7367)

This commit is contained in:
ShaneGZhu
2026-04-16 19:54:12 +08:00
committed by GitHub
parent d2d633b05c
commit 2d8338f9e4
10 changed files with 73 additions and 146 deletions
+2 -1
View File
@@ -160,7 +160,8 @@ class ForwardMeta:
# for mla & dsa
position_ids: Optional[paddle.Tensor] = None
mask_encoder_batch: Optional[paddle.Tensor] = None
# slot mapping for the KV cache
slot_mapping: Optional[paddle.Tensor] = None
real_bsz: int = 0