[Optimization][DeepSeekV3.2] Reduce slot_mapping computation from twice per layer to a single pre-processing step. (#7367)

2026-04-23 17:11:21 +08:00 · 2026-04-16 19:54:12 +08:00
parent d2d633b05c
commit 2d8338f9e4
10 changed files with 73 additions and 146 deletions
@@ -160,7 +160,8 @@ class ForwardMeta:

    # for mla & dsa
    position_ids: Optional[paddle.Tensor] = None
-    mask_encoder_batch: Optional[paddle.Tensor] = None
+    # for kvcache slot
+    slot_mapping: Optional[paddle.Tensor] = None

    real_bsz: int = 0