[Optimization][DeepSeekV3.2] Reduce slot_mapping computation frequency from twice per layer to a single pre-processing step. (#7367)

This commit is contained in:
ShaneGZhu
2026-04-16 19:54:12 +08:00
committed by GitHub
parent d2d633b05c
commit 2d8338f9e4
10 changed files with 73 additions and 146 deletions
+1
View File
@@ -85,6 +85,7 @@ class MockFDConfig:
name = "default"
splitwise_role = "mixed"
max_num_seqs = 2
max_num_batched_tokens = 2048
parallel_config = ParallelConfig()
scheduler_config = SchedulerConfig()
@@ -33,24 +33,17 @@ class TestGetPositionIdsAndMaskEncoderBatch(unittest.TestCase):
total_len = int(seq_lens_encoder.numpy().sum() + seq_lens_this_time.numpy().sum())
position_ids = paddle.zeros([total_len], dtype="int32")
mask_encoder_batch = paddle.zeros([total_len], dtype="int32")
# Call the custom operator
get_position_ids_and_mask_encoder_batch(
seq_lens_encoder, seq_lens_decoder, seq_lens_this_time, position_ids, mask_encoder_batch
)
get_position_ids_and_mask_encoder_batch(seq_lens_encoder, seq_lens_decoder, seq_lens_this_time, position_ids)
expected_position_ids = np.array([0, 1, 2, 1, 0, 1, 2, 3], dtype=np.int32)
expected_mask = np.array([1, 1, 1, 0, 1, 1, 0, 0], dtype=np.int32)
# Convert to numpy for comparison
position_ids_np = position_ids.numpy()
mask_encoder_batch_np = mask_encoder_batch.numpy()
# Assert equality
np.testing.assert_array_equal(position_ids_np, expected_position_ids)
np.testing.assert_array_equal(mask_encoder_batch_np, expected_mask)
def test_empty_decoder(self):
# Test case where decoder length is 0
@@ -59,17 +52,12 @@ class TestGetPositionIdsAndMaskEncoderBatch(unittest.TestCase):
seq_lens_this_time = paddle.to_tensor([0], dtype="int32")
position_ids = paddle.zeros([2], dtype="int32")
mask_encoder_batch = paddle.zeros([2], dtype="int32")
get_position_ids_and_mask_encoder_batch(
seq_lens_encoder, seq_lens_decoder, seq_lens_this_time, position_ids, mask_encoder_batch
)
get_position_ids_and_mask_encoder_batch(seq_lens_encoder, seq_lens_decoder, seq_lens_this_time, position_ids)
expected_position_ids = np.array([0, 1], dtype=np.int32)
expected_mask = np.array([1, 1], dtype=np.int32)
np.testing.assert_array_equal(position_ids.numpy(), expected_position_ids)
np.testing.assert_array_equal(mask_encoder_batch.numpy(), expected_mask)
if __name__ == "__main__":
@@ -59,6 +59,7 @@ def create_mock_config():
scheduler_config = Mock(spec=SchedulerConfig)
scheduler_config.max_num_seqs = 10
scheduler_config.max_num_batched_tokens = 2048
speculative_config = Mock(spec=SpeculativeConfig)
speculative_config.method = None