[Cherry-Pick][BugFix] Fix entropy bugs (#5818) (#5819)

* [Speculative Decoding] Fix attn_mask_offset for multi-step MTP in mixed and PD-split modes (#5738)

* fix attn_mask_offset in mtp with multi-step and pd-split-mode

* fix xpu operator register

* update pmtp multi-step mtp strategy in pd-split-mode

* add note

* fix xpu register

* fix entropy bugs

* Revert "[Speculative Decoding] Fix attn_mask_offset for multi-step MTP in mixed and PD-split modes (#5738)"

This reverts commit ba0d35a52e8775300a1459bfcaa39056df570525.

* fix ut

* fix

---------

Co-authored-by: freeliuzc <lzc842650834@gmail.com>
This commit is contained in:
GoldPancake
2025-12-30 12:45:03 +08:00
committed by GitHub
parent 834502711a
commit 0d29f6df03
4 changed files with 31 additions and 10 deletions
+6 -6
View File
@@ -28,6 +28,7 @@ class TestCalculateLogitsEntropy(unittest.TestCase):
share_inputs = {
"seq_lens_this_time": paddle.to_tensor([[1], [0], [15]], dtype="int32"),
"seq_lens_encoder": paddle.to_tensor([[0], [0], [15]], dtype="int32"),
"seq_lens_decoder": paddle.to_tensor([[30], [0], [15]], dtype="int32"),
"entropy_list": [[], [], []],
"stop_flags": paddle.to_tensor([[False], [True], [False]], dtype="bool"),
"req_ids": ["req_1", "req_2", "req_3"],
@@ -55,6 +56,7 @@ class TestCalculateLogitsEntropy(unittest.TestCase):
share_inputs = {
"seq_lens_this_time": paddle.to_tensor([[1], [0], [15]], dtype="int32"),
"seq_lens_encoder": paddle.to_tensor([[0], [0], [15]], dtype="int32"),
"seq_lens_decoder": paddle.to_tensor([[30], [0], [15]], dtype="int32"),
"entropy_list": [[], [], []],
"stop_flags": paddle.to_tensor([[False], [True], [False]], dtype="bool"),
"req_ids": ["req_1", "req_2", "req_3"],
@@ -82,6 +84,7 @@ class TestCalculateLogitsEntropy(unittest.TestCase):
share_inputs = {
"seq_lens_this_time": paddle.to_tensor([[1], [0], [15]], dtype="int32"),
"seq_lens_encoder": paddle.to_tensor([[0], [0], [15]], dtype="int32"),
"seq_lens_decoder": paddle.to_tensor([[30], [0], [15]], dtype="int32"),
"entropy_list": [[], [], []],
"stop_flags": paddle.to_tensor([[True], [True], [False]], dtype="bool"),
"req_ids": ["req_1", "req_2", "req_3"],
@@ -111,6 +114,7 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
share_inputs = {
"seq_lens_this_time": paddle.to_tensor([[2], [2], [0], [15]], dtype="int32"),
"seq_lens_encoder": paddle.to_tensor([[0], [0], [0], [15]], dtype="int32"),
"seq_lens_decoder": paddle.to_tensor([[30], [30], [0], [15]], dtype="int32"),
"entropy_list": [[], [], [], []],
"stop_flags": paddle.to_tensor([[False], [False], [True], [False]], dtype="bool"),
"req_ids": ["req_1", "req_2", "req_3", "req_4"],
@@ -130,8 +134,6 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
speculate_calculate_logits_entropy(logits, share_inputs, temperature)
print(share_inputs["entropy_list"])
self.assertEqual(len(share_inputs["entropy_list"][0]), 2)
self.assertEqual(len(share_inputs["entropy_list"][1]), 1)
self.assertEqual(len(share_inputs["entropy_list"][2]), 0)
@@ -145,6 +147,7 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
share_inputs = {
"seq_lens_this_time": paddle.to_tensor([[2], [2], [0], [15]], dtype="int32"),
"seq_lens_encoder": paddle.to_tensor([[0], [0], [0], [15]], dtype="int32"),
"seq_lens_decoder": paddle.to_tensor([[30], [30], [0], [15]], dtype="int32"),
"entropy_list": [[], [], [], []],
"stop_flags": paddle.to_tensor([[False], [False], [True], [False]], dtype="bool"),
"req_ids": ["req_1", "req_2", "req_3", "req_4"],
@@ -164,8 +167,6 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
speculate_calculate_logits_entropy(logits, share_inputs, temperature)
print(share_inputs["entropy_list"])
self.assertEqual(len(share_inputs["entropy_list"][0]), 2)
self.assertEqual(len(share_inputs["entropy_list"][1]), 1)
self.assertEqual(len(share_inputs["entropy_list"][2]), 0)
@@ -179,6 +180,7 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
share_inputs = {
"seq_lens_this_time": paddle.to_tensor([[2], [2], [0], [15]], dtype="int32"),
"seq_lens_encoder": paddle.to_tensor([[0], [0], [0], [15]], dtype="int32"),
"seq_lens_decoder": paddle.to_tensor([[30], [30], [0], [15]], dtype="int32"),
"entropy_list": [[], [], [], []],
"stop_flags": paddle.to_tensor([[True], [False], [True], [False]], dtype="bool"),
"req_ids": ["req_1", "req_2", "req_3", "req_4"],
@@ -198,8 +200,6 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
speculate_calculate_logits_entropy(logits, share_inputs, temperature)
print(share_inputs["entropy_list"])
self.assertEqual(len(share_inputs["entropy_list"][0]), 0)
self.assertEqual(len(share_inputs["entropy_list"][1]), 1)
self.assertEqual(len(share_inputs["entropy_list"][2]), 0)