mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
* [Speculative Decoding] Fix attn_mask_offset for multi-step MTP in mixed and PD-split modes (#5738) * fix attn_mask_offset in mtp with multi-step and pd-split-mode * fix xpu operator register * update pmtp multi-step mtp strategy in d-split-mode * add note * fix xpu register * fix entropy bugs * Revert "[Speculative Decoding] Fix attn_mask_offset for multi-step MTP in mixed and PD-split modes (#5738)" This reverts commit ba0d35a52e8775300a1459bfcaa39056df570525. * fix ut * fix --------- Co-authored-by: freeliuzc <lzc842650834@gmail.com>
This commit is contained in:
@@ -28,6 +28,7 @@ class TestCalculateLogitsEntropy(unittest.TestCase):
|
||||
share_inputs = {
|
||||
"seq_lens_this_time": paddle.to_tensor([[1], [0], [15]], dtype="int32"),
|
||||
"seq_lens_encoder": paddle.to_tensor([[0], [0], [15]], dtype="int32"),
|
||||
"seq_lens_decoder": paddle.to_tensor([[30], [0], [15]], dtype="int32"),
|
||||
"entropy_list": [[], [], []],
|
||||
"stop_flags": paddle.to_tensor([[False], [True], [False]], dtype="bool"),
|
||||
"req_ids": ["req_1", "req_2", "req_3"],
|
||||
@@ -55,6 +56,7 @@ class TestCalculateLogitsEntropy(unittest.TestCase):
|
||||
share_inputs = {
|
||||
"seq_lens_this_time": paddle.to_tensor([[1], [0], [15]], dtype="int32"),
|
||||
"seq_lens_encoder": paddle.to_tensor([[0], [0], [15]], dtype="int32"),
|
||||
"seq_lens_decoder": paddle.to_tensor([[30], [0], [15]], dtype="int32"),
|
||||
"entropy_list": [[], [], []],
|
||||
"stop_flags": paddle.to_tensor([[False], [True], [False]], dtype="bool"),
|
||||
"req_ids": ["req_1", "req_2", "req_3"],
|
||||
@@ -82,6 +84,7 @@ class TestCalculateLogitsEntropy(unittest.TestCase):
|
||||
share_inputs = {
|
||||
"seq_lens_this_time": paddle.to_tensor([[1], [0], [15]], dtype="int32"),
|
||||
"seq_lens_encoder": paddle.to_tensor([[0], [0], [15]], dtype="int32"),
|
||||
"seq_lens_decoder": paddle.to_tensor([[30], [0], [15]], dtype="int32"),
|
||||
"entropy_list": [[], [], []],
|
||||
"stop_flags": paddle.to_tensor([[True], [True], [False]], dtype="bool"),
|
||||
"req_ids": ["req_1", "req_2", "req_3"],
|
||||
@@ -111,6 +114,7 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
|
||||
share_inputs = {
|
||||
"seq_lens_this_time": paddle.to_tensor([[2], [2], [0], [15]], dtype="int32"),
|
||||
"seq_lens_encoder": paddle.to_tensor([[0], [0], [0], [15]], dtype="int32"),
|
||||
"seq_lens_decoder": paddle.to_tensor([[30], [30], [0], [15]], dtype="int32"),
|
||||
"entropy_list": [[], [], [], []],
|
||||
"stop_flags": paddle.to_tensor([[False], [False], [True], [False]], dtype="bool"),
|
||||
"req_ids": ["req_1", "req_2", "req_3", "req_4"],
|
||||
@@ -130,8 +134,6 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
|
||||
|
||||
speculate_calculate_logits_entropy(logits, share_inputs, temperature)
|
||||
|
||||
print(share_inputs["entropy_list"])
|
||||
|
||||
self.assertEqual(len(share_inputs["entropy_list"][0]), 2)
|
||||
self.assertEqual(len(share_inputs["entropy_list"][1]), 1)
|
||||
self.assertEqual(len(share_inputs["entropy_list"][2]), 0)
|
||||
@@ -145,6 +147,7 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
|
||||
share_inputs = {
|
||||
"seq_lens_this_time": paddle.to_tensor([[2], [2], [0], [15]], dtype="int32"),
|
||||
"seq_lens_encoder": paddle.to_tensor([[0], [0], [0], [15]], dtype="int32"),
|
||||
"seq_lens_decoder": paddle.to_tensor([[30], [30], [0], [15]], dtype="int32"),
|
||||
"entropy_list": [[], [], [], []],
|
||||
"stop_flags": paddle.to_tensor([[False], [False], [True], [False]], dtype="bool"),
|
||||
"req_ids": ["req_1", "req_2", "req_3", "req_4"],
|
||||
@@ -164,8 +167,6 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
|
||||
|
||||
speculate_calculate_logits_entropy(logits, share_inputs, temperature)
|
||||
|
||||
print(share_inputs["entropy_list"])
|
||||
|
||||
self.assertEqual(len(share_inputs["entropy_list"][0]), 2)
|
||||
self.assertEqual(len(share_inputs["entropy_list"][1]), 1)
|
||||
self.assertEqual(len(share_inputs["entropy_list"][2]), 0)
|
||||
@@ -179,6 +180,7 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
|
||||
share_inputs = {
|
||||
"seq_lens_this_time": paddle.to_tensor([[2], [2], [0], [15]], dtype="int32"),
|
||||
"seq_lens_encoder": paddle.to_tensor([[0], [0], [0], [15]], dtype="int32"),
|
||||
"seq_lens_decoder": paddle.to_tensor([[30], [30], [0], [15]], dtype="int32"),
|
||||
"entropy_list": [[], [], [], []],
|
||||
"stop_flags": paddle.to_tensor([[True], [False], [True], [False]], dtype="bool"),
|
||||
"req_ids": ["req_1", "req_2", "req_3", "req_4"],
|
||||
@@ -198,8 +200,6 @@ class TestSpeculateCalculateLogitsEntropy(unittest.TestCase):
|
||||
|
||||
speculate_calculate_logits_entropy(logits, share_inputs, temperature)
|
||||
|
||||
print(share_inputs["entropy_list"])
|
||||
|
||||
self.assertEqual(len(share_inputs["entropy_list"][0]), 0)
|
||||
self.assertEqual(len(share_inputs["entropy_list"][1]), 1)
|
||||
self.assertEqual(len(share_inputs["entropy_list"][2]), 0)
|
||||
|
||||
Reference in New Issue
Block a user