mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Support for request-level speculative decoding metrics monitoring. (#5518)
* support spec metrics monitor per request
* fix bug
* remove debug log
* fix ut bugs
This commit is contained in:
@@ -142,13 +142,10 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
|
||||
processor.num_accepted_tokens = 0
|
||||
processor.num_emitted_tokens = 0
|
||||
processor.max_num_emitted_tokens = 0
|
||||
processor.num_rest_requests_per_head = [
|
||||
0,
|
||||
] * MAX_DRAFT_TOKENS
|
||||
processor.num_accept_requests_per_head = [
|
||||
0,
|
||||
] * MAX_DRAFT_TOKENS
|
||||
processor.speculative_stats_step = 0
|
||||
processor.total_step_per_request = {}
|
||||
processor.accept_token_num_per_head_per_request = {}
|
||||
processor.accept_token_num_per_head = [0] * MAX_DRAFT_TOKENS
|
||||
|
||||
# processor._recycle_resources = Mock()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user