mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[BugFix] Fix inaccurate cache hit rate and TTFT after request preemption (#6620)
* [chore] add has_been_rescheduled flag for requests * [refactor] rename reschedule to preempted for accuracy and fix cache hit metrics * [chore] add ttft_s
This commit is contained in:
@@ -34,6 +34,8 @@ class LoggingEventName(Enum):
|
||||
INFERENCE_END = "INFERENCE_END"
|
||||
POSTPROCESSING_START = "POSTPROCESSING_START"
|
||||
POSTPROCESSING_END = "POSTPROCESSING_END"
|
||||
PREEMPTED = "PREEMPTED"
|
||||
RESCHEDULED_INFERENCE_START = "RESCHEDULED_INFERENCE_START"
|
||||
|
||||
|
||||
class StageName(Enum):
|
||||
@@ -60,6 +62,8 @@ EVENT_TO_STAGE_MAP = {
|
||||
LoggingEventName.INFERENCE_START: StageName.PREFILL,
|
||||
LoggingEventName.FIRST_TOKEN_GENERATED: StageName.PREFILL,
|
||||
LoggingEventName.DECODE_START: StageName.DECODE,
|
||||
LoggingEventName.PREEMPTED: StageName.DECODE,
|
||||
LoggingEventName.RESCHEDULED_INFERENCE_START: StageName.DECODE,
|
||||
LoggingEventName.INFERENCE_END: StageName.DECODE,
|
||||
LoggingEventName.POSTPROCESSING_START: StageName.POSTPROCESSING,
|
||||
LoggingEventName.POSTPROCESSING_END: StageName.POSTPROCESSING,
|
||||
|
||||
Reference in New Issue
Block a user