mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization] refactor(chat_handler,completion_handler): extract base classes and use AsyncLLM (#5195)
* [Optimization] refactor(chat_handler,completion_handler): extract base classes and use AsyncLLM * [Optimization] refactor(chat_handler,completion_handler): rename class
This commit is contained in:
@@ -76,6 +76,16 @@ class CompletionTokenUsageInfo(BaseModel):
|
||||
reasoning_tokens: Optional[int] = None
|
||||
image_tokens: Optional[int] = None
|
||||
|
||||
def add(self, other: CompletionTokenUsageInfo):
    """Fold *other*'s completion-token counters into this object in place.

    For each counter, any non-None value on *other* is added to ours,
    treating a missing (None) value on our side as 0.
    """
    for counter in ("reasoning_tokens", "image_tokens"):
        increment = getattr(other, counter)
        if increment is not None:
            setattr(self, counter, (getattr(self, counter) or 0) + increment)
|
||||
|
||||
|
||||
class PromptTokenUsageInfo(BaseModel):
|
||||
"""
|
||||
@@ -86,6 +96,22 @@ class PromptTokenUsageInfo(BaseModel):
|
||||
image_tokens: Optional[int] = None
|
||||
video_tokens: Optional[int] = None
|
||||
|
||||
def add(self, other: PromptTokenUsageInfo):
    """Fold *other*'s prompt-token counters into this object in place.

    Only truthy (non-None, non-zero) counters on *other* are merged;
    a missing (None) value on our side counts as 0.
    """
    for counter in ("cached_tokens", "image_tokens", "video_tokens"):
        increment = getattr(other, counter)
        if increment:
            setattr(self, counter, (getattr(self, counter) or 0) + increment)
|
||||
|
||||
|
||||
class UsageInfo(BaseModel):
|
||||
"""
|
||||
@@ -98,6 +124,19 @@ class UsageInfo(BaseModel):
|
||||
prompt_tokens_details: Optional[PromptTokenUsageInfo] = None
|
||||
completion_tokens_details: Optional[CompletionTokenUsageInfo] = None
|
||||
|
||||
def add(self, other: UsageInfo):
    """Merge *other*'s token usage into this object in place.

    total_tokens is recomputed from the merged prompt/completion counts.
    Each per-detail breakdown is merged via its own .add(), or adopted
    wholesale (by reference) when we have none yet.
    """
    self.prompt_tokens += other.prompt_tokens
    self.completion_tokens += other.completion_tokens
    self.total_tokens = self.prompt_tokens + self.completion_tokens
    for detail_field in ("prompt_tokens_details", "completion_tokens_details"):
        theirs = getattr(other, detail_field)
        if not theirs:
            continue
        mine = getattr(self, detail_field)
        if mine:
            mine.add(theirs)
        else:
            setattr(self, detail_field, theirs)
|
||||
|
||||
|
||||
class ModelPermission(BaseModel):
|
||||
id: str = Field(default_factory=lambda: f"modelperm-{str(uuid.uuid4().hex)}")
|
||||
@@ -294,7 +333,7 @@ class ChatCompletionResponseStreamChoice(BaseModel):
|
||||
logprobs: Optional[LogProbs] = None
|
||||
draft_logprobs: Optional[LogProbs] = None
|
||||
prompt_logprobs: Optional[PromptLogprobs] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
|
||||
arrival_time: Optional[float] = None
|
||||
speculate_metrics: Optional[SpeculateMetrics] = None
|
||||
|
||||
@@ -330,7 +369,7 @@ class CompletionResponseChoice(BaseModel):
|
||||
draft_logprobs: Optional[CompletionLogprobs] = None
|
||||
prompt_logprobs: Optional[PromptLogprobs] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
speculate_metrics: Optional[SpeculateMetrics] = None
|
||||
|
||||
@@ -376,7 +415,7 @@ class CompletionResponseStreamChoice(BaseModel):
|
||||
prompt_tokens: Optional[str] = None
|
||||
completion_tokens: Optional[str] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
|
||||
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
|
||||
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
|
||||
speculate_metrics: Optional[SpeculateMetrics] = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user