[Optimization] refactor(chat_handler,completion_handler): extract base classes and use AsyncLLM (#5195)

* [Optimization] refactor(chat_handler,completion_handler): extract base classes and use AsyncLLM

* [Optimization] refactor(chat_handler,completion_handler): rename class
This commit is contained in:
memoryCoderC
2025-12-25 16:28:15 +08:00
committed by GitHub
parent 8fc789bb3f
commit be3be4913a
19 changed files with 3601 additions and 66 deletions
+42 -3
View File
@@ -76,6 +76,16 @@ class CompletionTokenUsageInfo(BaseModel):
reasoning_tokens: Optional[int] = None
image_tokens: Optional[int] = None
def add(self, other: CompletionTokenUsageInfo):
    """Merge *other*'s optional counters into this instance in place.

    For each counter: if both sides have a value they are summed; if only
    *other* has one it is adopted; otherwise the field is left untouched.
    """
    for field in ("reasoning_tokens", "image_tokens"):
        theirs = getattr(other, field)
        if theirs is None:
            continue  # nothing to merge for this counter
        mine = getattr(self, field)
        setattr(self, field, theirs if mine is None else mine + theirs)
class PromptTokenUsageInfo(BaseModel):
"""
@@ -86,6 +96,22 @@ class PromptTokenUsageInfo(BaseModel):
image_tokens: Optional[int] = None
video_tokens: Optional[int] = None
def add(self, other: PromptTokenUsageInfo):
    """Accumulate *other*'s optional counters into this instance in place.

    For each counter: sum when both sides have a value, adopt *other*'s
    value when only it has one, otherwise leave the field unchanged.

    Uses explicit ``is not None`` checks (matching
    ``CompletionTokenUsageInfo.add``) instead of truthiness: with
    truthiness a legitimate count of 0 on *other* was silently dropped,
    leaving the field ``None`` instead of ``0``.
    """
    if self.cached_tokens is not None and other.cached_tokens is not None:
        self.cached_tokens += other.cached_tokens
    elif other.cached_tokens is not None:
        self.cached_tokens = other.cached_tokens
    if self.image_tokens is not None and other.image_tokens is not None:
        self.image_tokens += other.image_tokens
    elif other.image_tokens is not None:
        self.image_tokens = other.image_tokens
    if self.video_tokens is not None and other.video_tokens is not None:
        self.video_tokens += other.video_tokens
    elif other.video_tokens is not None:
        self.video_tokens = other.video_tokens
class UsageInfo(BaseModel):
"""
@@ -98,6 +124,19 @@ class UsageInfo(BaseModel):
prompt_tokens_details: Optional[PromptTokenUsageInfo] = None
completion_tokens_details: Optional[CompletionTokenUsageInfo] = None
def add(self, other: UsageInfo):
    """Accumulate another UsageInfo into this one in place.

    Prompt and completion token counts are summed and ``total_tokens`` is
    recomputed from the two updated sums (so it stays consistent even if
    either side's stored total was stale). Nested detail objects are merged
    via their own ``add`` when both sides have one; when only *other* has
    one, a shallow copy is stored — the original assigned the reference
    directly, which aliased the two usage records so a later in-place
    merge on ``self`` would silently mutate *other*'s detail object.
    """
    import copy  # local import keeps the fix self-contained

    self.prompt_tokens += other.prompt_tokens
    self.completion_tokens += other.completion_tokens
    self.total_tokens = self.prompt_tokens + self.completion_tokens
    if other.prompt_tokens_details and self.prompt_tokens_details:
        self.prompt_tokens_details.add(other.prompt_tokens_details)
    elif other.prompt_tokens_details:
        # copy instead of aliasing: a later add() must not mutate `other`
        self.prompt_tokens_details = copy.copy(other.prompt_tokens_details)
    if other.completion_tokens_details and self.completion_tokens_details:
        self.completion_tokens_details.add(other.completion_tokens_details)
    elif other.completion_tokens_details:
        self.completion_tokens_details = copy.copy(other.completion_tokens_details)
class ModelPermission(BaseModel):
id: str = Field(default_factory=lambda: f"modelperm-{str(uuid.uuid4().hex)}")
@@ -294,7 +333,7 @@ class ChatCompletionResponseStreamChoice(BaseModel):
logprobs: Optional[LogProbs] = None
draft_logprobs: Optional[LogProbs] = None
prompt_logprobs: Optional[PromptLogprobs] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
arrival_time: Optional[float] = None
speculate_metrics: Optional[SpeculateMetrics] = None
@@ -330,7 +369,7 @@ class CompletionResponseChoice(BaseModel):
draft_logprobs: Optional[CompletionLogprobs] = None
prompt_logprobs: Optional[PromptLogprobs] = None
reasoning_content: Optional[str] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
speculate_metrics: Optional[SpeculateMetrics] = None
@@ -376,7 +415,7 @@ class CompletionResponseStreamChoice(BaseModel):
prompt_tokens: Optional[str] = None
completion_tokens: Optional[str] = None
reasoning_content: Optional[str] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
speculate_metrics: Optional[SpeculateMetrics] = None