[Optimization] Optimize CPU utilization (#6950)

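* Optimize CPU utilization: in `ChatResponseProcessor`, evaluate `inspect.iscoroutinefunction(data_processor.process_response_dict)` once, store the result as `self._is_async_processor`, and reuse that flag at each `process_response_dict` call site instead of re-running the check every time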
This commit is contained in:
luukunn
2026-03-22 23:02:39 +08:00
committed by GitHub
parent fdd12ff5ba
commit 33e79f922a
3 changed files with 34 additions and 33 deletions
@@ -47,6 +47,7 @@ class ChatResponseProcessor:
self.decoder_client = AsyncTokenizerClient(base_url=decoder_base_url)
else:
self.decoder_client = None
self._is_async_processor = inspect.iscoroutinefunction(data_processor.process_response_dict)
self._mm_buffer: List[Any] = [] # Buffer for accumulating image token_ids
self._end_image_code_request_output: Optional[Any] = None
self._audio_buffer: Dict[Any] = {}
@@ -92,7 +93,7 @@ class ChatResponseProcessor:
all_audio_tokens = self._audio_buffer.pop(req_id, [])
else:
all_audio_tokens = None
if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
if self._is_async_processor:
response = await self.data_processor.process_response_dict(
response_dict=request_output,
stream=stream,
@@ -152,7 +153,7 @@ class ChatResponseProcessor:
image_output["outputs"]["num_image_tokens"] = count_tokens(all_tokens)
yield image_output
if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
if self._is_async_processor:
await self.data_processor.process_response_dict(
response_dict=request_output,
stream=stream,
@@ -181,7 +182,7 @@ class ChatResponseProcessor:
num_image_tokens = 0
for part in self._multipart_buffer:
if part["decode_type"] == 0:
if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
if self._is_async_processor:
await self.data_processor.process_response_dict(
response_dict=part["request_output"],
stream=False,