[Optimization] Optimize CPU utilization (#6950)

* Optimize CPU utilization
2026-04-23 00:17:25 +08:00 · 2026-03-22 23:02:39 +08:00
parent fdd12ff5ba
commit 33e79f922a
3 changed files with 34 additions and 33 deletions
@@ -47,6 +47,7 @@ class ChatResponseProcessor:
            self.decoder_client = AsyncTokenizerClient(base_url=decoder_base_url)
        else:
            self.decoder_client = None
+        self._is_async_processor = inspect.iscoroutinefunction(data_processor.process_response_dict)
        self._mm_buffer: List[Any] = []  # Buffer for accumulating image token_ids
        self._end_image_code_request_output: Optional[Any] = None
        self._audio_buffer: Dict[Any] = {}
@@ -92,7 +93,7 @@ class ChatResponseProcessor:
                            all_audio_tokens = self._audio_buffer.pop(req_id, [])
                        else:
                            all_audio_tokens = None
-                        if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
+                        if self._is_async_processor:
                            response = await self.data_processor.process_response_dict(
                                response_dict=request_output,
                                stream=stream,
@@ -152,7 +153,7 @@ class ChatResponseProcessor:
                            image_output["outputs"]["num_image_tokens"] = count_tokens(all_tokens)
                            yield image_output

-                    if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
+                    if self._is_async_processor:
                        await self.data_processor.process_response_dict(
                            response_dict=request_output,
                            stream=stream,
@@ -181,7 +182,7 @@ class ChatResponseProcessor:
                    num_image_tokens = 0
                    for part in self._multipart_buffer:
                        if part["decode_type"] == 0:
-                            if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
+                            if self._is_async_processor:
                                await self.data_processor.process_response_dict(
                                    response_dict=part["request_output"],
                                    stream=False,