mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization]Optimize CPU utilization (#6950)
* Optimize CPU utilization
This commit is contained in:
@@ -47,6 +47,7 @@ class ChatResponseProcessor:
|
||||
self.decoder_client = AsyncTokenizerClient(base_url=decoder_base_url)
|
||||
else:
|
||||
self.decoder_client = None
|
||||
+ self._is_async_processor = inspect.iscoroutinefunction(data_processor.process_response_dict)
|
||||
self._mm_buffer: List[Any] = [] # Buffer for accumulating image token_ids
|
||||
self._end_image_code_request_output: Optional[Any] = None
|
||||
self._audio_buffer: Dict[Any] = {}
|
||||
@@ -92,7 +93,7 @@ class ChatResponseProcessor:
|
||||
all_audio_tokens = self._audio_buffer.pop(req_id, [])
|
||||
else:
|
||||
all_audio_tokens = None
|
||||
- if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
|
||||
+ if self._is_async_processor:
|
||||
response = await self.data_processor.process_response_dict(
|
||||
response_dict=request_output,
|
||||
stream=stream,
|
||||
@@ -152,7 +153,7 @@ class ChatResponseProcessor:
|
||||
image_output["outputs"]["num_image_tokens"] = count_tokens(all_tokens)
|
||||
yield image_output
|
||||
|
||||
- if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
|
||||
+ if self._is_async_processor:
|
||||
await self.data_processor.process_response_dict(
|
||||
response_dict=request_output,
|
||||
stream=stream,
|
||||
@@ -181,7 +182,7 @@ class ChatResponseProcessor:
|
||||
num_image_tokens = 0
|
||||
for part in self._multipart_buffer:
|
||||
if part["decode_type"] == 0:
|
||||
- if inspect.iscoroutinefunction(self.data_processor.process_response_dict):
|
||||
+ if self._is_async_processor:
|
||||
await self.data_processor.process_response_dict(
|
||||
response_dict=part["request_output"],
|
||||
stream=False,
|
||||
|
||||
Reference in New Issue
Block a user