[Optimization] Update ZMQ server (#6735)

* add batch zmq send response * update * Revert "update" This reverts commit 0234a25b47. * update * remove lock * fix unit test * add unit test * add unit test * pre commit * add unit test * fix unit test * add unit test * fix worker>1 * update zmq_worker_pid * fix unit test * fix unit test * fix unit test * add unit test * fix unit test * fix first token time * fix logprobs * add unit test * op * remove debug log --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
2026-04-23 00:17:25 +08:00 · 2026-03-19 21:53:16 +08:00
parent 9148562ed0
commit c3d8db85c4
18 changed files with 2739 additions and 133 deletions
@@ -19,7 +19,6 @@ from typing import Any, Dict, List, Optional

 from fastdeploy.entrypoints.openai.usage_calculator import count_tokens
 from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest
-from fastdeploy.utils import api_server_logger


 class ChatResponseProcessor:
@@ -81,7 +80,6 @@ class ChatResponseProcessor:
            include_stop_str_in_output: Whether or not to include stop strings in the output.
        """
        for request_output in request_outputs:
-            api_server_logger.debug(f"request_output {request_output}")
            if not self.enable_mm_output:
                outputs = request_output.get("outputs", None)
                token_ids = outputs.get("token_ids", None) if outputs is not None else None