From 6e16438a57520b8715a0e593a2fac9e854941dfc Mon Sep 17 00:00:00 2001 From: zhouchong <43821961+xyxinyang@users.noreply.github.com> Date: Thu, 16 Apr 2026 15:13:05 +0800 Subject: [PATCH] [Feature] implement log channel separation and request log level system (#7190) * feat: implement log channel separation and request log level system * fix: log system improvements based on review * add request_id to error logs, use RequestLogLevel enum, and unify logger implementation from utils to logger module --- docs/usage/environment_variables.md | 2 +- docs/usage/log.md | 47 ++- docs/zh/usage/environment_variables.md | 2 +- docs/zh/usage/log.md | 52 +++- fastdeploy/engine/async_llm.py | 5 +- fastdeploy/engine/engine.py | 56 +++- fastdeploy/engine/request.py | 19 +- fastdeploy/entrypoints/api_server.py | 19 +- fastdeploy/entrypoints/engine_client.py | 158 +++++++--- fastdeploy/entrypoints/llm.py | 13 +- fastdeploy/entrypoints/openai/api_server.py | 23 +- fastdeploy/entrypoints/openai/protocol.py | 5 +- fastdeploy/entrypoints/openai/serving_chat.py | 77 +++-- .../entrypoints/openai/serving_completion.py | 99 +++++-- .../entrypoints/openai/serving_embedding.py | 9 +- .../entrypoints/openai/serving_engine.py | 61 +++- .../entrypoints/openai/serving_models.py | 5 +- .../entrypoints/openai/serving_reward.py | 11 +- .../ernie_45_vl_thinking_tool_parser.py | 23 +- .../tool_parsers/ernie_x1_tool_parser.py | 8 +- .../entrypoints/openai/v1/serving_chat.py | 23 +- .../openai/v1/serving_completion.py | 50 +++- fastdeploy/envs.py | 2 +- fastdeploy/input/base_processor.py | 33 ++- .../ernie4_5_vl_processor.py | 3 +- .../qwen3_vl_processor/qwen3_vl_processor.py | 3 +- .../qwen_vl_processor/qwen_vl_processor.py | 3 +- fastdeploy/input/tokenizer_client.py | 19 +- fastdeploy/logger/__init__.py | 124 ++++++++ fastdeploy/logger/config.py | 47 +++ fastdeploy/logger/logger.py | 124 ++++++-- fastdeploy/logger/request_logger.py | 94 ++++++ fastdeploy/logger/setup_logging.py | 223 +++++++------- fastdeploy/output/token_processor.py | 158 ++++++++-- fastdeploy/scheduler/dp_scheduler.py | 19 +- fastdeploy/scheduler/global_scheduler.py | 39 ++- fastdeploy/scheduler/local_scheduler.py | 28 +- fastdeploy/scheduler/splitwise_scheduler.py | 22 +- fastdeploy/utils.py | 212 ++++--------- .../openai/v1/test_serving_completion_v1.py | 13 +- tests/entrypoints/test_abort.py | 18 +- tests/entrypoints/test_engine_client.py | 13 +- tests/entrypoints/test_llm.py | 15 +- tests/input/test_video_utils.py | 2 +- tests/logger/test_logger.py | 8 +- tests/logger/test_logging_config.py | 91 ++++++ tests/logger/test_request_logger.py | 176 +++++++++++ tests/logger/test_setup_logging.py | 280 +++++++++++++----- tests/output/test_process_batch_output.py | 11 +- .../test_process_batch_output_use_zmq.py | 13 +- tests/scheduler/test_dp_scheduler.py | 20 +- tests/scheduler/test_local_scheduler.py | 15 +- 52 files changed, 1956 insertions(+), 639 deletions(-) create mode 100644 fastdeploy/logger/config.py create mode 100644 fastdeploy/logger/request_logger.py create mode 100644 tests/logger/test_logging_config.py create mode 100644 tests/logger/test_request_logger.py diff --git a/docs/usage/environment_variables.md b/docs/usage/environment_variables.md index 692ad8cd02..e3945e85f0 100644 --- a/docs/usage/environment_variables.md +++ b/docs/usage/environment_variables.md @@ -25,7 +25,7 @@ environment_variables: dict[str, Callable[[], Any]] = { "FD_LOG_REQUESTS": lambda: int(os.getenv("FD_LOG_REQUESTS", "1")), # Request logging detail level (0-3). 
Higher level means more verbose output. - "FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "0")), + "FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "2")), # Max field length for request logging truncation. "FD_LOG_MAX_LEN": lambda: int(os.getenv("FD_LOG_MAX_LEN", "2048")), diff --git a/docs/usage/log.md b/docs/usage/log.md index 5c963a0650..33e45c1133 100644 --- a/docs/usage/log.md +++ b/docs/usage/log.md @@ -5,12 +5,48 @@ FastDeploy generates the following log files during deployment. Below is an explanation of each log's purpose. By default, logs are stored in the `log` directory under the execution path. To specify a custom directory, set the environment variable `FD_LOG_DIR`. +## Log Channel Separation + +FastDeploy separates logs into three channels: + +| Channel | Logger Name | Output Files | Description | +|---------|-------------|--------------|-------------| +| main | `fastdeploy.main.*` | `fastdeploy.log`, `console.log` | Main logs for system configuration, startup info, etc. | +| request | `fastdeploy.request.*` | `request.log` | Request logs for request lifecycle and processing details | +| console | `fastdeploy.console.*` | `console.log` | Console logs, output to terminal and console.log | + +## Request Log Levels + +Request logs (`request.log`) support 4 levels, controlled by the environment variable `FD_LOG_REQUESTS_LEVEL`: + +| Level | Enum Name | Description | Example Content | +|-------|-----------|-------------|-----------------| +| 0 | LIFECYCLE | Lifecycle start/end | Request creation/initialization, completion stats (InputToken/OutputToken/latency), first and last streaming response, request abort | +| 1 | STAGES | Processing stages | Semaphore acquire/release, first token time recording, signal handling (preemption/abortion/recovery), cache task, preprocess time, parameter adjustment warnings | +| 2 | CONTENT | Content and scheduling | Request parameters, processed request, scheduling info (enqueue/pull/finish), response content (long content is truncated) | +| 3 | FULL | Complete raw data | Complete request and response data, raw received request | + +Default level is 2 (CONTENT), which logs request parameters, scheduling info, and response content. Lower levels (0-1) only log critical events, while level 3 includes complete raw data. + +## Log-Related Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `FD_LOG_DIR` | `log` | Log file storage directory | +| `FD_LOG_LEVEL` | `INFO` | Log level, supports `INFO` or `DEBUG` | +| `FD_LOG_REQUESTS` | `1` | Enable request logging, `0` to disable, `1` to enable | +| `FD_LOG_REQUESTS_LEVEL` | `2` | Request log level, range 0-3 | +| `FD_LOG_MAX_LEN` | `2048` | Maximum length for L2 level log content (excess is truncated) | +| `FD_LOG_BACKUP_COUNT` | `7` | Number of log files to retain | +| `FD_DEBUG` | `0` | Debug mode, `1` enables DEBUG log level | + ## Inference Service Logs + +* `fastdeploy.log` : Main log file, records system configuration, startup information, runtime status, etc. +* `request.log` : Request log file, records user request lifecycle and processing details +* `console.log` : Console log, records model startup time and other information. This log is also printed to the console. +* `error.log` : Error log file, records all ERROR and above level logs * `backup_env.*.json` : Records environment variables set during instance startup. The number of files matches the number of GPU cards. 
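The `backup_env.*.json` files above capture whichever of these variables were active at startup. As a quick illustration of the tables above, the snippet below is a minimal sketch of raising request-log verbosity before launching the service; the variable names and values come from this patch's documentation, while the launch command in the trailing comment is only an example and may differ per deployment.

```python
import os

# Configure FastDeploy logging before the engine/server process starts
# (variable names and defaults are documented in the tables above).
os.environ["FD_LOG_DIR"] = "./log"          # fastdeploy.log / request.log / console.log / error.log go here
os.environ["FD_LOG_REQUESTS"] = "1"         # keep request logging enabled
os.environ["FD_LOG_REQUESTS_LEVEL"] = "3"   # 3 = FULL: complete raw request/response data in request.log
os.environ["FD_LOG_MAX_LEN"] = "4096"       # allow longer CONTENT-level entries before truncation

# Then start the server as usual, for example:
#   python -m fastdeploy.entrypoints.openai.api_server --model <model_path>
```

With level 3, `request.log` records the raw received request and complete responses; deployments that only need scheduling and response summaries can keep the default level 2, where long payloads are truncated to `FD_LOG_MAX_LEN`.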
-* `envlog.*` : Logs environment variables set during instance startup. The number of files matches the number of GPU cards. -* `console.log` : Records model startup time and other information. This log is also printed to the console. -* `data_processor.log` : Logs input/output data encoding and decoding details. -* `fastdeploy.log` : Records configuration information during instance startup, as well as request and response details during runtime. * `workerlog.*` : Tracks model loading progress and inference operator errors. Each GPU card has a corresponding file. * `worker_process.log` : Logs engine inference data for each iteration. * `cache_manager.log` : Records KV Cache logical index allocation for each request and cache hit status. @@ -18,9 +54,6 @@ By default, logs are stored in the `log` directory under the execution path. To * `gpu_worker.log` : Records KV Cache block count information during profiling. * `gpu_model_runner.log` : Contains model details and loading time. -## Online Inference Client Logs -* `api_server.log` : Logs startup parameters and received request information. - ## Scheduler Logs * `scheduler.log` : Records scheduler information, including node status and request allocation details. diff --git a/docs/zh/usage/environment_variables.md b/docs/zh/usage/environment_variables.md index 0a4cfd389d..733f49a1c7 100644 --- a/docs/zh/usage/environment_variables.md +++ b/docs/zh/usage/environment_variables.md @@ -25,7 +25,7 @@ environment_variables: dict[str, Callable[[], Any]] = { "FD_LOG_REQUESTS": lambda: int(os.getenv("FD_LOG_REQUESTS", "1")), # 请求日志详细级别 (0-3)。级别越高输出越详细。 - "FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "0")), + "FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "2")), # 请求日志字段截断最大长度。 "FD_LOG_MAX_LEN": lambda: int(os.getenv("FD_LOG_MAX_LEN", "2048")), diff --git a/docs/zh/usage/log.md b/docs/zh/usage/log.md index 7d276655b6..ed28d896af 100644 --- a/docs/zh/usage/log.md +++ b/docs/zh/usage/log.md @@ -2,15 +2,51 @@ # 日志说明 -FastDeploy 在部署过程中,会产生如下日志文件,各日志含义说明 +FastDeploy 在部署过程中,会产生如下日志文件,各日志含义说明。 默认日志目录为执行目录下的 `log` 文件夹,若需要指定可设置环境变量 `FD_LOG_DIR`。 +## 日志通道划分 + +FastDeploy 将日志分为三个通道: + +| 通道 | Logger 名称 | 输出文件 | 说明 | +|------|-------------|----------|------| +| main | `fastdeploy.main.*` | `fastdeploy.log`, `console.log` | 主日志,记录系统配置、启动信息等 | +| request | `fastdeploy.request.*` | `request.log` | 请求日志,记录请求生命周期和处理细节 | +| console | `fastdeploy.console.*` | `console.log` | 控制台日志,输出到终端和 console.log | + +## 请求日志级别 + +请求日志 (`request.log`) 支持 4 个级别,通过环境变量 `FD_LOG_REQUESTS_LEVEL` 控制: + +| 级别 | 枚举名 | 说明 | 示例内容 | +|------|--------|------|----------| +| 0 | LIFECYCLE | 生命周期起止 | 请求创建/初始化、完成统计(InputToken/OutputToken/耗时)、流式响应首次和最后发送、请求中止 | +| 1 | STAGES | 处理阶段 | 信号量获取/释放、首 token 时间记录、信号处理(preemption/abortion/recovery)、缓存任务、预处理耗时、参数调整警告 | +| 2 | CONTENT | 内容和调度 | 请求参数、处理后的请求、调度信息(入队/拉取/完成)、响应内容(超长内容会被截断) | +| 3 | FULL | 完整数据 | 完整的请求和响应数据、原始接收请求 | + +默认级别为 2 (CONTENT),记录请求参数、调度信息和响应内容。较低级别 (0-1) 只记录关键事件,级别 3 则包含完整原始数据。 + +## 日志相关环境变量 + +| 环境变量 | 默认值 | 说明 | +|----------|--------|------| +| `FD_LOG_DIR` | `log` | 日志文件存储目录 | +| `FD_LOG_LEVEL` | `INFO` | 日志级别,支持 `INFO` 或 `DEBUG` | +| `FD_LOG_REQUESTS` | `1` | 是否启用请求日志,`0` 禁用,`1` 启用 | +| `FD_LOG_REQUESTS_LEVEL` | `2` | 请求日志级别,范围 0-3 | +| `FD_LOG_MAX_LEN` | `2048` | L2 级别日志内容的最大长度(超出部分会被截断) | +| `FD_LOG_BACKUP_COUNT` | `7` | 日志文件保留数量 | +| `FD_DEBUG` | `0` | 调试模式,`1` 启用时日志级别设为 `DEBUG` | + ## 推理服务日志 + +* `fastdeploy.log` : 主日志文件,记录系统配置、启动信息、运行状态等 +* `request.log` : 
请求日志文件,记录用户请求的生命周期和处理细节 +* `console.log` : 控制台日志,记录模型启动耗时等信息,该日志信息会被打印到控制台 +* `error.log` : 错误日志文件,记录所有 ERROR 及以上级别的日志 * `backup_env.*.json` : 记录当前实例启动时设置的环境变量,文件个数与卡数相同 -* `envlog.*` : 记录当前实例启动时设置的环境变量,文件个数与卡数相同 -* `console.log` : 记录模型启动耗时等信息,该日志信息会被打印到控制台 -* `data_processor.log` : 记录输入数据及输出输出编码解码的内容 -* `fastdeploy.log` : 记录当前实例启动的各个 config 的信息,运行中记录用户请求的 request 及 response 信息 * `workerlog.*` : 记录模型启动加载进度及推理算子报错信息,每个卡对应一个文件 * `worker_process.log` : 记录引擎每一轮推理的数据 * `cache_manager.log` : 记录每一个请求分配 KV Cache 的逻辑索引,以及当前请求的命中情况 @@ -18,9 +54,6 @@ FastDeploy 在部署过程中,会产生如下日志文件,各日志含义说 * `gpu_worker.log` : 记录 profile 时计算 KV Cache block 数目的信息 * `gpu_model_runner.log` : 当前的模型信息及加载时间 -## 在线推理客户端日志 -* `api_server.log` : 记录启动参数,及接收到的请求信息 - ## 调度器日志 * `scheduler.log` : 记录调度器的信息包含当前结点的信息,每条请求分配的信息 @@ -31,12 +64,11 @@ FastDeploy 在部署过程中,会产生如下日志文件,各日志含义说 * `cache_queue_manager.log` : 记录启动参数,及接收到的请求信息 * `cache_transfer_manager.log` : 记录启动参数,及接收到的请求信息 -* `cache_queue_manager.log` : 记录启动参数,及接收到的请求信息 * `launch_cache_manager.log` : 启动 cache transfer 记录启动参数,报错信息 ## PD 分离相关日志 -* `cache_messager.log` : 记录P 实例使用的传输协议及传输信息 +* `cache_messager.log` : 记录 P 实例使用的传输协议及传输信息 * `splitwise_connector.log` : 记录收到 P/D 发送的数据,及建联信息 ## CudaGraph 相关日志 diff --git a/fastdeploy/engine/async_llm.py b/fastdeploy/engine/async_llm.py index c06292ec98..75892a6026 100644 --- a/fastdeploy/engine/async_llm.py +++ b/fastdeploy/engine/async_llm.py @@ -37,6 +37,7 @@ from fastdeploy.entrypoints.openai.utils import DealerConnectionManager from fastdeploy.input.preprocess import InputPreprocessor from fastdeploy.inter_communicator import IPCSignal from fastdeploy.inter_communicator.zmq_client import ZmqIpcClient +from fastdeploy.logger.request_logger import log_request_error from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.utils import EngineError, envs, llm_logger @@ -562,7 +563,7 @@ class AsyncLLM(EngineServiceClient): llm_logger.info(f"Request {conn_request_id} generator exit (outer)") return except Exception as e: - llm_logger.error(f"Request {conn_request_id} failed: {e}") + log_request_error(message="Request {request_id} failed: {error}", request_id=conn_request_id, error=e) raise EngineError(str(e), error_code=500) from e finally: # Ensure request_map/request_num are cleaned up @@ -584,7 +585,7 @@ class AsyncLLM(EngineServiceClient): await self.connection_manager.cleanup_request(request_id) llm_logger.info(f"Aborted request {request_id}") except Exception as e: - llm_logger.error(f"Failed to abort request {request_id}: {e}") + log_request_error(message="Failed to abort request {request_id}: {error}", request_id=request_id, error=e) async def shutdown(self): """ diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py index 439438f1bc..f5f48c6274 100644 --- a/fastdeploy/engine/engine.py +++ b/fastdeploy/engine/engine.py @@ -44,6 +44,11 @@ from fastdeploy.engine.common_engine import ( from fastdeploy.engine.expert_service import start_data_parallel_service from fastdeploy.engine.request import Request from fastdeploy.inter_communicator import EngineWorkerQueue, IPCSignal +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.platforms import current_platform from fastdeploy.utils import EngineError, console_logger, envs, llm_logger @@ -285,7 +290,7 @@ class LLMEngine: # Create Request struct after processing request = Request.from_dict(task) request.metrics.scheduler_recv_req_time = 
time.time() - llm_logger.info(f"Receive request {request}") + log_request(RequestLogLevel.CONTENT, message="Receive request {request}", request=request) request.metrics.preprocess_start_time = time.time() request.prompt_token_ids_len = len(request.prompt_token_ids) @@ -304,12 +309,20 @@ class LLMEngine: f"Input text is too long, length of prompt token({input_ids_len}) " f"+ min_dec_len ({min_tokens}) >= max_model_len " ) - llm_logger.error(error_msg) + log_request_error( + message="request[{request_id}] error: {error}", + request_id=request.get("request_id"), + error=error_msg, + ) raise EngineError(error_msg, error_code=400) if input_ids_len > self.cfg.model_config.max_model_len: error_msg = f"Length of input token({input_ids_len}) exceeds the limit max_model_len({self.cfg.model_config.max_model_len})." - llm_logger.error(error_msg) + log_request_error( + message="request[{request_id}] error: {error}", + request_id=request.get("request_id"), + error=error_msg, + ) raise EngineError(error_msg, error_code=400) if request.get("stop_seqs_len") is not None: @@ -320,7 +333,11 @@ class LLMEngine: f"Length of stop ({stop_seqs_len}) exceeds the limit max_stop_seqs_num({max_stop_seqs_num})." "Please reduce the number of stop or set a lager max_stop_seqs_num by `FD_MAX_STOP_SEQS_NUM`" ) - llm_logger.error(error_msg) + log_request_error( + message="request[{request_id}] error: {error}", + request_id=request.get("request_id"), + error=error_msg, + ) raise EngineError(error_msg, error_code=400) stop_seqs_max_len = envs.FD_STOP_SEQS_MAX_LEN for single_stop_seq_len in stop_seqs_len: @@ -329,7 +346,11 @@ class LLMEngine: f"Length of stop_seqs({single_stop_seq_len}) exceeds the limit stop_seqs_max_len({stop_seqs_max_len})." "Please reduce the length of stop sequences or set a larger stop_seqs_max_len by `FD_STOP_SEQS_MAX_LEN`" ) - llm_logger.error(error_msg) + log_request_error( + message="request[{request_id}] error: {error}", + request_id=request.get("request_id"), + error=error_msg, + ) raise EngineError(error_msg, error_code=400) if self._has_guided_input(request): @@ -342,14 +363,22 @@ class LLMEngine: request, err_msg = self.guided_decoding_checker.schema_format(request) if err_msg is not None: - llm_logger.error(err_msg) + log_request_error( + message="request[{request_id}] error: {error}", + request_id=request.get("request_id"), + error=err_msg, + ) raise EngineError(err_msg, error_code=400) request.metrics.preprocess_end_time = time.time() request.metrics.scheduler_recv_req_time = time.time() self.engine.scheduler.put_requests([request]) - llm_logger.info(f"Cache task with request_id ({request.get('request_id')})") - llm_logger.debug(f"cache task: {request}") + log_request( + RequestLogLevel.STAGES, + message="Cache task with request_id ({request_id})", + request_id=request.get("request_id"), + ) + log_request(RequestLogLevel.FULL, message="cache task: {request}", request=request) def _worker_processes_ready(self): """ @@ -717,11 +746,16 @@ class LLMEngine: Yields: dict: The generated response. 
""" - llm_logger.info(f"Starting generation for prompt: {prompts}") + log_request(RequestLogLevel.CONTENT, message="Starting generation for prompt: {prompts}", prompts=prompts) try: req_id = self._format_and_add_data(prompts) except Exception as e: - llm_logger.error(f"Error happened while adding request, details={e}, {str(traceback.format_exc())}") + log_request_error( + message="request[{request_id}] error while adding request: {error}, {traceback}", + request_id=prompts.get("request_id"), + error=str(e), + traceback=traceback.format_exc(), + ) raise EngineError(str(e), error_code=400) # Get the result of the current request @@ -740,7 +774,7 @@ class LLMEngine: output = self.engine.data_processor.process_response_dict( result.to_dict(), stream=False, include_stop_str_in_output=False, direct_decode=not stream ) - llm_logger.debug(f"Generate result: {output}") + log_request(RequestLogLevel.FULL, message="Generate result: {output}", output=output) if not stream: yield output else: diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py index 0e95cd5e1f..cf062fe000 100644 --- a/fastdeploy/engine/request.py +++ b/fastdeploy/engine/request.py @@ -39,7 +39,11 @@ from fastdeploy.entrypoints.openai.protocol import ( StructuralTagResponseFormat, ToolCall, ) -from fastdeploy.utils import data_processor_logger +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.worker.output import ( LogprobsLists, PromptLogprobs, @@ -313,15 +317,13 @@ class Request: ), "The parameter `raw_request` is not supported now, please use completion api instead." for key, value in req.metadata.items(): setattr(request, key, value) - from fastdeploy.utils import api_server_logger - - api_server_logger.warning("The parameter metadata is obsolete.") + log_request(RequestLogLevel.STAGES, message="The parameter metadata is obsolete.") return request @classmethod def from_dict(cls, d: dict): - data_processor_logger.debug(f"{d}") + log_request(RequestLogLevel.FULL, message="{request}", request=d) sampling_params: SamplingParams = None pooling_params: PoolingParams = None metrics: RequestMetrics = None @@ -352,8 +354,11 @@ class Request: ImagePosition(**mm_pos) if not isinstance(mm_pos, ImagePosition) else mm_pos ) except Exception as e: - data_processor_logger.error( - f"Convert mm_positions to ImagePosition error: {e}, {str(traceback.format_exc())}" + log_request_error( + message="request[{request_id}] Convert mm_positions to ImagePosition error: {error}, {traceback}", + request_id=d.get("request_id"), + error=str(e), + traceback=traceback.format_exc(), ) return cls( request_id=d["request_id"], diff --git a/fastdeploy/entrypoints/api_server.py b/fastdeploy/entrypoints/api_server.py index 4f4d7f2250..c1d92a7d04 100644 --- a/fastdeploy/entrypoints/api_server.py +++ b/fastdeploy/entrypoints/api_server.py @@ -23,6 +23,11 @@ from fastapi.responses import Response, StreamingResponse from fastdeploy.engine.args_utils import EngineArgs from fastdeploy.engine.engine import LLMEngine +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.utils import ( FlexibleArgumentParser, api_server_logger, @@ -61,7 +66,7 @@ async def generate(request: dict): """ generate stream api """ - api_server_logger.info(f"Receive request: {request}") + log_request(RequestLogLevel.FULL, message="Receive request: {request}", request=request) stream = request.get("stream", 0) if not stream: @@ -72,7 +77,11 @@ 
async def generate(request: dict): output = result except Exception as e: # 记录完整的异常堆栈信息 - api_server_logger.error(f"Error during generation: {e!s}", exc_info=True) + log_request_error( + message="request[{request_id}] Error during generation: {error}", + request_id=request.get("request_id"), + error=str(e), + ) # 返回结构化的错误消息并终止流 output = {"error": str(e), "error_type": e.__class__.__name__} return output @@ -84,7 +93,11 @@ async def generate(request: dict): yield f"data: {json.dumps(result)}\n\n" except Exception as e: # 记录完整的异常堆栈信息 - api_server_logger.error(f"Error during generation: {e!s}", exc_info=True) + log_request_error( + message="request[{request_id}] Error during generation: {error}", + request_id=request.get("request_id"), + error=str(e), + ) # 返回结构化的错误消息并终止流 error_msg = {"error": str(e), "error_type": e.__class__.__name__} yield f"data: {json.dumps(error_msg)}\n\n" diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py index 4c56e9bcd7..ceb7c807e1 100644 --- a/fastdeploy/entrypoints/engine_client.py +++ b/fastdeploy/entrypoints/engine_client.py @@ -49,6 +49,11 @@ from fastdeploy.inter_communicator import ( RearrangeExpertStatus, ZmqIpcClient, ) +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.platforms import current_platform from fastdeploy.trace.constants import LoggingEventName @@ -362,12 +367,16 @@ class EngineClient: if "messages" in task: task["messages"] = None - api_server_logger.info(f"task['max_tokens']:{task['max_tokens']}") main_process_metrics.request_params_max_tokens.observe(task["max_tokens"]) main_process_metrics.prompt_tokens_total.inc(input_ids_len) main_process_metrics.request_prompt_tokens.observe(input_ids_len) except Exception as e: - api_server_logger.error(f"add_requests error: {e}, {str(traceback.format_exc())}") + log_request_error( + message="request[{request_id}] add_requests error: {error}, {traceback}", + request_id=task.get("request_id"), + error=e, + traceback=traceback.format_exc(), + ) raise EngineError(str(e), error_code=400) if input_ids_len + min_tokens >= self.max_model_len: @@ -375,14 +384,18 @@ class EngineClient: f"Input text is too long, input_ids_len ({input_ids_len}) " f"+ min_tokens({min_tokens}) >= max_model_len({self.max_model_len})" ) - api_server_logger.error(error_msg) + log_request_error( + message="request[{request_id}] {error_msg}", request_id=task.get("request_id"), error_msg=error_msg + ) raise EngineError(error_msg, error_code=400) if input_ids_len > self.max_model_len: error_msg = ( f"Length of input token({input_ids_len}) exceeds the limit max_model_len({self.max_model_len})." ) - api_server_logger.error(error_msg) + log_request_error( + message="request[{request_id}] {error_msg}", request_id=task.get("request_id"), error_msg=error_msg + ) raise EngineError(error_msg, error_code=400) if "stop_seqs_len" in task and task["stop_seqs_len"]: @@ -393,7 +406,9 @@ class EngineClient: f"Length of stop ({stop_seqs_len}) exceeds the limit max_stop_seqs_num({max_stop_seqs_num})." 
"Please reduce the number of stop or set a lager max_stop_seqs_num by `FD_MAX_STOP_SEQS_NUM`" ) - api_server_logger.error(error_msg) + log_request_error( + message="request[{request_id}] {error_msg}", request_id=task.get("request_id"), error_msg=error_msg + ) raise EngineError(error_msg, error_code=400) stop_seqs_max_len = envs.FD_STOP_SEQS_MAX_LEN for single_stop_seq_len in stop_seqs_len: @@ -402,18 +417,28 @@ class EngineClient: f"Length of stop_seqs({single_stop_seq_len}) exceeds the limit stop_seqs_max_len({stop_seqs_max_len})." "Please reduce the length of stop sequences or set a larger stop_seqs_max_len by `FD_STOP_SEQS_MAX_LEN`" ) - api_server_logger.error(error_msg) + log_request_error( + message="request[{request_id}] {error_msg}", + request_id=task.get("request_id"), + error_msg=error_msg, + ) raise EngineError(error_msg, error_code=400) task["metrics"]["preprocess_end_time"] = time.time() preprocess_cost_time = task["metrics"]["preprocess_end_time"] - task["metrics"]["preprocess_start_time"] - api_server_logger.info( - f"Cache request with request_id ({task.get('request_id')}), " - f"preprocess time cost {preprocess_cost_time}" + log_request( + level=RequestLogLevel.STAGES, + message="Cache request with request_id ({request_id}), preprocess time cost {preprocess_cost_time}", + request_id=task.get("request_id"), + preprocess_cost_time=preprocess_cost_time, ) self.valid_parameters(task) - api_server_logger.debug(f"Receive task: {task}") + log_request( + level=RequestLogLevel.FULL, + message="Receive task: {task}", + task=task, + ) n = task.get("n", 1) try: request_id_idx = task.get("request_id") @@ -433,7 +458,12 @@ class EngineClient: tracing.TraceSpanName.PREPROCESSING, task.get("request_id").split("_")[0], thread_finish_flag=True ) except Exception as e: - api_server_logger.error(f"zmq_client send task error: {e}, {str(traceback.format_exc())}") + log_request_error( + message="request[{request_id}] zmq_client send task error: {error}, {traceback}", + request_id=task.get("request_id"), + error=e, + traceback=traceback.format_exc(), + ) raise EngineError(str(e), error_code=400) def _send_task(self, task): @@ -455,8 +485,11 @@ class EngineClient: if data.get("max_tokens") is not None: if data["max_tokens"] < 1 or data["max_tokens"] >= self.max_model_len: - api_server_logger.error( - f"req_id:{data['request_id']}, max_tokens must be defined [1, {self.max_model_len}), but now it's {data['max_tokens']}." + log_request_error( + message="req_id:{request_id}, max_tokens must be defined [1, {max_model_len}), but now it's {max_tokens}.", + request_id=data["request_id"], + max_model_len=self.max_model_len, + max_tokens=data["max_tokens"], ) raise ValueError( f"max_tokens can be defined [1, {self.max_model_len}), but now it's {data['max_tokens']}." 
@@ -467,14 +500,18 @@ class EngineClient: raise ParameterError("reasoning_max_tokens", "reasoning_max_tokens must be greater than 0") if data["reasoning_max_tokens"] > data["max_tokens"]: data["reasoning_max_tokens"] = data["max_tokens"] - api_server_logger.warning( - f"req_id: {data['request_id']}, reasoning_max_tokens exceeds max_tokens, the value of reasoning_max_tokens will be adjusted to {data['max_tokens']}" + log_request( + level=RequestLogLevel.STAGES, + message="req_id: {request_id}, reasoning_max_tokens exceeds max_tokens, the value of reasoning_max_tokens will be adjusted to {max_tokens}", + request_id=data["request_id"], + max_tokens=data["max_tokens"], ) if data.get("reasoning_effort") is not None: data["reasoning_max_tokens"] = None - api_server_logger.warning( - f"req_id: {data['request_id']}, reasoning_max_tokens and reasoning_effort are both set, " - f"enable_thinking will be disabled." + log_request( + level=RequestLogLevel.STAGES, + message="req_id: {request_id}, reasoning_max_tokens and reasoning_effort are both set, enable_thinking will be disabled.", + request_id=data["request_id"], ) if data.get("response_max_tokens") is not None: @@ -493,7 +530,9 @@ class EngineClient: is_chat = True if not self.enable_logprob: err_msg = "Logprobs is disabled, please enable it in startup config." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ParameterError("logprobs", err_msg) top_logprobs = data.get("top_logprobs") elif isinstance(logprobs, int): @@ -506,11 +545,15 @@ class EngineClient: max_logprobs = self.ori_vocab_size if max_logprobs < -1: err_msg = f"Invalid 'max_logprobs': must be >= -1, got {max_logprobs}." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("max_logprobs", err_msg) if max_logprobs > self.ori_vocab_size: err_msg = f"Invalid 'max_logprobs': must be <= vocab_size {self.ori_vocab_size}, got {max_logprobs}." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("max_logprobs", err_msg) prompt_logprobs = data.get("prompt_logprobs", None) @@ -518,41 +561,55 @@ class EngineClient: if prompt_logprobs is not None: if not self.enable_logprob: err_msg = "`enable_logprob` is disabled, please enable it in startup config." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ParameterError("prompt_logprobs", err_msg) if not envs.FD_USE_GET_SAVE_OUTPUT_V1: err_msg = "prompt_logprobs is not support when FD_USE_GET_SAVE_OUTPUT_V1 is disabled." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ParameterError("prompt_logprobs", err_msg) if self.enable_prefix_caching: err_msg = "prompt_logprobs is not support when prefix caching is enabled." 
- api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ParameterError("prompt_logprobs", err_msg) if prompt_logprobs == -1 and self.ori_vocab_size > max_logprobs: err_msg = f"The requested value of ({self.ori_vocab_size}) for prompt_logprobs (-1) exceeds the maximum allowed value of ({max_logprobs})" - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("prompt_logprobs", err_msg) if prompt_logprobs < -1: err_msg = ( f"prompt_logprobs must be a non-negative value or -1; the current value is {prompt_logprobs}." ) - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("prompt_logprobs", err_msg) if prompt_logprobs > max_logprobs: err_msg = f"Number of prompt_logprobs requested ({prompt_logprobs}) exceeds maximum allowed value ({max_logprobs})." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("prompt_logprobs", err_msg) # enable_logprob if top_logprobs is not None: if not self.enable_logprob: err_msg = "Logprobs is disabled, please enable it in startup config." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ParameterError("top_logprobs" if is_chat else "logprobs", err_msg) if not isinstance(top_logprobs, int): @@ -560,28 +617,38 @@ class EngineClient: err_msg = ( f"Invalid type for {'top_logprobs' if is_chat else 'logprobs'}: expected int but got {err_type}." ) - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ParameterError("top_logprobs" if is_chat else "logprobs", err_msg) if top_logprobs > max_logprobs: err_msg = f"Number of {'top_logprobs' if is_chat else 'logprobs'} requested ({top_logprobs}) exceeds maximum allowed value ({max_logprobs})." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("top_logprobs" if is_chat else "logprobs", err_msg) if not envs.FD_USE_GET_SAVE_OUTPUT_V1: if top_logprobs < 0 or top_logprobs > max_logprobs: err_msg = f"{'top_logprobs' if is_chat else 'logprobs'} must be between 0 and {max_logprobs}; the current value is {top_logprobs}." - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("top_logprobs" if is_chat else "logprobs", err_msg) else: if top_logprobs == -1 and self.ori_vocab_size > max_logprobs: err_msg = f"The requested value of ({self.ori_vocab_size}) for {'top_logprobs' if is_chat else 'logprobs'} (-1) exceeds the maximum allowed value of ({max_logprobs})" - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("top_logprobs" if is_chat else "logprobs", err_msg) if top_logprobs < -1: err_msg = f"{'top_logprobs' if is_chat else 'logprobs'} must be a non-negative value or -1; the current value is {top_logprobs}." 
- api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg + ) raise ValueError("top_logprobs" if is_chat else "logprobs", err_msg) def check_health(self, time_interval_threashold=30): @@ -617,12 +684,21 @@ class EngineClient: return response except asyncio.TimeoutError: error_response = ControlResponse(request_id, 500, "Timeout waiting for control method response") - api_server_logger.error(f"Control request timed out: {error_response}") + log_request_error( + message="request[{request_id}] Control request timed out: {error_response}", + request_id=request_id, + error_response=error_response, + ) return error_response except Exception as e: import traceback - api_server_logger.error(f"Unknown error in control method: {str(e)}\n{traceback.format_exc()}") + log_request_error( + message="request[{request_id}] Unknown error in control method: {error}\n{traceback}", + request_id=request_id, + error=str(e), + traceback=traceback.format_exc(), + ) error_response = ControlResponse(request_id, 500, str(e)) return error_response @@ -1024,7 +1100,11 @@ class EngineClient: async def abort(self, request_id, n=1) -> None: if envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE: - api_server_logger.info(f"abort request_id:{request_id}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="abort request_id: {request_id}", + request_id=request_id, + ) if n <= 0: api_server_logger.warning("Abort function called with non-positive n: %d. No requests aborted.", n) return @@ -1044,7 +1124,11 @@ class EngineClient: } self._send_task(data) - api_server_logger.info("Aborted request(s) %s.", ",".join(request_ids)) + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Aborted request(s) {request_ids}.", + request_ids=",".join(request_ids), + ) def process_messages(self, messages): for message in messages: diff --git a/fastdeploy/entrypoints/llm.py b/fastdeploy/entrypoints/llm.py index d4545c41b6..1d18421a55 100644 --- a/fastdeploy/entrypoints/llm.py +++ b/fastdeploy/entrypoints/llm.py @@ -36,6 +36,7 @@ from fastdeploy.entrypoints.chat_utils import load_chat_template from fastdeploy.entrypoints.openai.protocol import ChatCompletionToolsParam from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager from fastdeploy.input.utils import validate_model_path +from fastdeploy.logger.request_logger import log_request_error from fastdeploy.utils import ( deprecated_kwargs_warning, llm_logger, @@ -139,7 +140,11 @@ class LLM: continue self.req_output[request_id].add(result) except Exception as e: - llm_logger.error(f"Unexpected error happened: {e}, {traceback.format_exc()!s}") + log_request_error( + message="Unexpected error happened: {error}, {traceback}", + error=e, + traceback=traceback.format_exc(), + ) def generate( self, @@ -437,7 +442,11 @@ class LLM: return result except Exception as e: - llm_logger.error(f"Error building sample logprobs from LogprobsLists: {e}, {str(traceback.format_exc())}") + log_request_error( + message="Error building sample logprobs from LogprobsLists: {error}, {traceback}", + error=e, + traceback=traceback.format_exc(), + ) def _build_prompt_logprobs( self, diff --git a/fastdeploy/entrypoints/openai/api_server.py b/fastdeploy/entrypoints/openai/api_server.py index 2b07b1eac3..0750180a67 100644 --- a/fastdeploy/entrypoints/openai/api_server.py +++ b/fastdeploy/entrypoints/openai/api_server.py @@ -73,6 +73,11 @@ from fastdeploy.entrypoints.openai.v1.serving_completion import ( 
OpenAIServingCompletion as OpenAIServingCompletionV1, ) from fastdeploy.envs import environment_variables +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.metrics.metrics import get_filtered_metrics from fastdeploy.utils import ( ExceptionHandler, @@ -325,7 +330,11 @@ async def connection_manager(): await asyncio.wait_for(connection_semaphore.acquire(), timeout=0.001) yield except asyncio.TimeoutError: - api_server_logger.info(f"Reach max request concurrency, semaphore status: {connection_semaphore.status()}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Reach max request concurrency, semaphore status: {status}", + status=connection_semaphore.status(), + ) raise HTTPException( status_code=429, detail=f"Too many requests,current max concurrency is {args.max_concurrency}" ) @@ -545,7 +554,7 @@ async def create_chat_completion(request: ChatCompletionRequest, req: Request): """ Create a chat completion for the provided prompt and parameters. """ - api_server_logger.debug(f"Chat Received request: {request.model_dump_json()}") + log_request(RequestLogLevel.FULL, message="Chat Received request: {request}", request=request.model_dump_json()) if envs.TRACES_ENABLE: if req.headers: headers = dict(req.headers) @@ -572,7 +581,11 @@ async def create_chat_completion(request: ChatCompletionRequest, req: Request): return StreamingResponse(content=wrapped_generator(), media_type="text/event-stream") except HTTPException as e: - api_server_logger.error(f"Error in chat completion: {str(e)}") + log_request_error( + message="request[{request_id}] Error in chat completion: {error}", + request_id=getattr(request, "request_id", None), + error=str(e), + ) return JSONResponse(status_code=e.status_code, content={"detail": e.detail}) @@ -582,7 +595,9 @@ async def create_completion(request: CompletionRequest, req: Request): """ Create a completion for the provided prompt and parameters. """ - api_server_logger.info(f"Completion Received request: {request.model_dump_json()}") + log_request( + RequestLogLevel.FULL, message="Completion Received request: {request}", request=request.model_dump_json() + ) if envs.TRACES_ENABLE: if req.headers: headers = dict(req.headers) diff --git a/fastdeploy/entrypoints/openai/protocol.py b/fastdeploy/entrypoints/openai/protocol.py index 09c8cae0c1..9c21b05182 100644 --- a/fastdeploy/entrypoints/openai/protocol.py +++ b/fastdeploy/entrypoints/openai/protocol.py @@ -31,6 +31,7 @@ from pydantic import ( ) from fastdeploy.engine.pooling_params import PoolingParams +from fastdeploy.logger.request_logger import RequestLogLevel, log_request from fastdeploy.worker.output import PromptLogprobs, SpeculateMetrics @@ -758,9 +759,7 @@ class ChatCompletionRequest(BaseModel): ), "The parameter `raw_request` is not supported now, please use completion api instead." 
for key, value in self.metadata.items(): req_dict[key] = value - from fastdeploy.utils import api_server_logger - - api_server_logger.warning("The parameter metadata is obsolete.") + log_request(RequestLogLevel.STAGES, message="The parameter metadata is obsolete.") for key, value in self.dict().items(): if value is not None: req_dict[key] = value diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py index eb106f6550..df0cf369db 100644 --- a/fastdeploy/entrypoints/openai/serving_chat.py +++ b/fastdeploy/entrypoints/openai/serving_chat.py @@ -44,6 +44,11 @@ from fastdeploy.entrypoints.openai.protocol import ( UsageInfo, ) from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.trace.constants import LoggingEventName from fastdeploy.trace.trace_logger import print as trace_print @@ -112,14 +117,16 @@ class OpenAIServingChat: err_msg = ( f"Only master node can accept completion request, please send request to master node: {self.master_ip}" ) - api_server_logger.error(err_msg) + log_request_error(message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg) return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR)) if self.models: is_supported, request.model = self.models.is_supported_model(request.model) if not is_supported: err_msg = f"Unsupported model: [{request.model}], support [{', '.join([x.name for x in self.models.model_paths])}] or default" - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg + ) return ErrorResponse( error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR, code=ErrorCode.MODEL_NOT_SUPPORT) ) @@ -129,7 +136,11 @@ class OpenAIServingChat: await self.engine_client.semaphore.acquire() else: await asyncio.wait_for(self.engine_client.semaphore.acquire(), timeout=self.max_waiting_time) - api_server_logger.info(f"current {self.engine_client.semaphore.status()}") + log_request( + RequestLogLevel.STAGES, + message="semaphore status: {status}", + status=self.engine_client.semaphore.status(), + ) if request.request_id is not None: request_id = request.request_id @@ -141,7 +152,11 @@ class OpenAIServingChat: request_id = f"chatcmpl-{uuid.uuid4()}" tracing.trace_req_start(rid=request_id, trace_content=request.trace_context, role="FastDeploy") del request.trace_context - api_server_logger.info(f"create chat completion request: {request_id}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="create chat completion request: {request_id}", + request_id=request_id, + ) prompt_tokens = None max_tokens = None try: @@ -156,14 +171,19 @@ class OpenAIServingChat: if isinstance(prompt_token_ids, np.ndarray): prompt_token_ids = prompt_token_ids.tolist() except ParameterError as e: - api_server_logger.error(f"request[{request_id}] generator error: {str(e)}, {e.message}") + log_request_error( + message="request[{request_id}] generator error: {error}, {error_message}", + request_id=request_id, + error=str(e), + error_message=e.message, + ) self.engine_client.semaphore.release() return ErrorResponse( error=ErrorInfo(message=str(e.message), type=ErrorType.INVALID_REQUEST_ERROR, param=e.param) ) except Exception as e: error_msg = f"request[{request_id}] generator error: {str(e)}, 
{str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) self.engine_client.semaphore.release() return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR)) @@ -178,12 +198,12 @@ class OpenAIServingChat: ) except Exception as e: error_msg = f"request[{request_id}]full generator error: {str(e)}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR)) except asyncio.CancelledError as e: await self.engine_client.abort(f"{request_id}_0", 1 if request.n is None else request.n) error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return ErrorResponse( error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR, code=ErrorCode.CLIENT_ABORTED) ) @@ -192,13 +212,13 @@ class OpenAIServingChat: f"request[{request_id}] waiting error: {str(e)}, {str(traceback.format_exc())}, " f"max waiting time: {self.max_waiting_time}" ) - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return ErrorResponse( error=ErrorInfo(message=error_msg, type=ErrorType.TIMEOUT_ERROR, code=ErrorCode.TIMEOUT) ) def _create_streaming_error_response(self, message: str) -> str: - api_server_logger.error(message) + log_request_error(message=message) error_response = ErrorResponse(error=ErrorInfo(message=message, type=ErrorType.INTERNAL_ERROR)) return error_response.model_dump_json() @@ -249,7 +269,9 @@ class OpenAIServingChat: choices=[], model=model_name, ) - api_server_logger.info(f"create chat completion request: {request_id}") + log_request( + RequestLogLevel.LIFECYCLE, message="create chat completion request: {request_id}", request_id=request_id + ) try: dealer, response_queue = await self.engine_client.connection_manager.get_connection( @@ -372,7 +394,12 @@ class OpenAIServingChat: completion_tokens_details=CompletionTokenUsageInfo(reasoning_tokens=0), ) yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n" - api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Chat Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}", + request_id=request_id, + completion_tokens=0, + ) first_iteration = False output = res["outputs"] @@ -497,7 +524,14 @@ class OpenAIServingChat: chunk.choices = choices yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n" if res["finished"]: - api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Chat Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}", + request_id=request_id, + finish_reason=choice.finish_reason, + completion_tokens=previous_num_tokens[idx], + logprobs=logprobs_res, + ) choices = [] if include_usage: @@ -525,7 +559,7 @@ class OpenAIServingChat: except asyncio.CancelledError as e: await self.engine_client.abort(f"{request_id}_0", 1 if request.n is None else request.n) error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) except Exception as e: error_data = 
self._create_streaming_error_response( f"request[{request_id}] generate stream error: {str(e)}, {str(traceback.format_exc())}" @@ -536,7 +570,12 @@ class OpenAIServingChat: tracing.trace_req_finish(request_id) await self.engine_client.connection_manager.cleanup_request(request_id) self.engine_client.semaphore.release() - api_server_logger.info(f"release {request_id} {self.engine_client.semaphore.status()}") + log_request( + level=RequestLogLevel.STAGES, + message="release {request_id} {status}", + request_id=request_id, + status=self.engine_client.semaphore.status(), + ) yield "data: [DONE]\n\n" async def chat_completion_full_generator( @@ -704,7 +743,9 @@ class OpenAIServingChat: tracing.trace_req_finish(request_id) await self.engine_client.connection_manager.cleanup_request(request_id) self.engine_client.semaphore.release() - api_server_logger.info(f"release {self.engine_client.semaphore.status()}") + log_request( + RequestLogLevel.STAGES, message="release {status}", status=self.engine_client.semaphore.status() + ) num_prompt_tokens = len(prompt_token_ids) num_generated_tokens = sum(previous_num_tokens) @@ -731,7 +772,7 @@ class OpenAIServingChat: choices=choices, usage=usage, ) - api_server_logger.info(f"Chat response: {res.model_dump_json()}") + log_request(RequestLogLevel.CONTENT, message="Chat response: {response}", response=res.model_dump_json()) return res async def _create_chat_completion_choice( @@ -904,7 +945,7 @@ class OpenAIServingChat: except Exception as e: error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return None def _build_prompt_logprobs( diff --git a/fastdeploy/entrypoints/openai/serving_completion.py b/fastdeploy/entrypoints/openai/serving_completion.py index b277576a1f..5eaf8cd65c 100644 --- a/fastdeploy/entrypoints/openai/serving_completion.py +++ b/fastdeploy/entrypoints/openai/serving_completion.py @@ -41,6 +41,11 @@ from fastdeploy.entrypoints.openai.protocol import ( PromptTokenUsageInfo, UsageInfo, ) +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.trace.constants import LoggingEventName from fastdeploy.trace.trace_logger import print as trace_print from fastdeploy.utils import ( @@ -91,13 +96,15 @@ class OpenAIServingCompletion: err_msg = ( f"Only master node can accept completion request, please send request to master node: {self.master_ip}" ) - api_server_logger.error(err_msg) + log_request_error(message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg) return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR)) if self.models: is_supported, request.model = self.models.is_supported_model(request.model) if not is_supported: err_msg = f"Unsupported model: [{request.model}], support [{', '.join([x.name for x in self.models.model_paths])}] or default" - api_server_logger.error(err_msg) + log_request_error( + message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg + ) return ErrorResponse( error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR, code=ErrorCode.MODEL_NOT_SUPPORT) ) @@ -110,7 +117,11 @@ class OpenAIServingCompletion: request_id = f"cmpl-{request.user}-{uuid.uuid4()}" else: request_id = f"cmpl-{uuid.uuid4()}" - api_server_logger.info(f"Initialize request {request_id}: {request}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Initialize request {request_id}", + 
request_id=request_id, + ) tracing.trace_req_start(rid=request_id, trace_content=request.trace_context, role="FastDeploy") del request.trace_context request_prompt_ids = None @@ -147,15 +158,20 @@ class OpenAIServingCompletion: else: raise ValueError("Prompt type must be one of: str, list[str], list[int], list[list[int]]") except Exception as e: - error_msg = f"OpenAIServingCompletion create_completion: {e}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + error_msg = f"request[{request_id}] create_completion: {e}, {str(traceback.format_exc())}" + log_request_error(message=error_msg) return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR)) if request_prompt_ids is not None: request_prompts = request_prompt_ids num_choices = len(request_prompts) * (1 if request.n is None else request.n) - api_server_logger.info(f"Start preprocessing request: req_id={request_id}), num_choices={num_choices}") + log_request( + RequestLogLevel.STAGES, + message="Start preprocessing request: req_id={request_id}), num_choices={num_choices}", + request_id=request_id, + num_choices=num_choices, + ) prompt_batched_token_ids = [] prompt_tokens_list = [] max_tokens_list = [] @@ -169,7 +185,7 @@ class OpenAIServingCompletion: f"OpenAIServingCompletion waiting error: {e}, {str(traceback.format_exc())}, " f"max waiting time: {self.max_waiting_time}" ) - api_server_logger.error(error_msg) + log_request_error(message="request[{request_id}] {error}", request_id=request_id, error=error_msg) return ErrorResponse( error=ErrorInfo(message=error_msg, code=ErrorCode.TIMEOUT, type=ErrorType.TIMEOUT_ERROR) ) @@ -188,14 +204,19 @@ class OpenAIServingCompletion: max_tokens_list.append(current_req_dict.get("max_tokens")) del current_req_dict except ParameterError as e: - api_server_logger.error(f"OpenAIServingCompletion format error: {e}, {e.message}") + log_request_error( + message="request[{request_id}] format error: {error}, {error_message}", + request_id=request_id, + error=e, + error_message=e.message, + ) self.engine_client.semaphore.release() return ErrorResponse( error=ErrorInfo(code="400", message=str(e.message), type="invalid_request", param=e.param) ) except Exception as e: - error_msg = f"OpenAIServingCompletion format error: {e}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + error_msg = f"request[{request_id}] format error: {e}, {str(traceback.format_exc())}" + log_request_error(message=error_msg) self.engine_client.semaphore.release() return ErrorResponse( error=ErrorInfo(message=str(e), code=ErrorCode.INVALID_VALUE, type=ErrorType.INVALID_REQUEST_ERROR) @@ -226,20 +247,20 @@ class OpenAIServingCompletion: ) except Exception as e: error_msg = ( - f"OpenAIServingCompletion completion_full_generator error: {e}, {str(traceback.format_exc())}" + f"request[{request_id}] completion_full_generator error: {e}, {str(traceback.format_exc())}" ) - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR)) except asyncio.CancelledError as e: await self.engine_client.abort(f"{request_id}_0", num_choices) error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return ErrorResponse( error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR, code=ErrorCode.CLIENT_ABORTED) ) except Exception as e: - error_msg = 
f"OpenAIServingCompletion create_completion error: {e}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + error_msg = f"request[{request_id}] create_completion error: {e}, {str(traceback.format_exc())}" + log_request_error(message=error_msg) return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR)) async def completion_full_generator( @@ -368,10 +389,16 @@ class OpenAIServingCompletion: prompt_tokens_list=prompt_tokens_list, max_tokens_list=max_tokens_list, ) - api_server_logger.info(f"Completion response: {res.model_dump_json()}") + log_request( + RequestLogLevel.CONTENT, message="Completion response: {response}", response=res.model_dump_json() + ) return res except Exception as e: - api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True) + log_request_error( + message="request[{request_id}] error in completion_full_generator: {error}", + request_id=request_id, + error=e, + ) finally: trace_print(LoggingEventName.POSTPROCESSING_END, request_id, getattr(request, "user", "")) tracing.trace_req_finish(request_id) @@ -514,8 +541,11 @@ class OpenAIServingCompletion: ], ) yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n" - api_server_logger.info( - f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}" + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}", + request_id=request_id, + completion_tokens=0, ) first_iteration[idx] = False @@ -592,8 +622,11 @@ class OpenAIServingCompletion: if send_idx == 0 and not request.return_token_ids: chunk_temp = chunk chunk_temp.choices = choices - api_server_logger.info( - f"Completion Streaming response send_idx 0: {chunk_temp.model_dump_json()}" + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}", + request_id=request_id, + completion_tokens=output_tokens[idx], ) del chunk_temp @@ -646,14 +679,26 @@ class OpenAIServingCompletion: metrics=res["metrics"] if request.collect_metrics else None, ) yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n" - api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Completion Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}", + request_id=request_id, + finish_reason=chunk.choices[-1].finish_reason if chunk.choices else None, + completion_tokens=output_tokens[idx], + logprobs=logprobs_res, + ) except asyncio.CancelledError as e: await self.engine_client.abort(f"{request_id}_0", num_choices) error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) except Exception as e: - api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}") + log_request_error( + message="request[{request_id}] error in completion_stream_generator: {error}, {traceback}", + request_id=request_id, + error=e, + traceback=traceback.format_exc(), + ) yield f"data: {ErrorResponse(error=ErrorInfo(message=str(e), code='400', type=ErrorType.INTERNAL_ERROR)).model_dump_json(exclude_unset=True)}\n\n" finally: trace_print(LoggingEventName.POSTPROCESSING_END, request_id, 
getattr(request, "user", "")) @@ -887,7 +932,11 @@ class OpenAIServingCompletion: ) except Exception as e: - api_server_logger.error(f"Error in _build_logprobs_response: {str(e)}, {str(traceback.format_exc())}") + log_request_error( + message="Error in _build_logprobs_response: {error}, {traceback}", + error=str(e), + traceback=traceback.format_exc(), + ) return None def _build_prompt_logprobs( diff --git a/fastdeploy/entrypoints/openai/serving_embedding.py b/fastdeploy/entrypoints/openai/serving_embedding.py index 25f3f63051..a214209a4a 100644 --- a/fastdeploy/entrypoints/openai/serving_embedding.py +++ b/fastdeploy/entrypoints/openai/serving_embedding.py @@ -35,7 +35,7 @@ from fastdeploy.entrypoints.openai.protocol import ( UsageInfo, ) from fastdeploy.entrypoints.openai.serving_engine import ServeContext, ZmqOpenAIServing -from fastdeploy.utils import api_server_logger +from fastdeploy.logger.request_logger import RequestLogLevel, log_request def _get_embedding( @@ -140,7 +140,12 @@ class OpenAIServingEmbedding(ZmqOpenAIServing): @override def _build_response(self, ctx: ServeContext, request_output: dict): """Generate final embedding response""" - api_server_logger.info(f"[{ctx.request_id}] Embedding RequestOutput received:{request_output}") + log_request( + level=RequestLogLevel.CONTENT, + message="[{request_id}] Embedding RequestOutput received:{request_output}", + request_id=ctx.request_id, + request_output=request_output, + ) base = PoolingRequestOutput.from_dict(request_output) embedding_res = EmbeddingRequestOutput.from_base(base) diff --git a/fastdeploy/entrypoints/openai/serving_engine.py b/fastdeploy/entrypoints/openai/serving_engine.py index d83f8da1ab..862c74ceee 100644 --- a/fastdeploy/entrypoints/openai/serving_engine.py +++ b/fastdeploy/entrypoints/openai/serving_engine.py @@ -33,6 +33,11 @@ from fastdeploy.entrypoints.openai.protocol import ( InvalidParameterException, ) from fastdeploy.envs import FD_SUPPORT_MAX_CONNECTIONS +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.utils import ErrorCode, ErrorType, StatefulSemaphore, api_server_logger RequestT = TypeVar("RequestT") @@ -96,13 +101,18 @@ class OpenAIServing(ABC, Generic[RequestT]): is_supported, adjusted_name = self.models.is_supported_model(model_name) if not is_supported: err_msg = f"Unsupported model: [{model_name}]" - api_server_logger.error(err_msg) + log_request_error(message=err_msg) return is_supported, adjusted_name async def _acquire_semaphore(self, request_id: str) -> bool: """Acquire engine client semaphore with timeout""" try: - api_server_logger.info(f"Acquire request:{request_id} status:{self._get_semaphore().status()}") + log_request( + level=RequestLogLevel.STAGES, + message="Acquire request:{request_id} status:{status}", + request_id=request_id, + status=self._get_semaphore().status(), + ) if self.max_waiting_time < 0: await self._get_semaphore().acquire() else: @@ -111,13 +121,18 @@ class OpenAIServing(ABC, Generic[RequestT]): except asyncio.TimeoutError: self._release_semaphore(request_id) error_msg = f"Request waiting timeout, request:{request_id} max waiting time:{self.max_waiting_time}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return False def _release_semaphore(self, request_id: str) -> None: """Release engine client semaphore""" self._get_semaphore().release() - api_server_logger.info(f"Release request:{request_id} status:{self._get_semaphore().status()}") + log_request( + 
level=RequestLogLevel.STAGES, + message="Release request:{request_id} status:{status}", + request_id=request_id, + status=self._get_semaphore().status(), + ) def _create_error_response( self, @@ -128,7 +143,7 @@ class OpenAIServing(ABC, Generic[RequestT]): ) -> ErrorResponse: """Create standardized error response""" traceback.print_exc() - api_server_logger.error(message) + log_request_error(message=message) return ErrorResponse(error=ErrorInfo(message=message, type=error_type, code=code, param=param)) def _generate_request_id(self, request: RequestT) -> str: @@ -193,7 +208,12 @@ class OpenAIServing(ABC, Generic[RequestT]): request_id = self._generate_request_id(request) ctx.request_id = request_id - api_server_logger.info(f"Initialize request {request_id}: {request}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Initialize request {request_id}: {request}", + request_id=request_id, + request=request, + ) # Step 2: Semaphore acquisition if not await self._acquire_semaphore(request_id): @@ -252,7 +272,12 @@ class ZmqOpenAIServing(OpenAIServing): request_dicts = self._request_to_batch_dicts(ctx) ctx.preprocess_requests = request_dicts for request_dict in request_dicts: - api_server_logger.info(f"batch add request_id: {request_dict['request_id']}, request: {request_dict}") + log_request( + level=RequestLogLevel.CONTENT, + message="batch add request_id: {request_id}, request: {request}", + request_id=request_dict["request_id"], + request=request_dict, + ) await self.engine_client.format_and_add_data(request_dict) def _process_chat_template_kwargs(self, request_dict): @@ -283,7 +308,11 @@ class ZmqOpenAIServing(OpenAIServing): while num_choices > 0: request_output_dicts = await asyncio.wait_for(request_output_queue.get(), timeout=60) for request_output_dict in request_output_dicts: - api_server_logger.debug(f"Received RequestOutput: {request_output_dict}") + log_request( + level=RequestLogLevel.FULL, + message="Received RequestOutput: {request_output}", + request_output=request_output_dict, + ) if request_output_dict["finished"] is True: num_choices -= 1 yield request_output_dict @@ -301,7 +330,12 @@ class ZmqOpenAIServing(OpenAIServing): async def _acquire_semaphore(self, request_id: str) -> bool: """Acquire engine client semaphore with timeout""" try: - api_server_logger.info(f"Acquire request:{request_id} status:{self._get_semaphore().status()}") + log_request( + level=RequestLogLevel.STAGES, + message="Acquire request:{request_id} status:{status}", + request_id=request_id, + status=self._get_semaphore().status(), + ) if self.max_waiting_time < 0: await self._get_semaphore().acquire() else: @@ -310,14 +344,19 @@ class ZmqOpenAIServing(OpenAIServing): except asyncio.TimeoutError: self._release_semaphore(request_id) error_msg = f"Request waiting timeout, request:{request_id} max waiting time:{self.max_waiting_time}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return False @override def _release_semaphore(self, request_id: str) -> None: """Release engine client semaphore""" self._get_semaphore().release() - api_server_logger.info(f"Release request:{request_id} status:{self._get_semaphore().status()}") + log_request( + level=RequestLogLevel.STAGES, + message="Release request:{request_id} status:{status}", + request_id=request_id, + status=self._get_semaphore().status(), + ) @override def _check_master(self) -> bool: diff --git a/fastdeploy/entrypoints/openai/serving_models.py b/fastdeploy/entrypoints/openai/serving_models.py index 
fc1507e5f3..8b45c8a939 100644 --- a/fastdeploy/entrypoints/openai/serving_models.py +++ b/fastdeploy/entrypoints/openai/serving_models.py @@ -24,7 +24,8 @@ from fastdeploy.entrypoints.openai.protocol import ( ModelList, ModelPermission, ) -from fastdeploy.utils import ErrorType, api_server_logger, get_host_ip +from fastdeploy.logger.request_logger import log_request_error +from fastdeploy.utils import ErrorType, get_host_ip @dataclass @@ -86,7 +87,7 @@ class OpenAIServingModels: err_msg = ( f"Only master node can accept models request, please send request to master node: {self.master_ip}" ) - api_server_logger.error(err_msg) + log_request_error(message=err_msg) return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR)) model_infos = [ ModelInfo( diff --git a/fastdeploy/entrypoints/openai/serving_reward.py b/fastdeploy/entrypoints/openai/serving_reward.py index cc3ed8a472..d04044c0c0 100644 --- a/fastdeploy/entrypoints/openai/serving_reward.py +++ b/fastdeploy/entrypoints/openai/serving_reward.py @@ -28,7 +28,7 @@ from fastdeploy.entrypoints.openai.protocol import ( UsageInfo, ) from fastdeploy.entrypoints.openai.serving_engine import ServeContext, ZmqOpenAIServing -from fastdeploy.utils import api_server_logger +from fastdeploy.logger.request_logger import RequestLogLevel, log_request class OpenAIServingReward(ZmqOpenAIServing): @@ -77,7 +77,7 @@ class OpenAIServingReward(ZmqOpenAIServing): response: ChatRewardResponse = None generators: AsyncGenerator[ChatRewardResponse, None] = self.handle(ctx) async for r in generators: - api_server_logger.info(f"engine pooling result:{r}") + log_request(RequestLogLevel.CONTENT, message="engine pooling result: {result}", result=r) r.data[0].index = idx idx += 1 if response is None or isinstance(r, ErrorResponse): @@ -93,7 +93,12 @@ class OpenAIServingReward(ZmqOpenAIServing): @override def _build_response(self, ctx: ServeContext, request_output: dict): """Generate final reward response""" - api_server_logger.info(f"[{ctx.request_id}] Reward RequestOutput received:{request_output}") + log_request( + level=RequestLogLevel.CONTENT, + message="Reward RequestOutput received: request_id={request_id}, output={request_output}", + request_id=ctx.request_id, + request_output=request_output, + ) base = PoolingRequestOutput.from_dict(request_output) reward_res = RewardRequestOutput.from_base(base) diff --git a/fastdeploy/entrypoints/openai/tool_parsers/ernie_45_vl_thinking_tool_parser.py b/fastdeploy/entrypoints/openai/tool_parsers/ernie_45_vl_thinking_tool_parser.py index 4e6cc93cba..df4ef95f4d 100644 --- a/fastdeploy/entrypoints/openai/tool_parsers/ernie_45_vl_thinking_tool_parser.py +++ b/fastdeploy/entrypoints/openai/tool_parsers/ernie_45_vl_thinking_tool_parser.py @@ -41,6 +41,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager, ) +from fastdeploy.logger.request_logger import log_request_error from fastdeploy.utils import data_processor_logger @@ -184,7 +185,9 @@ class Ernie45VLThinkingToolParser(ToolParser): continue if not function_call_arr: - data_processor_logger.error("No valid tool calls found") + log_request_error( + message="request[{request_id}] No valid tool calls found", request_id=request.request_id + ) return ExtractedToolCallInformation(tools_called=False, content=model_output) tool_calls = [] @@ -226,7 +229,11 @@ class Ernie45VLThinkingToolParser(ToolParser): ) except Exception as e: - data_processor_logger.error(f"Error in extracting tool call from response: 
{str(e)}") + log_request_error( + message="request[{request_id}] Error in extracting tool call from response: {error}", + request_id=request.request_id, + error=str(e), + ) return ExtractedToolCallInformation(tools_called=False, tool_calls=None, content=model_output) def extract_tool_calls_streaming( @@ -343,7 +350,11 @@ class Ernie45VLThinkingToolParser(ToolParser): ) return delta except Exception as e: - data_processor_logger.error(f"Error in streaming tool call extraction: {str(e)}") + log_request_error( + message="request[{request_id}] Error in streaming tool call extraction: {error}", + request_id=request.get("request_id"), + error=str(e), + ) return None if "" in self.buffer: end_pos = self.buffer.find("") @@ -354,5 +365,9 @@ class Ernie45VLThinkingToolParser(ToolParser): return delta except Exception as e: - data_processor_logger.error(f"Error in streaming tool call extraction: {str(e)}") + log_request_error( + message="request[{request_id}] Error in streaming tool call extraction: {error}", + request_id=request.get("request_id"), + error=str(e), + ) return None diff --git a/fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py b/fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py index 7435dbce49..34cbb2e545 100644 --- a/fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py +++ b/fastdeploy/entrypoints/openai/tool_parsers/ernie_x1_tool_parser.py @@ -41,6 +41,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager, ) +from fastdeploy.logger.request_logger import log_request_error from fastdeploy.utils import data_processor_logger as logger @@ -254,8 +255,11 @@ class ErnieX1ToolParser(ToolParser): logger.debug("Skipping text %s - no arguments", delta_text) delta = None - elif cur_arguments is None and prev_arguments is not None: - logger.error("should be impossible to have arguments reset " "mid-call. skipping streaming anything.") + elif not cur_arguments and prev_arguments: + log_request_error( + message="request[{request_id}] should be impossible to have arguments reset mid-call. 
skipping streaming anything.", + request_id=request.request_id, + ) delta = None elif cur_arguments is not None and prev_arguments is None: diff --git a/fastdeploy/entrypoints/openai/v1/serving_chat.py b/fastdeploy/entrypoints/openai/v1/serving_chat.py index afd55faf54..0e06b3d2c2 100644 --- a/fastdeploy/entrypoints/openai/v1/serving_chat.py +++ b/fastdeploy/entrypoints/openai/v1/serving_chat.py @@ -45,8 +45,12 @@ from fastdeploy.entrypoints.openai.v1.serving_base import ( ServingResponseContext, ) from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.metrics.metrics import main_process_metrics -from fastdeploy.utils import api_server_logger from fastdeploy.worker.output import LogprobsLists @@ -178,7 +182,7 @@ class OpenAIServingChat(OpenAiServingBase): except Exception as e: error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return None @override @@ -302,7 +306,14 @@ class OpenAIServingChat(OpenAiServingBase): max_tokens = request.max_completion_tokens or request.max_tokens choice_completion_tokens = response_ctx.choice_completion_tokens_dict[output.index] choice.finish_reason = self._calc_finish_reason(request_output, max_tokens, choice_completion_tokens) - api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Chat Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}", + request_id=request_id, + finish_reason=choice.finish_reason, + completion_tokens=choice_completion_tokens, + logprobs=choice.logprobs, + ) yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n" if request_output.finished and response_ctx.remain_choices == 0: @@ -339,7 +350,11 @@ class OpenAIServingChat(OpenAiServingBase): res = ChatCompletionResponse( id=ctx.request_id, model=request.model, choices=choices, created=ctx.created_time, usage=response_ctx.usage ) - api_server_logger.info(f"Chat response: {res.model_dump_json()}") + log_request( + level=RequestLogLevel.CONTENT, + message="Chat response: {response}", + response=res.model_dump_json(), + ) return res async def _create_chat_completion_choice( diff --git a/fastdeploy/entrypoints/openai/v1/serving_completion.py b/fastdeploy/entrypoints/openai/v1/serving_completion.py index 95968497b8..608c00d195 100644 --- a/fastdeploy/entrypoints/openai/v1/serving_completion.py +++ b/fastdeploy/entrypoints/openai/v1/serving_completion.py @@ -38,7 +38,12 @@ from fastdeploy.entrypoints.openai.v1.serving_base import ( ServeContext, ServingResponseContext, ) -from fastdeploy.utils import ErrorType, api_server_logger +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) +from fastdeploy.utils import ErrorType from fastdeploy.worker.output import LogprobsLists @@ -94,7 +99,7 @@ class OpenAIServingCompletion(OpenAiServingBase): raise ValueError("Prompt type must be one of: str, list[str], list[int], list[list[int]]") except Exception as e: error_msg = f"OpenAIServingCompletion create_completion: {e}, {str(traceback.format_exc())}" - api_server_logger.error(error_msg) + log_request_error(message=error_msg) return ErrorResponse(error=ErrorInfo(message=error_msg, 
type=ErrorType.INTERNAL_ERROR)) if request_prompt_ids is not None: @@ -199,7 +204,11 @@ class OpenAIServingCompletion(OpenAiServingBase): ) except Exception as e: - api_server_logger.error(f"Error in _build_logprobs_response: {str(e)}, {str(traceback.format_exc())}") + log_request_error( + message="Error in _build_logprobs_response: {error}, {traceback}", + error=str(e), + traceback=traceback.format_exc(), + ) return None async def _build_stream_response( @@ -271,9 +280,21 @@ class OpenAIServingCompletion(OpenAiServingBase): choice.finish_reason = self._calc_finish_reason( request_output, request.max_tokens, choice_completion_tokens ) - api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Completion Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}", + request_id=request_id, + finish_reason=choice.finish_reason, + completion_tokens=choice_completion_tokens, + logprobs=choice.logprobs, + ) if send_idx == 0 and not request.return_token_ids: - api_server_logger.info(f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}") + log_request( + level=RequestLogLevel.LIFECYCLE, + message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}", + request_id=request_id, + completion_tokens=response_ctx.choice_completion_tokens_dict[output.index], + ) yield f"data: {chunk.model_dump_json()}\n\n" if request_output.finished and response_ctx.remain_choices == 0: if include_usage: @@ -287,7 +308,12 @@ class OpenAIServingCompletion(OpenAiServingBase): yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n" yield "data: [DONE]\n\n" except Exception as e: - api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}") + log_request_error( + message="request[{request_id}] Error in completion_stream_generator: {error}, {traceback}", + request_id=request_id, + error=e, + traceback=traceback.format_exc(), + ) yield f"data: {ErrorResponse(error=ErrorInfo(message=str(e), code='400', type=ErrorType.INTERNAL_ERROR)).model_dump_json(exclude_unset=True)}\n\n" async def _build_full_response( @@ -321,10 +347,18 @@ class OpenAIServingCompletion(OpenAiServingBase): choices=choices, usage=response_ctx.usage, ) - api_server_logger.info(f"Completion response: {res.model_dump_json()}") + log_request( + level=RequestLogLevel.FULL, + message="Completion response: {response}", + response=res.model_dump_json(), + ) return res except Exception as e: - api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True) + log_request_error( + message="request[{request_id}] Error in completion_full_generator: {error}", + request_id=ctx.request_id, + error=e, + ) return self._create_error_response(str(e)) def build_completion_choice( diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py index cc577b5a21..e52604f2ff 100644 --- a/fastdeploy/envs.py +++ b/fastdeploy/envs.py @@ -43,7 +43,7 @@ environment_variables: dict[str, Callable[[], Any]] = { # Request logging master switch. Set to 0 to disable request logging. "FD_LOG_REQUESTS": lambda: int(os.getenv("FD_LOG_REQUESTS", "1")), # Request logging detail level (0-3). Higher level means more verbose output. 
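
For orientation, the hunks above all migrate plain f-string logger calls to the same calling convention. The sketch below illustrates that convention; `log_request`, `log_request_error` and `RequestLogLevel` are the helpers added by this patch, while `handle_request` and `run_inference` are hypothetical stand-ins:

```python
# Sketch only: the imports exist in this patch (fastdeploy/logger/request_logger.py);
# handle_request / run_inference are made-up callers for illustration.
from fastdeploy.logger.request_logger import (
    RequestLogLevel,
    log_request,
    log_request_error,
)


def run_inference(request_id: str, params: dict) -> None:  # hypothetical stand-in
    if not params:
        raise ValueError("empty params")


def handle_request(request_id: str, params: dict) -> None:  # hypothetical caller
    # Level 0 (LIFECYCLE): request creation/completion, logged whenever FD_LOG_REQUESTS=1.
    log_request(
        level=RequestLogLevel.LIFECYCLE,
        message="Initialize request {request_id}",
        request_id=request_id,
    )
    # Level 2 (CONTENT): request parameters; emitted only when FD_LOG_REQUESTS_LEVEL >= 2,
    # with each field truncated to FD_LOG_MAX_LEN before formatting.
    log_request(
        RequestLogLevel.CONTENT,
        message="request {request_id} params: {params}",
        request_id=request_id,
        params=params,
    )
    try:
        run_inference(request_id, params)
    except Exception as e:
        # Errors skip the level check entirely and are also routed to error.log.
        log_request_error(
            message="request[{request_id}] failed: {error}",
            request_id=request_id,
            error=e,
        )
```

Note that the keyword fields are substituted into the `{field}` placeholders only after the level check passes, so suppressed levels cost little more than a comparison.
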
- "FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "0")), + "FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "2")), # Max field length for request logging truncation. "FD_LOG_MAX_LEN": lambda: int(os.getenv("FD_LOG_MAX_LEN", "2048")), # Unified trace mode: off, local, otel, all. diff --git a/fastdeploy/input/base_processor.py b/fastdeploy/input/base_processor.py index fb08ab6058..a06150c191 100644 --- a/fastdeploy/input/base_processor.py +++ b/fastdeploy/input/base_processor.py @@ -55,6 +55,7 @@ from paddleformers.transformers import Llama3Tokenizer, LlamaTokenizer from fastdeploy import envs from fastdeploy.input.utils import process_stop_token_ids +from fastdeploy.logger.request_logger import RequestLogLevel, log_request from fastdeploy.utils import data_processor_logger _SAMPLING_EPS = 1e-5 @@ -164,7 +165,12 @@ class BaseTextProcessor(ABC): req_id = request.get("request_id", None) if isinstance(request, dict) else None tokens = self.tokenizer.tokenize(spliced_message) token_ids = self.tokenizer.convert_tokens_to_ids(tokens) - data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}") + log_request( + level=1, + message="req_id:{req_id}, token_ids: {token_ids}", + req_id=req_id, + token_ids=token_ids, + ) return token_ids # ------------------------------------------------------------------ @@ -359,7 +365,7 @@ class BaseTextProcessor(ABC): def process_request_dict(self, request, max_model_len=None, **kwargs): """Unified request pre-processing shared by all processors.""" - data_processor_logger.info(f"Start processing request dict: {request}") + log_request(RequestLogLevel.CONTENT, message="Start processing request dict: {request}", request=request) request = self._apply_default_parameters(request) if not request.get("eos_token_ids"): request["eos_token_ids"] = self.eos_token_ids @@ -440,7 +446,7 @@ class BaseTextProcessor(ABC): if request.get("response_max_tokens") is not None and request.get("enable_thinking") is False: request["max_tokens"] = min(request["response_max_tokens"], request["max_tokens"]) - data_processor_logger.info(f"Processed request dict: {request}") + log_request(RequestLogLevel.CONTENT, message="Processed request dict: {request}", request=request) return request def _apply_reasoning_parser(self, request): @@ -481,7 +487,12 @@ class BaseTextProcessor(ABC): if seq != self.tokenizer.eos_token_id: stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq))) stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False) - data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}") + log_request( + level=3, + message="processed stop_seqs: {stop_seqs}, {stop_seqs_len}", + stop_seqs=stop_seqs, + stop_seqs_len=stop_seqs_len, + ) return stop_seqs, stop_seqs_len # ------------------------------------------------------------------ @@ -607,14 +618,20 @@ class BaseTextProcessor(ABC): prompt_token_ids = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(prompt)) if len(prompt_token_ids) != 1: if not add_prefix_space: - data_processor_logger.warning( - f"bad_words: '{prompt}' tokenises to {len(prompt_token_ids)} tokens, skipping" + log_request( + level=1, + message="bad_words: '{prompt}' tokenises to {num_tokens} tokens, skipping", + prompt=prompt, + num_tokens=len(prompt_token_ids), ) continue if prompt_token_ids[0] > self.tokenizer.vocab_size: if not add_prefix_space: - data_processor_logger.warning( - 
f"bad_words: '{prompt}' token id {prompt_token_ids[0]} > vocab_size, skipping" + log_request( + level=1, + message="bad_words: '{prompt}' token id {token_id} > vocab_size, skipping", + prompt=prompt, + token_id=prompt_token_ids[0], ) continue if prompt_token_ids not in token_ids: diff --git a/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py b/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py index cfc394d463..fdc023beb6 100644 --- a/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py +++ b/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py @@ -22,6 +22,7 @@ from paddleformers.generation import GenerationConfig from fastdeploy.input.ernie4_5_processor import Ernie4_5Processor from fastdeploy.input.utils import IDS_TYPE_FLAG, process_stop_token_ids +from fastdeploy.logger.request_logger import RequestLogLevel, log_request from fastdeploy.utils import data_processor_logger from .process import DataProcessor @@ -294,7 +295,7 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor): if request.get("response_max_tokens") is not None and request.get("enable_thinking") is False: request["max_tokens"] = min(request["response_max_tokens"], request["max_tokens"]) - data_processor_logger.info(f"Processed request {request}") + log_request(RequestLogLevel.CONTENT, message="Processed request: {request}", request=request) return request def append_completion_tokens(self, multimodal_inputs, completion_token_ids): diff --git a/fastdeploy/input/qwen3_vl_processor/qwen3_vl_processor.py b/fastdeploy/input/qwen3_vl_processor/qwen3_vl_processor.py index cc0110e1e1..6b1460f60e 100644 --- a/fastdeploy/input/qwen3_vl_processor/qwen3_vl_processor.py +++ b/fastdeploy/input/qwen3_vl_processor/qwen3_vl_processor.py @@ -17,6 +17,7 @@ import numpy as np from fastdeploy.input.text_processor import DataProcessor as TextProcessor +from fastdeploy.logger.request_logger import RequestLogLevel, log_request from fastdeploy.utils import data_processor_logger from .process import DataProcessor @@ -261,7 +262,7 @@ class Qwen3VLProcessor(TextProcessor): request["max_tokens"] = max(1, max_tokens) else: request["max_tokens"] = min(max_tokens, request["max_tokens"]) - data_processor_logger.info(f"Processed request {request}") + log_request(RequestLogLevel.CONTENT, message="Processed request: {request}", request=request) return request diff --git a/fastdeploy/input/qwen_vl_processor/qwen_vl_processor.py b/fastdeploy/input/qwen_vl_processor/qwen_vl_processor.py index 88bc5c7693..e5bba78b96 100644 --- a/fastdeploy/input/qwen_vl_processor/qwen_vl_processor.py +++ b/fastdeploy/input/qwen_vl_processor/qwen_vl_processor.py @@ -18,6 +18,7 @@ import numpy as np from fastdeploy.input.text_processor import DataProcessor as TextProcessor from fastdeploy.input.utils import process_stop_token_ids +from fastdeploy.logger.request_logger import RequestLogLevel, log_request from fastdeploy.utils import data_processor_logger from .process import DataProcessor @@ -262,7 +263,7 @@ class QwenVLProcessor(TextProcessor): else: self.model_status_dict[request["request_id"]] = model_status request["enable_thinking"] = model_status == "think_start" - data_processor_logger.info(f"Processed request {request}") + log_request(RequestLogLevel.CONTENT, message="Processed request: {request}", request=request) return request diff --git a/fastdeploy/input/tokenizer_client.py b/fastdeploy/input/tokenizer_client.py index 1bee41cbc0..97f62a63f2 100644 --- a/fastdeploy/input/tokenizer_client.py +++ 
b/fastdeploy/input/tokenizer_client.py @@ -20,6 +20,7 @@ from typing import Any, Optional, Union import httpx from pydantic import BaseModel, HttpUrl +from fastdeploy.logger.request_logger import log_request, log_request_error from fastdeploy.utils import data_processor_logger @@ -161,7 +162,12 @@ class AsyncTokenizerClient: except httpx.RequestError as e: # Network error, keep polling - data_processor_logger.debug(f"Request error while polling tokenize task {task_tag}: {e}") + log_request( + level=3, + message="Request error while polling tokenize task {task_tag}: {error}", + task_tag=task_tag, + error=str(e), + ) # 超时检测 if asyncio.get_event_loop().time() - start_time > self.max_wait: @@ -193,10 +199,15 @@ class AsyncTokenizerClient: raise RuntimeError(f"Tokenize task creation failed, {resp.json().get('message')}") return resp.json().get("result") except Exception as e: - data_processor_logger.error(f"Attempt to decode_request {attempt + 1} failed: {e}") + log_request_error( + message="Attempt to decode_request {attempt} failed: {error}", + attempt=attempt + 1, + error=str(e), + ) if attempt == self.max_retries - 1: - data_processor_logger.error( - f"Max retries of decode_request reached. Giving up. request is {request}" + log_request_error( + message="Max retries of decode_request reached. Giving up. request is {request}", + request=str(request), ) await asyncio.sleep(1) except httpx.RequestError as e: diff --git a/fastdeploy/logger/__init__.py b/fastdeploy/logger/__init__.py index e69de29bb2..148c3ee0df 100644 --- a/fastdeploy/logger/__init__.py +++ b/fastdeploy/logger/__init__.py @@ -0,0 +1,124 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
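
A few of the call sites above (for example in `base_processor.py` and `tokenizer_client.py`) pass bare integers such as `level=1` or `level=3` rather than the named members. This works because `RequestLogLevel`, defined later in this patch in `fastdeploy/logger/request_logger.py`, is an `IntEnum`; the snippet below mirrors its values purely to show the equivalence:

```python
# Mirrors the enum added by this patch; shown only to illustrate that the integer
# levels used at some call sites are interchangeable with the named members.
from enum import IntEnum


class RequestLogLevel(IntEnum):
    LIFECYCLE = 0  # lifecycle start/end
    STAGES = 1     # processing stages
    CONTENT = 2    # content and scheduling (truncated)
    FULL = 3       # complete raw data


assert RequestLogLevel.STAGES == 1
assert RequestLogLevel(3) is RequestLogLevel.FULL
```
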
+""" + +""" +FastDeploy Logging Module + +Unified logging management module providing: +- Formatters: ColoredFormatter, CustomFormatter +- Handlers: DailyRotatingFileHandler, IntervalRotatingFileHandler, LazyFileHandler +- Logger: FastDeployLogger +- Request Logger: log_request, log_request_error, RequestLogLevel +- Setup: setup_logging +- Pre-defined logger instances (lazy-loaded) + +Usage: + from fastdeploy.logger import get_logger, llm_logger, console_logger + from fastdeploy.logger import log_request, log_request_error, RequestLogLevel +""" + +# Formatters +from fastdeploy.logger.formatters import ColoredFormatter, CustomFormatter + +# Handlers +from fastdeploy.logger.handlers import ( + DailyRotatingFileHandler, + IntervalRotatingFileHandler, + LazyFileHandler, +) + +# Logger +from fastdeploy.logger.logger import FastDeployLogger + +# Request logger +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) + +# Setup +from fastdeploy.logger.setup_logging import setup_logging + + +def get_logger(name, file_name=None, without_formater=False, print_to_console=False, channel=None): + """Convenience function to get a logger instance""" + return FastDeployLogger().get_logger(name, file_name, without_formater, print_to_console, channel=channel) + + +# Pre-defined logger instance configs (name, file_name, without_formater, print_to_console, channel) +_LOGGER_CONFIGS = { + "llm_logger": ("fastdeploy", None, False, False, "main"), + "data_processor_logger": ("data_processor", None, False, False, "main"), + "scheduler_logger": ("scheduler", None, False, False, "main"), + "api_server_logger": ("api_server", None, False, False, "main"), + "console_logger": (None, None, False, False, "console"), + "spec_logger": ("speculate", "speculate.log", False, False, None), + "zmq_client_logger": ("zmq_client", "comm.log", False, False, None), + "router_logger": ("router", "comm.log", False, False, None), + "fmq_logger": ("fmq", "comm.log", False, False, None), + "obj_logger": ("obj", "obj.log", False, False, None), + "register_manager_logger": ("register_manager", "register_manager.log", False, False, None), + "_request_logger": ("request", None, False, False, "request"), +} + +_logger_cache = {} + + +def __getattr__(name): + """Lazy-load pre-defined logger instances""" + if name in _LOGGER_CONFIGS: + if name not in _logger_cache: + cfg = _LOGGER_CONFIGS[name] + _logger_cache[name] = get_logger(cfg[0], cfg[1], cfg[2], cfg[3], cfg[4]) + return _logger_cache[name] + if name == "trace_logger": + if name not in _logger_cache: + _logger_cache[name] = FastDeployLogger().get_trace_logger("trace", "trace.log") + return _logger_cache[name] + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +__all__ = [ + # Formatters + "ColoredFormatter", + "CustomFormatter", + # Handlers + "DailyRotatingFileHandler", + "IntervalRotatingFileHandler", + "LazyFileHandler", + # Logger + "FastDeployLogger", + "get_logger", + # Request logger + "RequestLogLevel", + "log_request", + "log_request_error", + # Setup + "setup_logging", + # Pre-defined logger instances (lazy-loaded) + "llm_logger", + "data_processor_logger", + "scheduler_logger", + "api_server_logger", + "console_logger", + "spec_logger", + "zmq_client_logger", + "trace_logger", + "router_logger", + "fmq_logger", + "obj_logger", + "register_manager_logger", +] diff --git a/fastdeploy/logger/config.py b/fastdeploy/logger/config.py new file mode 100644 index 0000000000..c2f202859b --- /dev/null +++ 
b/fastdeploy/logger/config.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Log configuration parsing module +""" + +import os + + +def resolve_log_level(raw_level=None, debug_enabled=None) -> str: + """ + Resolve log level configuration + + Priority: FD_LOG_LEVEL > FD_DEBUG + """ + raw = os.getenv("FD_LOG_LEVEL") if raw_level is None else raw_level + # Handle None or string "None" case + if raw and str(raw).upper() != "NONE": + level = raw.upper() + if level not in {"INFO", "DEBUG"}: + raise ValueError(f"Unsupported FD_LOG_LEVEL: {raw}") + return level + debug = os.getenv("FD_DEBUG", "0") if debug_enabled is None else str(debug_enabled) + return "DEBUG" if debug == "1" else "INFO" + + +def resolve_request_logging_defaults() -> dict[str, int]: + """ + Resolve request logging default configuration + """ + return { + "enabled": int(os.getenv("FD_LOG_REQUESTS", "1")), + "level": int(os.getenv("FD_LOG_REQUESTS_LEVEL", "2")), + "max_len": int(os.getenv("FD_LOG_MAX_LEN", "2048")), + } diff --git a/fastdeploy/logger/logger.py b/fastdeploy/logger/logger.py index e50b484b90..5c0d7ba85e 100644 --- a/fastdeploy/logger/logger.py +++ b/fastdeploy/logger/logger.py @@ -20,21 +20,36 @@ This module provides the get_logger method to uniformly manage logging behavior import logging import os +import sys import threading from contextlib import contextmanager from pathlib import Path from fastdeploy import envs -from fastdeploy.logger.formatters import CustomFormatter +from fastdeploy.logger.config import resolve_log_level +from fastdeploy.logger.formatters import ColoredFormatter, CustomFormatter from fastdeploy.logger.handlers import DailyRotatingFileHandler, LazyFileHandler from fastdeploy.logger.setup_logging import setup_logging +# Standard log format +_LOG_FORMAT = "%(levelname)-8s %(asctime)s %(process)-5s %(filename)s[line:%(lineno)d] %(message)s" + class FastDeployLogger: _instance = None _initialized = False _lock = threading.RLock() + # Channel to file mapping + _channel_files = { + "main": "fastdeploy.log", + "request": "request.log", + "console": "console.log", + } + + # Cache for channel loggers that have been configured + _configured_channels = set() + def __new__(cls): """Singleton pattern implementation""" if cls._instance is None: @@ -50,7 +65,7 @@ class FastDeployLogger: setup_logging() self._initialized = True - def get_logger(self, name, file_name=None, without_formater=False, print_to_console=False): + def get_logger(self, name, file_name=None, without_formater=False, print_to_console=False, channel=None): """ Get logger (compatible with the original interface) @@ -59,7 +74,14 @@ class FastDeployLogger: file_name: Log file name (for compatibility) without_formater: Whether to not use a formatter print_to_console: Whether to print to console + channel: Log channel (main, request, console) """ + # If channel is specified, use channel-based logging + if channel is not None: + if not self._initialized: + 
self._initialize() + return self._get_channel_logger(name, channel) + # If only one parameter is provided, use the new unified naming convention if file_name is None and not without_formater and not print_to_console: # Lazy initialization @@ -74,27 +96,95 @@ class FastDeployLogger: """ New unified way to get logger """ - if name is None: - return logging.getLogger("fastdeploy") + return self._get_channel_logger(name, "main") - # Handle __main__ special case - if name == "__main__": + def _get_channel_logger(self, name, channel): + """ + Get logger through channel with manual handler setup. + + Uses manual addHandler instead of dictConfig for better performance. + Handlers are attached to the channel root logger (fastdeploy.{channel}), + and child loggers propagate to it. + + Args: + name: logger name + channel: log channel (main, request, console) + """ + # Get or create the channel root logger (all handlers go here) + channel_root_name = f"fastdeploy.{channel}" + channel_logger = logging.getLogger(channel_root_name) + + # Configure the channel root logger once + if channel not in self._configured_channels: + self._configured_channels.add(channel) + + log_dir = envs.FD_LOG_DIR + os.makedirs(log_dir, exist_ok=True) + + # Resolve log level (priority: FD_LOG_LEVEL > FD_DEBUG) + log_level = resolve_log_level() + channel_logger.setLevel(logging.DEBUG if log_level == "DEBUG" else logging.INFO) + + # Create formatters + file_formatter = logging.Formatter(_LOG_FORMAT) + console_formatter = ColoredFormatter(_LOG_FORMAT) + + # Clear existing handlers + for handler in channel_logger.handlers[:]: + channel_logger.removeHandler(handler) + + # Create file handler for this channel + file_name = self._channel_files.get(channel, f"{channel}.log") + log_file = os.path.join(log_dir, file_name) + backup_count = int(envs.FD_LOG_BACKUP_COUNT) + + file_handler = LazyFileHandler(log_file, backupCount=backup_count) + file_handler.setFormatter(file_formatter) + channel_logger.addHandler(file_handler) + + # Error file handler (all channels write errors to error.log) + error_log_file = os.path.join(log_dir, "error.log") + error_file_handler = LazyFileHandler( + filename=error_log_file, backupCount=backup_count, level=logging.ERROR + ) + error_file_handler.setFormatter(file_formatter) + channel_logger.addHandler(error_file_handler) + + # Stderr handler for ERROR level (all channels output errors to stderr) + stderr_handler = logging.StreamHandler(sys.stderr) + stderr_handler.setLevel(logging.ERROR) + stderr_handler.setFormatter(console_formatter) + channel_logger.addHandler(stderr_handler) + + # Console stdout handler for console channel only + if channel == "console": + stdout_handler = logging.StreamHandler(sys.stdout) + stdout_handler.setLevel(logging.DEBUG if log_level == "DEBUG" else logging.INFO) + stdout_handler.setFormatter(console_formatter) + # Filter to exclude ERROR and above (they go to stderr) + stdout_handler.addFilter(lambda record: record.levelno < logging.ERROR) + channel_logger.addHandler(stdout_handler) + + channel_logger.propagate = False + + # Determine the actual logger name and return the appropriate logger + if name is None or name == "fastdeploy": + return channel_logger + elif name == "__main__": import __main__ - # Get the __file__ attribute of the main module if hasattr(__main__, "__file__"): - # Get the main module file name base_name = Path(__main__.__file__).stem - # Create logger with prefix - return logging.getLogger(f"fastdeploy.main.{base_name}") - return 
logging.getLogger("fastdeploy.main") - - # If already in fastdeploy namespace, use directly - if name.startswith("fastdeploy.") or name == "fastdeploy": - return logging.getLogger(name) + logger_name = f"{channel_root_name}.{base_name}" + else: + return channel_logger + elif name.startswith("fastdeploy."): + logger_name = name else: - # Add fastdeploy prefix for other cases - return logging.getLogger(f"fastdeploy.{name}") + logger_name = f"{channel_root_name}.{name}" + + # Child loggers propagate to channel_logger (which has handlers) + return logging.getLogger(logger_name) def get_trace_logger(self, name, file_name, without_formater=False, print_to_console=False): """ diff --git a/fastdeploy/logger/request_logger.py b/fastdeploy/logger/request_logger.py new file mode 100644 index 0000000000..7d91cd17db --- /dev/null +++ b/fastdeploy/logger/request_logger.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Request logging module + +Provides leveled request logging with L0-L3 levels: +- L0: Critical lifecycle events (creation, completion, abort) +- L1: Processing stage details +- L2: Request/response content (truncated) +- L3: Full data +""" + +from enum import IntEnum + +from fastdeploy import envs + + +class RequestLogLevel(IntEnum): + """Request log level""" + + LIFECYCLE = 0 # Lifecycle start/end: creation, completion, abort + STAGES = 1 # Processing stages: semaphore, first token, signal handling + CONTENT = 2 # Content and scheduling: request params, scheduling, response + FULL = 3 # Complete raw data + + +def _should_log(level: int) -> bool: + """Check if this level should be logged""" + if int(envs.FD_LOG_REQUESTS) == 0: + return False + return int(level) <= int(envs.FD_LOG_REQUESTS_LEVEL) + + +def _truncate(value): + """Truncate long content""" + text = str(value) + max_len = int(envs.FD_LOG_MAX_LEN) + if len(text) <= max_len: + return text + return text[:max_len] + + +def log_request(level: int, message: str, **fields): + """ + Log request message + + Args: + level: Log level (0-3) + message: Log message template, supports {field} formatting + **fields: Message fields + """ + if not _should_log(level): + return + + from fastdeploy.logger import _request_logger + + if not fields: + _request_logger.info(message, stacklevel=2) + return + + payload = fields + # CONTENT level content needs to be truncated + if int(level) == int(RequestLogLevel.CONTENT): + payload = {key: _truncate(value) for key, value in fields.items()} + + _request_logger.info(message.format(**payload), stacklevel=2) + + +def log_request_error(message: str, **fields): + """ + Log request error message + + Args: + message: Log message template, supports {field} formatting + **fields: Message fields + """ + from fastdeploy.logger import _request_logger + + if fields: + _request_logger.error(message.format(**fields), stacklevel=2) + else: + _request_logger.error(message, stacklevel=2) diff --git a/fastdeploy/logger/setup_logging.py 
b/fastdeploy/logger/setup_logging.py index e90f215fc9..bb6fedb8b4 100644 --- a/fastdeploy/logger/setup_logging.py +++ b/fastdeploy/logger/setup_logging.py @@ -14,7 +14,19 @@ """ """ -配置日志系统 +Setup logging system + +This module handles logging initialization: +- Basic setup: log directory creation +- Optional: load external JSON config file via dictConfig + +Channel-based logger configuration is handled by FastDeployLogger._get_channel_logger() +using manual addHandler for better performance. + +Log channels: +- main: Main logs -> fastdeploy.log +- request: Request logs -> request.log +- console: Console logs -> console.log + terminal """ import json @@ -27,54 +39,29 @@ from fastdeploy import envs class MaxLevelFilter(logging.Filter): - """过滤低于指定级别的日志记录。 + """Filter log records below specified level. - 用于将 INFO/DEBUG 路由到 stdout,ERROR/CRITICAL 路由到 stderr。 + Used to route INFO/DEBUG to stdout, ERROR/CRITICAL to stderr. """ def __init__(self, level): super().__init__() - self.level = logging._nameToLevel.get(level, level) + self.level = logging._nameToLevel.get(level, level) if isinstance(level, str) else level def filter(self, record): return record.levelno < self.level -def setup_logging(log_dir=None, config_file=None): - """ - 设置FastDeploy的日志配置 - - Args: - log_dir: 日志文件存储目录,如果不提供则使用环境变量 - config_file: JSON配置文件路径,如果不提供则使用默认配置 - """ - - # 避免重复配置 - if getattr(setup_logging, "_configured", False): - return logging.getLogger("fastdeploy") - - # 使用环境变量中的日志目录,如果没有则使用传入的参数或默认值 - if log_dir is None: - log_dir = getattr(envs, "FD_LOG_DIR", "log") - - # 确保日志目录存在 - Path(log_dir).mkdir(parents=True, exist_ok=True) - - # 从环境变量获取日志级别和备份数量 - is_debug = int(getattr(envs, "FD_DEBUG", 0)) - FASTDEPLOY_LOGGING_LEVEL = "DEBUG" if is_debug else "INFO" - backup_count = int(getattr(envs, "FD_LOG_BACKUP_COUNT", 7)) - - # 定义日志输出格式 +def _build_default_config(log_dir, log_level, backup_count): + """Build default logging configuration for dictConfig""" _FORMAT = "%(levelname)-8s %(asctime)s %(process)-5s %(filename)s[line:%(lineno)d] %(message)s" - # 默认配置 - default_config = { + return { "version": 1, "disable_existing_loggers": False, "filters": { "below_error": { - "()": "fastdeploy.logger.setup_logging.MaxLevelFilter", + "()": MaxLevelFilter, "level": "ERROR", } }, @@ -91,99 +78,135 @@ def setup_logging(log_dir=None, config_file=None): }, }, "handlers": { - # 控制台标准输出,用于 INFO/DEBUG(低于 ERROR 级别) + # Console stdout for INFO/DEBUG (below ERROR level) "console_stdout": { "class": "logging.StreamHandler", - "level": FASTDEPLOY_LOGGING_LEVEL, + "level": log_level, "filters": ["below_error"], "formatter": "colored", "stream": "ext://sys.stdout", }, - # 控制台错误输出,用于 ERROR/CRITICAL + # Console stderr for ERROR/CRITICAL "console_stderr": { "class": "logging.StreamHandler", "level": "ERROR", "formatter": "colored", "stream": "ext://sys.stderr", }, - # 默认错误日志,保留最新1个小时的日志,位置在log/error.log + # Main log file + "main_file": { + "class": "fastdeploy.logger.handlers.LazyFileHandler", + "level": log_level, + "formatter": "standard", + "filename": os.path.join(log_dir, "fastdeploy.log"), + "backupCount": backup_count, + }, + # Console log file + "console_file": { + "class": "fastdeploy.logger.handlers.LazyFileHandler", + "level": log_level, + "formatter": "standard", + "filename": os.path.join(log_dir, "console.log"), + "backupCount": backup_count, + }, + # Request log file + "request_file": { + "class": "fastdeploy.logger.handlers.LazyFileHandler", + "level": log_level, + "formatter": "standard", + "filename": os.path.join(log_dir, 
"request.log"), + "backupCount": backup_count, + }, + # Error log file "error_file": { - "class": "logging.handlers.TimedRotatingFileHandler", + "class": "fastdeploy.logger.handlers.LazyFileHandler", "level": "ERROR", "formatter": "standard", "filename": os.path.join(log_dir, "error.log"), - "when": "H", - "interval": 1, - "backupCount": 1, - }, - # 全量日志,保留最新1小时的日志,位置在log/default.log - "default_file": { - "class": "logging.handlers.TimedRotatingFileHandler", - "level": FASTDEPLOY_LOGGING_LEVEL, - "formatter": "standard", - "filename": os.path.join(log_dir, "default.log"), - "when": "H", - "interval": 1, - "backupCount": 1, - }, - # 错误日志归档,保留7天内的日志,每隔1小时一个文件,形式如:FastDeploy/log/2025-08-14/error_2025-08-14-18.log - "error_archive": { - "class": "fastdeploy.logger.handlers.IntervalRotatingFileHandler", - "level": "ERROR", - "formatter": "standard", - "filename": os.path.join(log_dir, "error.log"), - "backupDays": 7, - "interval": 1, - "encoding": "utf-8", - }, - # 全量日志归档,保留7天内的日志,每隔1小时一个文件,形式如:FastDeploy/log/2025-08-14/default_2025-08-14-18.log - "default_archive": { - "class": "fastdeploy.logger.handlers.IntervalRotatingFileHandler", - "level": FASTDEPLOY_LOGGING_LEVEL, - "formatter": "standard", - "filename": os.path.join(log_dir, "default.log"), - "backupDays": 7, - "interval": 1, - "encoding": "utf-8", + "backupCount": backup_count, }, }, "loggers": { - # 默认日志记录器,全局共享 + # Default logger "fastdeploy": { "level": "DEBUG", - "handlers": [ - "console_stdout", - "console_stderr", - "error_file", - "default_file", - "error_archive", - "default_archive", - ], + "handlers": ["main_file", "console_file", "error_file", "console_stderr"], "propagate": False, - } + }, + # Main channel + "fastdeploy.main": { + "level": "DEBUG", + "handlers": ["main_file", "console_file", "error_file", "console_stderr"], + "propagate": False, + }, + # Request channel - only output to request.log and error.log + "fastdeploy.request": { + "level": "DEBUG", + "handlers": ["request_file", "error_file", "console_stderr"], + "propagate": False, + }, + # Console channel - output to console.log and terminal + "fastdeploy.console": { + "level": "DEBUG", + "handlers": ["console_file", "console_stdout", "error_file", "console_stderr"], + "propagate": False, + }, }, } - # 如果提供了配置文件,则加载配置文件 - if config_file and os.path.exists(config_file): - with open(config_file, "r", encoding="utf-8") as f: - config = json.load(f) - # 合并环境变量配置到用户配置中,环境变量的优先级高于自定义的优先级 - if "handlers" in config: - for handler_name, handler_config in config["handlers"].items(): - if "backupCount" not in handler_config and "DailyRotating" in handler_config.get("class", ""): - handler_config["backupCount"] = backup_count - if handler_config.get("level") == "INFO" and is_debug: - handler_config["level"] = "DEBUG" - else: - config = default_config +def setup_logging(log_dir=None, config_file=None): + """ + Setup FastDeploy logging configuration. - # 应用日志配置 - logging.config.dictConfig(config) + This function: + 1. Ensures the log directory exists + 2. Optionally loads external JSON config file via dictConfig - # 避免重复加载 + Note: Channel-based loggers (get_logger with channel param) use manual addHandler + for better performance, independent of dictConfig. 
+ + Args: + log_dir: Log file storage directory, uses environment variable if not provided + config_file: Optional JSON config file path for dictConfig + """ + # Avoid duplicate configuration + if getattr(setup_logging, "_configured", False): + return + + # Use log directory from environment variable, or use provided parameter or default value + if log_dir is None: + log_dir = getattr(envs, "FD_LOG_DIR", "log") + + # Ensure log directory exists + Path(log_dir).mkdir(parents=True, exist_ok=True) + + # Store log_dir for later use + setup_logging._log_dir = log_dir + + # If config_file is provided, use dictConfig to load it + if config_file is not None: + is_debug = int(getattr(envs, "FD_DEBUG", 0)) + log_level = "DEBUG" if is_debug else "INFO" + backup_count = int(getattr(envs, "FD_LOG_BACKUP_COUNT", 7)) + + if os.path.exists(config_file): + with open(config_file, "r", encoding="utf-8") as f: + config = json.load(f) + + # Merge environment variable config into user config + if "handlers" in config: + for handler_config in config["handlers"].values(): + if "backupCount" not in handler_config and "DailyRotating" in handler_config.get("class", ""): + handler_config["backupCount"] = backup_count + if handler_config.get("level") == "INFO" and log_level == "DEBUG": + handler_config["level"] = "DEBUG" + else: + # Config file not found, use default config + config = _build_default_config(log_dir, log_level, backup_count) + + # Apply logging configuration via dictConfig + logging.config.dictConfig(config) + + # Mark as configured setup_logging._configured = True - - # 返回fastdeploy的logger - return logging.getLogger("fastdeploy") diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py index 85e54647b7..965c2ceeb5 100644 --- a/fastdeploy/output/token_processor.py +++ b/fastdeploy/output/token_processor.py @@ -40,6 +40,11 @@ from fastdeploy.engine.request import ( SpeculateMetrics, ) from fastdeploy.inter_communicator import ZmqIpcServer +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.platforms import current_platform from fastdeploy.spec_decode import SpecMethod @@ -224,7 +229,11 @@ class TokenProcessor: for token_id in token_id_list: recovery_stop = token_id == RECOVERY_STOP_SIGNAL if recovery_stop: - llm_logger.info(f"recovery stop signal found at task {task_id}") + log_request( + RequestLogLevel.STAGES, + message="recovery stop signal found at task {request_id}", + request_id=task_id, + ) self.tokens_counter[task_id] += 1 if token_id != RECOVERY_STOP_SIGNAL: result.outputs.token_ids.append(token_id) @@ -252,12 +261,25 @@ class TokenProcessor: # Print combined log with all required information ttft = task.metrics.first_token_time if task.metrics.first_token_time else 0 - llm_logger.info( - f"Request={task_id}, InputToken={task.prompt_token_ids_len}, " - f"CachedDetail={cached_detail}, OutputToken={self.tokens_counter[task_id]}, " - f"TokenRatio={token_ratio:.2f}, TTFT={ttft:.2f}, " - f"E2E={e2e_time:.2f}, IsPrefill={is_prefill}, RecoveryStop={recovery_stop}, " - f"PreemptedCount={getattr(task.metrics, 'preempted_count', 0)}" + log_request( + RequestLogLevel.LIFECYCLE, + message=( + "Request={request_id}, InputToken={input_tokens}, " + "CachedDetail={cached_detail}, OutputToken={output_tokens}, " + "TokenRatio={token_ratio}, TTFT={ttft}, " + "E2E={e2e_time}, IsPrefill={is_prefill}, RecoveryStop={recovery_stop}, " + 
"PreemptedCount={preempted_count}" + ), + request_id=task_id, + input_tokens=task.prompt_token_ids_len, + cached_detail=cached_detail, + output_tokens=self.tokens_counter[task_id], + token_ratio=f"{token_ratio:.2f}", + ttft=f"{ttft:.2f}", + e2e_time=f"{e2e_time:.2f}", + is_prefill=is_prefill, + recovery_stop=recovery_stop, + preempted_count=getattr(task.metrics, "preempted_count", 0), ) main_process_metrics.request_token_ratio.observe(token_ratio) @@ -289,13 +311,21 @@ class TokenProcessor: task_id in self.resource_manager.to_be_aborted_req_id_set and token_ids[-1] == PREEMPTED_TOKEN_ID ): - llm_logger.info(f"start to recycle abort request_id {task_id}") + log_request( + RequestLogLevel.STAGES, + message="start to recycle abort request_id {request_id}", + request_id=task_id, + ) self.resource_manager.recycle_abort_task(task_id) if ( task_id in self.resource_manager.to_be_rescheduled_request_id_set and token_ids[-1] == PREEMPTED_TOKEN_ID ): - llm_logger.info(f"sync preemption for request_id {task_id} done.") + log_request( + RequestLogLevel.STAGES, + message="sync preemption for request_id {request_id} done.", + request_id=task_id, + ) self.resource_manager.reschedule_preempt_task(task_id) continue if self.cfg.scheduler_config.splitwise_role == "decode": @@ -351,12 +381,20 @@ class TokenProcessor: result.outputs.logprob = float(logprobs_list.logprobs[0][0]) result.outputs.top_logprobs = logprobs_list except Exception as e: - llm_logger.warning(f"Failed to parse logprobs from StreamTransferData: {e}") + log_request( + RequestLogLevel.STAGES, + message="Failed to parse logprobs from StreamTransferData: {error}", + error=str(e), + ) if getattr(stream_data, "prompt_logprobs", None) is not None: try: result.prompt_logprobs = stream_data.prompt_logprobs except Exception as e: - llm_logger.warning(f"Failed to parse prompt_logprobs from StreamTransferData: {e}") + log_request( + RequestLogLevel.STAGES, + message="Failed to parse prompt_logprobs from StreamTransferData: {error}", + error=str(e), + ) if self.tokens_counter[task_id] == 0: if task.messages is not None: result.prompt = task.messages @@ -394,7 +432,9 @@ class TokenProcessor: batch_result = self._process_batch_output_use_zmq(receive_datas) self.postprocess(batch_result) except Exception as e: - llm_logger.error(f"Receive message:{receive_datas}, error:{e}") + log_request_error( + message="Receive message:{receive_datas}, error:{error}", receive_datas=receive_datas, error=e + ) continue def process_sampling_results(self): @@ -511,7 +551,11 @@ class TokenProcessor: else: self.cached_generated_tokens.put_results(batch_result) except Exception as e: - llm_logger.error(f"Error in TokenProcessor's postprocess: {e}, {str(traceback.format_exc())}") + log_request_error( + message="Error in TokenProcessor's postprocess: {error}, {traceback}", + error=e, + traceback=traceback.format_exc(), + ) def _recycle_resources(self, task_id, index, task, result=None, is_prefill=False): """ @@ -524,14 +568,21 @@ class TokenProcessor: finished_task_ids = self.engine_worker_queue.get_finished_req() if len(finished_task_ids) > 0: for finished_task_id in finished_task_ids: - llm_logger.info(f"finished_task_id: {finished_task_id}") + log_request( + RequestLogLevel.STAGES, + message="finished_task_id: {finished_task_id}", + finished_task_id=finished_task_id, + ) self.prefill_result_status[finished_task_id[0]] = finished_task_id[1] if task_id in self.prefill_result_status: if self.prefill_result_status[task_id] != "finished": result.error_code = 400 
result.error_message = f"{task_id} failed to {self.prefill_result_status[task_id]}" - llm_logger.info( - f"wait for sending cache, request_id: {task_id}, cost seconds: {time.time()-start_time:.5f}" + log_request( + RequestLogLevel.STAGES, + message="wait for sending cache, request_id: {request_id}, cost seconds: {cost_seconds}", + request_id=task_id, + cost_seconds=f"{time.time()-start_time:.5f}", ) result.metrics.send_request_output_to_decode_time = time.time() self.split_connector.send_first_token(task.disaggregate_info, [result]) @@ -755,7 +806,11 @@ class TokenProcessor: if self.cfg.speculative_config.method: self._record_speculative_decoding_accept_num_per_request(task_id, accept_num[i]) if accept_num[i] == PREEMPTED_TOKEN_ID: # in MTP, means preemption has happened in worker - llm_logger.info(f"sync preemption for request_id {task_id} done.") + log_request( + RequestLogLevel.STAGES, + message="sync preemption for request_id {request_id} done.", + request_id=task_id, + ) if envs.ENABLE_V1_KVCACHE_SCHEDULER: if task_id in self.resource_manager.to_be_aborted_req_id_set: self.resource_manager.recycle_abort_task(task_id) @@ -765,7 +820,11 @@ class TokenProcessor: if accept_num[i] == -3: recovery_stop = True if recovery_stop: - llm_logger.info(f"recovery stop signal found at task {task_id}") + log_request( + RequestLogLevel.STAGES, + message="recovery stop signal found at task {request_id}", + request_id=task_id, + ) token_ids = [RECOVERY_STOP_SIGNAL] elif self.use_logprobs: token_ids = tokens[i][:, 0].tolist()[: accept_num[i]] @@ -785,7 +844,11 @@ class TokenProcessor: token_ids = [token_id] recovery_stop = token_id == RECOVERY_STOP_SIGNAL if recovery_stop: - llm_logger.info(f"recovery stop signal found at task {task_id}") + log_request( + RequestLogLevel.STAGES, + message="recovery stop signal found at task {request_id}", + request_id=task_id, + ) if not recovery_stop and token_id < 0: if envs.ENABLE_V1_KVCACHE_SCHEDULER: if ( @@ -793,12 +856,20 @@ class TokenProcessor: and token_id == PREEMPTED_TOKEN_ID ): self.resource_manager.recycle_abort_task(task_id) - llm_logger.info(f"sync abortion for request_id {task_id} done.") + log_request( + RequestLogLevel.STAGES, + message="sync abortion for request_id {request_id} done.", + request_id=task_id, + ) if ( task_id in self.resource_manager.to_be_rescheduled_request_id_set and token_id == PREEMPTED_TOKEN_ID ): - llm_logger.info(f"sync preemption for request_id {task_id} done.") + log_request( + RequestLogLevel.STAGES, + message="sync preemption for request_id {request_id} done.", + request_id=task_id, + ) self.resource_manager.reschedule_preempt_task(task_id) continue if self.cfg.scheduler_config.splitwise_role == "decode": @@ -826,7 +897,11 @@ class TokenProcessor: task.metrics.record_recv_first_token() task.metrics.cal_cost_time() metrics = copy.copy(task.metrics) - llm_logger.info(f"task:{task.request_id} start recode first token") + log_request( + RequestLogLevel.STAGES, + message="task:{request_id} start recode first token", + request_id=task.request_id, + ) self._record_first_token_metrics(task, current_time) tracing.trace_report_span( @@ -932,12 +1007,26 @@ class TokenProcessor: # Print combined log with all required information ttft = task.metrics.first_token_time if task.metrics.first_token_time else 0 ttft_s = ttft + task.metrics.time_in_queue - llm_logger.info( - f"Request={task_id}, InputToken={task.prompt_token_ids_len}, " - f"CachedDetail={cached_detail}, OutputToken={self.tokens_counter[task_id]}, " - 
f"TokenRatio={token_ratio:.2f}, TTFT={ttft:.2f}, TTFT_S={ttft_s:.2f}, " - f"E2E={e2e_time:.2f}, IsPrefill={is_prefill}, RecoveryStop={recovery_stop}, " - f"PreemptedCount={getattr(task.metrics, 'preempted_count', 0)}" + log_request( + RequestLogLevel.LIFECYCLE, + message=( + "Request={request_id}, InputToken={input_tokens}, " + "CachedDetail={cached_detail}, OutputToken={output_tokens}, " + "TokenRatio={token_ratio}, TTFT={ttft}, TTFT_S={ttft_s}, " + "E2E={e2e_time}, IsPrefill={is_prefill}, RecoveryStop={recovery_stop}, " + "PreemptedCount={preempted_count}" + ), + request_id=task_id, + input_tokens=task.prompt_token_ids_len, + cached_detail=cached_detail, + output_tokens=self.tokens_counter[task_id], + token_ratio=f"{token_ratio:.2f}", + ttft=f"{ttft:.2f}", + ttft_s=f"{ttft_s:.2f}", + e2e_time=f"{e2e_time:.2f}", + is_prefill=is_prefill, + recovery_stop=recovery_stop, + preempted_count=getattr(task.metrics, "preempted_count", 0), ) main_process_metrics.request_token_ratio.observe(token_ratio) @@ -946,8 +1035,11 @@ class TokenProcessor: self._compute_speculative_status(result) if not is_prefill: self._record_completion_metrics(task, current_time) - llm_logger.info(f"task {task_id} received eos token. Recycling.") - + log_request( + RequestLogLevel.STAGES, + message="task {request_id} received eos token. Recycling.", + request_id=task_id, + ) if ( envs.ENABLE_V1_KVCACHE_SCHEDULER and self.cfg.cache_config.enable_prefix_caching @@ -957,7 +1049,11 @@ class TokenProcessor: task ) # when enable prefix caching, cache kv cache for output tokens self._recycle_resources(task_id, i, task, result, is_prefill) - llm_logger.info(f"eos token {task_id} Recycle end.") + log_request( + RequestLogLevel.STAGES, + message="eos token {request_id} Recycle end.", + request_id=task_id, + ) break llm_logger.debug(f"get response from infer: {result}") diff --git a/fastdeploy/scheduler/dp_scheduler.py b/fastdeploy/scheduler/dp_scheduler.py index 2339a077c9..0719dccf5f 100644 --- a/fastdeploy/scheduler/dp_scheduler.py +++ b/fastdeploy/scheduler/dp_scheduler.py @@ -21,6 +21,7 @@ import time from typing import Dict, List, Optional from fastdeploy.engine.request import Request, RequestOutput +from fastdeploy.logger.request_logger import RequestLogLevel, log_request from fastdeploy.scheduler.data import ScheduledResponse from fastdeploy.scheduler.local_scheduler import LocalScheduler from fastdeploy.utils import get_logger @@ -58,7 +59,11 @@ class DPLocalScheduler(LocalScheduler): finished_responses = [response.request_id for response in responses if response.finished] if len(finished_responses) > 0: - self.scheduler_logger.info(f"Scheduler has received some finished responses: {finished_responses}") + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has received some finished responses: {request_ids}", + request_ids=finished_responses, + ) with self.mutex: self.batch_responses_per_step.append([response.raw for response in responses]) @@ -146,8 +151,10 @@ class DPLocalScheduler(LocalScheduler): self.ids_read_cursor += 1 if len(requests) > 0: - self.scheduler_logger.info( - f"Scheduler has pulled some request: {[request.request_id for request in requests]}" + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has pulled some request: {request_ids}", + request_ids=[request.request_id for request in requests], ) return requests @@ -195,7 +202,11 @@ class DPScheduler: def _put_requests_to_local(self): while True: request = self.request_queues.get() - self.scheduler_logger.info(f"Receive request from 
puller, request_id: {request.request_id}") + log_request( + RequestLogLevel.CONTENT, + message="Receive request from puller, request_id: {request_id}", + request_id=request.request_id, + ) self._scheduler.put_requests([request]) def _get_response_from_local(self): diff --git a/fastdeploy/scheduler/global_scheduler.py b/fastdeploy/scheduler/global_scheduler.py index 9f6d064461..edb2b0a3aa 100644 --- a/fastdeploy/scheduler/global_scheduler.py +++ b/fastdeploy/scheduler/global_scheduler.py @@ -25,6 +25,11 @@ import crcmod from redis import ConnectionPool from fastdeploy.engine.request import Request, RequestOutput +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + log_request, + log_request_error, +) from fastdeploy.scheduler import utils from fastdeploy.scheduler.data import ScheduledRequest, ScheduledResponse from fastdeploy.scheduler.storage import AdaptedRedis @@ -370,7 +375,11 @@ class GlobalScheduler: rem_amount=0, ttl=self.ttl, ) - scheduler_logger.info(f"Scheduler has enqueued some requests: {requests}") + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has enqueued some requests: {request_ids}", + request_ids=[request.request_id for request in requests], + ) if duplicate: scheduler_logger.warning( @@ -573,7 +582,9 @@ class GlobalScheduler: self.stolen_requests[request.request_id] = request continue - scheduler_logger.error(f"Scheduler has received a duplicate request from others: {request}") + log_request_error( + message="Scheduler has received a duplicate request from others: {request}", request=request + ) requests: List[Request] = [request.raw for request in scheduled_requests] if len(remaining_request) > 0: @@ -603,7 +614,11 @@ class GlobalScheduler: ) if len(requests) > 0: - scheduler_logger.info(f"Scheduler has pulled some request: {[request.request_id for request in requests]}") + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has pulled some request: {request_ids}", + request_ids=[request.request_id for request in requests], + ) return requests def _put_results_worker(self, tasks: List[Task]): @@ -649,7 +664,9 @@ class GlobalScheduler: stolen_responses[response_queue_name].append(response.serialize()) continue - scheduler_logger.error(f"Scheduler has received a non-existent response from engine: {[response]}") + log_request_error( + message="Scheduler has received a non-existent response from engine: {response}", response=[response] + ) with self.mutex: for request_id, responses in local_responses.items(): @@ -664,7 +681,11 @@ class GlobalScheduler: self.local_response_not_empty.notify_all() if len(finished_request_ids) > 0: - scheduler_logger.info(f"Scheduler has received some finished responses: {finished_request_ids}") + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has received some finished responses: {request_ids}", + request_ids=finished_request_ids, + ) for response_queue_name, responses in stolen_responses.items(): self.client.rpush(response_queue_name, *responses, ttl=self.ttl) @@ -793,7 +814,11 @@ class GlobalScheduler: if finished: del self.local_responses[request_id] - scheduler_logger.info(f"Scheduler has pulled a finished response: {[request_id]}") + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has pulled a finished response: {request_ids}", + request_ids=[request_id], + ) return results def reset(self): @@ -824,7 +849,7 @@ class GlobalScheduler: self.client.zrem(self._load_table_name(), self.name) self.local_responses = dict() self.stolen_requests = dict() - 
scheduler_logger.info("Scheduler has been reset") + log_request(RequestLogLevel.LIFECYCLE, message="Scheduler has been reset") def update_config(self, load_shards_num: Optional[int], reallocate: Optional[bool]): """ diff --git a/fastdeploy/scheduler/local_scheduler.py b/fastdeploy/scheduler/local_scheduler.py index fc4a64686b..d406866f9b 100644 --- a/fastdeploy/scheduler/local_scheduler.py +++ b/fastdeploy/scheduler/local_scheduler.py @@ -19,6 +19,7 @@ import time from typing import Dict, List, Optional, Tuple from fastdeploy.engine.request import Request, RequestOutput +from fastdeploy.logger.request_logger import RequestLogLevel, log_request from fastdeploy.scheduler.data import ScheduledRequest, ScheduledResponse from fastdeploy.utils import envs, scheduler_logger @@ -116,7 +117,7 @@ class LocalScheduler: self.ids = list() self.requests = dict() self.responses = dict() - scheduler_logger.info("Scheduler has been reset") + log_request(RequestLogLevel.LIFECYCLE, message="Scheduler has been reset") def _recycle(self, request_id: Optional[str] = None): """ @@ -191,7 +192,12 @@ class LocalScheduler: self.ids += valid_ids self.requests_not_empty.notify_all() - scheduler_logger.info(f"Scheduler has enqueued some requests: {valid_ids}") + if len(valid_ids) > 0: + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has enqueued some requests: {request_ids}", + request_ids=valid_ids, + ) if len(duplicated_ids) > 0: scheduler_logger.warning(f"Scheduler has received some duplicated requests: {duplicated_ids}") @@ -300,7 +306,11 @@ class LocalScheduler: scheduler_logger.debug(f"Scheduler has put all just-pulled request into the queue: {len(batch_ids)}") if len(requests) > 0: - scheduler_logger.info(f"Scheduler has pulled some request: {[request.request_id for request in requests]}") + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has pulled some request: {request_ids}", + request_ids=[request.request_id for request in requests], + ) return requests @@ -316,7 +326,11 @@ class LocalScheduler: finished_responses = [response.request_id for response in responses if response.finished] if len(finished_responses) > 0: - scheduler_logger.info(f"Scheduler has received some finished responses: {finished_responses}") + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has received some finished responses: {request_ids}", + request_ids=finished_responses, + ) with self.mutex: self.batch_responses_per_step.append([response.raw for response in responses]) @@ -381,7 +395,11 @@ class LocalScheduler: if finished: self._recycle(request_id) - scheduler_logger.info(f"Scheduler has pulled a finished response: {[request_id]}") + log_request( + RequestLogLevel.CONTENT, + message="Scheduler has pulled a finished response: {request_ids}", + request_ids=[request_id], + ) if results: scheduler_logger.debug(f"get responses, {results}") diff --git a/fastdeploy/scheduler/splitwise_scheduler.py b/fastdeploy/scheduler/splitwise_scheduler.py index cd5e073643..8116a29cf9 100644 --- a/fastdeploy/scheduler/splitwise_scheduler.py +++ b/fastdeploy/scheduler/splitwise_scheduler.py @@ -33,6 +33,7 @@ from fastdeploy.engine.request import ( RequestMetrics, RequestOutput, ) +from fastdeploy.logger.request_logger import log_request_error from fastdeploy.utils import scheduler_logger as logger @@ -240,7 +241,12 @@ class NodeInfo: for req_id, pairs in self.reqs.items(): load, arrival_time = pairs if cur_time - arrival_time > ttl: - logger.error(f"InferScheduler Expire Reqs({req_id}), arrival({arrival_time}), 
ttl({ttl})") + log_request_error( + message="InferScheduler Expire Reqs({req_id}), arrival({arrival_time}), ttl({ttl})", + req_id=req_id, + arrival_time=arrival_time, + ttl=ttl, + ) expire_reqs.add((req_id, load)) for req_id, load in expire_reqs: if req_id in self.reqs: @@ -378,7 +384,7 @@ class ResultReader: ) self.data.appendleft(result) - logger.error(f"Req({req_id}) is expired({self.ttl})") + log_request_error(message="Req({req_id}) is expired({ttl})", req_id=req_id, ttl=self.ttl) expired_reqs.add(req_id) continue keys.append(req_id) @@ -511,7 +517,11 @@ class APIScheduler: except IndexError: continue except Exception as e: - logger.error(f"APIScheduler Schedule req error: {e!s}, {str(traceback.format_exc())}") + log_request_error( + message="APIScheduler Schedule req error: {error}, {traceback}", + error=str(e), + traceback=traceback.format_exc(), + ) def schedule(self, req, pnodes, dnodes, mnodes, group=""): """ @@ -841,7 +851,11 @@ class InferScheduler: req = self.reqs_queue.popleft() if cur_time - req.metrics.arrival_time > self.ttl: - logger.error(f"req({req.request_id}) is expired({self.ttl}) when InferScheduler Get Requests") + log_request_error( + message="req({request_id}) is expired({ttl}) when InferScheduler Get Requests", + request_id=req.request_id, + ttl=self.ttl, + ) self.node.finish_req(req.request_id) continue current_prefill_tokens += req.prompt_token_ids_len diff --git a/fastdeploy/utils.py b/fastdeploy/utils.py index 0a591dc277..6f01c1b5cf 100644 --- a/fastdeploy/utils.py +++ b/fastdeploy/utils.py @@ -16,15 +16,12 @@ import argparse import asyncio -import codecs import hashlib import importlib import json -import logging import os import pickle import random -import re import socket import subprocess import sys @@ -36,8 +33,6 @@ from enum import Enum from functools import cache from http import HTTPStatus from importlib.metadata import PackageNotFoundError, distribution -from logging.handlers import BaseRotatingHandler -from pathlib import Path from typing import Any, Literal, TypeVar, Union import numpy as np @@ -53,7 +48,7 @@ from typing_extensions import TypeIs, assert_never from fastdeploy import envs from fastdeploy.entrypoints.openai.protocol import ErrorInfo, ErrorResponse -from fastdeploy.logger.logger import FastDeployLogger +from fastdeploy.logger.request_logger import log_request_error from fastdeploy.worker.output import PromptLogprobs T = TypeVar("T") @@ -193,6 +188,19 @@ class ExceptionHandler: loc = first_error.get("loc", []) param = loc[-1] if loc else None message = first_error.get("msg", str(exc)) + + # Try to extract request_id from request body + request_id = None + try: + body = await request.body() + if body: + import json + + body_json = json.loads(body) + request_id = body_json.get("request_id") + except Exception: + pass + err = ErrorResponse( error=ErrorInfo( message=message, @@ -201,7 +209,13 @@ class ExceptionHandler: param=param, ) ) - api_server_logger.error(f"invalid_request_error: {request.url} {param} {message}") + log_request_error( + message="request[{request_id}] invalid_request_error: {url} {param} {msg}", + request_id=request_id or "unknown", + url=str(request.url), + param=param, + msg=message, + ) return JSONResponse(content=err.model_dump(), status_code=HTTPStatus.BAD_REQUEST) @@ -224,139 +238,18 @@ class ErrorCode(str, Enum): CLIENT_ABORTED = "client_aborted" -class ColoredFormatter(logging.Formatter): - """自定义日志格式器,用于控制台输出带颜色""" +# Backward compatibility: logger classes have been moved to fastdeploy.logger module +# Use 
lazy import to avoid circular dependencies +def __getattr__(name): + if name == "ColoredFormatter": + from fastdeploy.logger.formatters import ColoredFormatter - COLOR_CODES = { - logging.WARNING: 33, # 黄色 - logging.ERROR: 31, # 红色 - logging.CRITICAL: 31, # 红色 - } + return ColoredFormatter + elif name == "DailyRotatingFileHandler": + from fastdeploy.logger.handlers import DailyRotatingFileHandler - def format(self, record): - color_code = self.COLOR_CODES.get(record.levelno, 0) - prefix = f"\033[{color_code}m" - suffix = "\033[0m" - message = super().format(record) - if color_code: - message = f"{prefix}{message}{suffix}" - return message - - -class DailyRotatingFileHandler(BaseRotatingHandler): - """ - like `logging.TimedRotatingFileHandler`, but this class support multi-process - """ - - def __init__( - self, - filename, - backupCount=0, - encoding="utf-8", - delay=False, - utc=False, - **kwargs, - ): - """ - 初始化 RotatingFileHandler 对象。 - - Args: - filename (str): 日志文件的路径,可以是相对路径或绝对路径。 - backupCount (int, optional, default=0): 保存的备份文件数量,默认为 0,表示不保存备份文件。 - encoding (str, optional, default='utf-8'): 编码格式,默认为 'utf-8'。 - delay (bool, optional, default=False): 是否延迟写入,默认为 False,表示立即写入。 - utc (bool, optional, default=False): 是否使用 UTC 时区,默认为 False,表示不使用 UTC 时区。 - kwargs (dict, optional): 其他参数将被传递给 BaseRotatingHandler 类的 init 方法。 - - Raises: - TypeError: 如果 filename 不是 str 类型。 - ValueError: 如果 backupCount 小于等于 0。 - """ - self.backup_count = backupCount - self.utc = utc - self.suffix = "%Y-%m-%d" - self.base_log_path = Path(filename) - self.base_filename = self.base_log_path.name - self.current_filename = self._compute_fn() - self.current_log_path = self.base_log_path.with_name(self.current_filename) - BaseRotatingHandler.__init__(self, filename, "a", encoding, delay) - - def shouldRollover(self, record): - """ - check scroll through the log - """ - if self.current_filename != self._compute_fn(): - return True - return False - - def doRollover(self): - """ - scroll log - """ - if self.stream: - self.stream.close() - self.stream = None - - self.current_filename = self._compute_fn() - self.current_log_path = self.base_log_path.with_name(self.current_filename) - - if not self.delay: - self.stream = self._open() - - self.delete_expired_files() - - def _compute_fn(self): - """ - Calculate the log file name corresponding current time - """ - return self.base_filename + "." + time.strftime(self.suffix, time.localtime()) - - def _open(self): - """ - open new log file - """ - if self.encoding is None: - stream = open(str(self.current_log_path), self.mode) - else: - stream = codecs.open(str(self.current_log_path), self.mode, self.encoding) - - if self.base_log_path.exists(): - try: - if not self.base_log_path.is_symlink() or os.readlink(self.base_log_path) != self.current_filename: - os.remove(self.base_log_path) - except OSError: - pass - - try: - os.symlink(self.current_filename, str(self.base_log_path)) - except OSError: - pass - return stream - - def delete_expired_files(self): - """ - delete expired log files - """ - if self.backup_count <= 0: - return - - file_names = os.listdir(str(self.base_log_path.parent)) - result = [] - prefix = self.base_filename + "." 
- plen = len(prefix) - for file_name in file_names: - if file_name[:plen] == prefix: - suffix = file_name[plen:] - if re.match(r"^\d{4}-\d{2}-\d{2}(\.\w+)?$", suffix): - result.append(file_name) - if len(result) < self.backup_count: - result = [] - else: - result.sort() - result = result[: len(result) - self.backup_count] - - for file_name in result: - os.remove(str(self.base_log_path.with_name(file_name))) + return DailyRotatingFileHandler + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") def chunk_list(lst: list[T], chunk_size: int): @@ -1054,12 +947,6 @@ def parse_quantization(value: str): return {"quantization": value} -# 日志使用全局访问点(兼容原有使用方式) -def get_logger(name, file_name=None, without_formater=False, print_to_console=False): - """全局函数包装器,保持向后兼容""" - return FastDeployLogger().get_logger(name, file_name, without_formater, print_to_console) - - def check_download_links(bos_client, links, timeout=1): """ check bos download links @@ -1156,20 +1043,6 @@ def download_from_bos(bos_client, bos_links, retry: int = 0): break -llm_logger = get_logger("fastdeploy", "fastdeploy.log") -data_processor_logger = get_logger("data_processor", "data_processor.log") -scheduler_logger = get_logger("scheduler", "scheduler.log") -api_server_logger = get_logger("api_server", "api_server.log") -console_logger = get_logger("console", "console.log", print_to_console=True) -spec_logger = get_logger("speculate", "speculate.log") -zmq_client_logger = get_logger("zmq_client", "zmq_client.log") -trace_logger = FastDeployLogger().get_trace_logger("trace", "trace.log") -router_logger = get_logger("router", "router.log") -fmq_logger = get_logger("fmq", "fmq.log") -obj_logger = get_logger("obj", "obj.log") # debug内存问题 -register_manager_logger = get_logger("register_manager", "register_manager.log") - - def parse_type(return_type: Callable[[str], T]) -> Callable[[str], T]: def _parse_type(val: str) -> T: @@ -1338,3 +1211,26 @@ def all_gather_values(value: int | float | bool, group: paddle.distributed.commu paddle.distributed.all_gather(_global, _local, group) _results = [_type(t.item()) for t in _global] return _results + + +# ============================================================================= +# Logger re-export (backward compatibility) +# Actual implementation is in fastdeploy.logger module, re-exported here to +# support existing import patterns +# NOTE: Must be at the end of file to avoid circular imports +# ============================================================================= +from fastdeploy.logger import ( # noqa: F401 + api_server_logger, + console_logger, + data_processor_logger, + fmq_logger, + get_logger, + llm_logger, + obj_logger, + register_manager_logger, + router_logger, + scheduler_logger, + spec_logger, + trace_logger, + zmq_client_logger, +) diff --git a/tests/entrypoints/openai/v1/test_serving_completion_v1.py b/tests/entrypoints/openai/v1/test_serving_completion_v1.py index 5b5982f99b..eff943469e 100644 --- a/tests/entrypoints/openai/v1/test_serving_completion_v1.py +++ b/tests/entrypoints/openai/v1/test_serving_completion_v1.py @@ -756,8 +756,8 @@ class TestAsyncLLMOpenAIServingCompletionPreprocess(unittest.IsolatedAsyncioTest expected_id = f"{request_id}_{i}" if request_id else f"_{i}" self.assertEqual(ctx.preprocess_requests[i]["request_id"], expected_id) - @patch("fastdeploy.entrypoints.openai.v1.serving_completion.api_server_logger") - async def test_preprocess_exception_logging(self, mock_logger): + 
@patch("fastdeploy.entrypoints.openai.v1.serving_completion.log_request_error") + async def test_preprocess_exception_logging(self, mock_log_request_error): """Test _preprocess logs exceptions properly""" # Setup - create a request that will cause an exception request = CompletionRequest(model="test_model", prompt="dummy", max_tokens=50) @@ -771,11 +771,10 @@ class TestAsyncLLMOpenAIServingCompletionPreprocess(unittest.IsolatedAsyncioTest # Assert self.assertIsInstance(result, ErrorResponse) - mock_logger.error.assert_called_once() - error_log = mock_logger.error.call_args[0][0] - self.assertIn("OpenAIServingCompletion create_completion", error_log) - self.assertIn("ValueError", error_log) - self.assertIn("Traceback", error_log) # Changed from "traceback" to "Traceback" + mock_log_request_error.assert_called_once() + error_msg = mock_log_request_error.call_args[1].get("message", "") + self.assertIn("OpenAIServingCompletion create_completion", error_msg) + self.assertIn("ValueError", error_msg) if __name__ == "__main__": diff --git a/tests/entrypoints/test_abort.py b/tests/entrypoints/test_abort.py index e378ff814e..598a3023f7 100644 --- a/tests/entrypoints/test_abort.py +++ b/tests/entrypoints/test_abort.py @@ -206,9 +206,9 @@ class TestEngineClientAbort(unittest.TestCase): mock_send_task.assert_called_once_with(expected_data) @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True) - @patch("fastdeploy.entrypoints.engine_client.api_server_logger") + @patch("fastdeploy.entrypoints.engine_client.log_request") @patch.object(EngineClient, "_send_task") - def test_abort_logging(self, mock_send_task, mock_logger): + def test_abort_logging(self, mock_send_task, mock_log_request): """Test that abort method logs correctly""" request_id = "test_request" n = 2 @@ -216,18 +216,16 @@ class TestEngineClientAbort(unittest.TestCase): # Run the abort method self.loop.run_until_complete(self.engine_client.abort(request_id, n=n)) - # Verify info log was called twice - self.assertEqual(mock_logger.info.call_count, 2) + # Verify log_request was called twice + self.assertEqual(mock_log_request.call_count, 2) # Verify the first log message (abort start) - first_call = mock_logger.info.call_args_list[0] - self.assertEqual(first_call[0][0], "abort request_id:test_request") + first_call = mock_log_request.call_args_list[0] + self.assertIn("abort request_id", first_call[1].get("message", "")) # Verify the second log message (abort completion with request IDs) - second_call = mock_logger.info.call_args_list[1] - expected_log_message = "Aborted request(s) %s." 
- self.assertEqual(second_call[0][0], expected_log_message) - self.assertEqual(second_call[0][1], "test_request_0,test_request_1") + second_call = mock_log_request.call_args_list[1] + self.assertIn("Aborted request(s)", second_call[1].get("message", "")) @patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True) @patch("fastdeploy.entrypoints.engine_client.api_server_logger") diff --git a/tests/entrypoints/test_engine_client.py b/tests/entrypoints/test_engine_client.py index 71ad4b29db..bbbd42d795 100644 --- a/tests/entrypoints/test_engine_client.py +++ b/tests/entrypoints/test_engine_client.py @@ -843,11 +843,11 @@ class TestEngineClientValidParameters(unittest.TestCase): """Test valid_parameters adjusts reasoning_max_tokens when needed.""" data = {"max_tokens": 50, "reasoning_max_tokens": 100, "request_id": "test-id"} # Larger than max_tokens - with patch("fastdeploy.entrypoints.engine_client.api_server_logger") as mock_logger: + with patch("fastdeploy.entrypoints.engine_client.log_request") as mock_log_request: self.engine_client.valid_parameters(data) self.assertEqual(data["reasoning_max_tokens"], 50) - mock_logger.warning.assert_called_once() + mock_log_request.assert_called_once() def test_valid_parameters_reasoning_max_tokens_with_reasoning_effort(self): """Test valid_parameters when both reasoning_max_tokens and reasoning_effort are set.""" @@ -858,14 +858,13 @@ class TestEngineClientValidParameters(unittest.TestCase): "request_id": "test-id", } - with patch("fastdeploy.entrypoints.engine_client.api_server_logger") as mock_logger: + with patch("fastdeploy.entrypoints.engine_client.log_request") as mock_log_request: self.engine_client.valid_parameters(data) # When reasoning_effort is set, reasoning_max_tokens should be set to None self.assertIsNone(data["reasoning_max_tokens"]) - mock_logger.warning.assert_called_once() - warning_call = mock_logger.warning.call_args[0][0] - self.assertIn("reasoning_max_tokens and reasoning_effort are both set", warning_call) + # log_request is called once: for reasoning_effort conflict (reasoning_max_tokens=50 < max_tokens=100) + mock_log_request.assert_called_once() def test_valid_parameters_temperature_zero_adjustment(self): """Test valid_parameters adjusts zero temperature.""" @@ -1846,7 +1845,7 @@ def test_add_requests_objgraph_and_error_paths(minimal_engine_client): with ( patch( "fastdeploy.entrypoints.engine_client.os.getenv", - side_effect=lambda k: "1" if k == "FD_ENABLE_OBJGRAPH_DEBUG" else None, + side_effect=lambda k, default=None: "1" if k == "FD_ENABLE_OBJGRAPH_DEBUG" else default, ), patch("fastdeploy.entrypoints.engine_client._has_objgraph", True), patch("fastdeploy.entrypoints.engine_client._has_psutil", False), diff --git a/tests/entrypoints/test_llm.py b/tests/entrypoints/test_llm.py index 65ba1dc73e..9c1baa5366 100644 --- a/tests/entrypoints/test_llm.py +++ b/tests/entrypoints/test_llm.py @@ -148,7 +148,9 @@ def test_receive_output_merges(): assert first.added is True -def test_receive_output_logs_exception(caplog): +def test_receive_output_logs_exception(): + from unittest.mock import patch + llm = _make_llm(_make_engine()) calls = iter([RuntimeError("boom"), SystemExit()]) @@ -159,9 +161,14 @@ def test_receive_output_logs_exception(caplog): return nxt llm.llm_engine._get_generated_result = _get_generated_result - with pytest.raises(SystemExit): - llm._receive_output() - assert "Unexpected error happened" in caplog.text + with patch("fastdeploy.entrypoints.llm.log_request_error") as 
mock_log:
+        with pytest.raises(SystemExit):
+            llm._receive_output()
+        mock_log.assert_called_once()
+        call_kwargs = mock_log.call_args[1]
+        assert "Unexpected error happened" in call_kwargs.get(
+            "message", mock_log.call_args[0][0] if mock_log.call_args[0] else ""
+        )


 def test_generate_and_chat_branches():
diff --git a/tests/input/test_video_utils.py b/tests/input/test_video_utils.py
index cf85956e22..281b1b90ec 100644
--- a/tests/input/test_video_utils.py
+++ b/tests/input/test_video_utils.py
@@ -264,7 +264,7 @@ class TestSampleFramesQwen(unittest.TestCase):

     def test_fps_warning_when_nframes_exceeds_total(self):
         """fps so high that computed num_frames > total → warning logged."""
-        with self.assertLogs(level="WARNING"):
+        with self.assertLogs(logger="fastdeploy.main", level="WARNING"):
             sample_frames_qwen(2, 4, 100, {"num_of_frame": 10, "fps": 1.0}, fps=100.0)

     def test_divisible_by_4_correction(self):
diff --git a/tests/logger/test_logger.py b/tests/logger/test_logger.py
index 4939356a62..da708db6f9 100644
--- a/tests/logger/test_logger.py
+++ b/tests/logger/test_logger.py
@@ -46,8 +46,12 @@ class LoggerTests(unittest.TestCase):
         shutil.rmtree(self.tmp_dir, ignore_errors=True)

     def test_unified_logger(self):
-        """Test _get_unified_logger through instance"""
-        test_cases = [(None, "fastdeploy"), ("module", "fastdeploy.module"), ("fastdeploy.utils", "fastdeploy.utils")]
+        """Test _get_unified_logger through instance (uses main channel)"""
+        test_cases = [
+            (None, "fastdeploy.main"),
+            ("module", "fastdeploy.main.module"),
+            ("fastdeploy.utils", "fastdeploy.utils"),  # already has the "fastdeploy." prefix, kept unchanged
+        ]

         for name, expected in test_cases:
             with self.subTest(name=name):
diff --git a/tests/logger/test_logging_config.py b/tests/logger/test_logging_config.py
new file mode 100644
index 0000000000..3a425688c5
--- /dev/null
+++ b/tests/logger/test_logging_config.py
@@ -0,0 +1,91 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from unittest.mock import patch
+
+from fastdeploy.logger.config import resolve_log_level, resolve_request_logging_defaults
+
+
+class TestResolveLogLevel(unittest.TestCase):
+    """Tests for the resolve_log_level function"""
+
+    def test_explicit_info_level(self):
+        """Explicitly setting the INFO level"""
+        result = resolve_log_level(raw_level="INFO")
+        self.assertEqual(result, "INFO")
+
+    def test_explicit_debug_level(self):
+        """Explicitly setting the DEBUG level"""
+        result = resolve_log_level(raw_level="DEBUG")
+        self.assertEqual(result, "DEBUG")
+
+    def test_case_insensitive(self):
+        """Level names should be case-insensitive"""
+        self.assertEqual(resolve_log_level(raw_level="info"), "INFO")
+        self.assertEqual(resolve_log_level(raw_level="debug"), "DEBUG")
+
+    def test_invalid_level_raises(self):
+        """An invalid level should raise ValueError"""
+        with self.assertRaises(ValueError) as ctx:
+            resolve_log_level(raw_level="INVALID")
+        self.assertIn("Unsupported FD_LOG_LEVEL", str(ctx.exception))
+
+    def test_debug_enabled_fallback(self):
+        """FD_DEBUG=1 should return DEBUG"""
+        result = resolve_log_level(raw_level=None, debug_enabled=1)
+        self.assertEqual(result, "DEBUG")
+
+    def test_debug_disabled_fallback(self):
+        """FD_DEBUG=0 should return INFO"""
+        result = resolve_log_level(raw_level=None, debug_enabled=0)
+        self.assertEqual(result, "INFO")
+
+    def test_env_fd_log_level_priority(self):
+        """The FD_LOG_LEVEL environment variable takes priority over FD_DEBUG"""
+        with patch.dict("os.environ", {"FD_LOG_LEVEL": "INFO", "FD_DEBUG": "1"}):
+            result = resolve_log_level()
+            self.assertEqual(result, "INFO")
+
+    def test_env_fd_debug_fallback(self):
+        """Fall back to FD_DEBUG when FD_LOG_LEVEL is not set"""
+        with patch.dict("os.environ", {"FD_DEBUG": "1"}, clear=True):
+            result = resolve_log_level()
+            self.assertEqual(result, "DEBUG")
+
+
+class TestResolveRequestLoggingDefaults(unittest.TestCase):
+    """Tests for the resolve_request_logging_defaults function"""
+
+    def test_default_values(self):
+        """Default values"""
+        with patch.dict("os.environ", {}, clear=True):
+            result = resolve_request_logging_defaults()
+            self.assertEqual(result["enabled"], 1)
+            self.assertEqual(result["level"], 2)
+            self.assertEqual(result["max_len"], 2048)
+
+    def test_custom_values(self):
+        """Custom values"""
+        with patch.dict(
+            "os.environ", {"FD_LOG_REQUESTS": "0", "FD_LOG_REQUESTS_LEVEL": "2", "FD_LOG_MAX_LEN": "1024"}
+        ):
+            result = resolve_request_logging_defaults()
+            self.assertEqual(result["enabled"], 0)
+            self.assertEqual(result["level"], 2)
+            self.assertEqual(result["max_len"], 1024)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/logger/test_request_logger.py b/tests/logger/test_request_logger.py
new file mode 100644
index 0000000000..d92826169b
--- /dev/null
+++ b/tests/logger/test_request_logger.py
@@ -0,0 +1,176 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import unittest +from unittest.mock import patch + +from fastdeploy.logger.request_logger import ( + RequestLogLevel, + _should_log, + _truncate, + log_request, + log_request_error, +) + + +class TestRequestLogLevel(unittest.TestCase): + """Test RequestLogLevel enum""" + + def test_level_values(self): + """Test level values""" + self.assertEqual(int(RequestLogLevel.LIFECYCLE), 0) + self.assertEqual(int(RequestLogLevel.STAGES), 1) + self.assertEqual(int(RequestLogLevel.CONTENT), 2) + self.assertEqual(int(RequestLogLevel.FULL), 3) + + +class TestShouldLog(unittest.TestCase): + """Test _should_log function""" + + def test_disabled_returns_false(self): + """FD_LOG_REQUESTS=0 should return False""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_REQUESTS = 0 + mock_envs.FD_LOG_REQUESTS_LEVEL = 3 + self.assertFalse(_should_log(RequestLogLevel.LIFECYCLE)) + + def test_level_within_threshold(self): + """Level within threshold should return True""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_REQUESTS = 1 + mock_envs.FD_LOG_REQUESTS_LEVEL = 2 + self.assertTrue(_should_log(RequestLogLevel.LIFECYCLE)) + self.assertTrue(_should_log(RequestLogLevel.STAGES)) + self.assertTrue(_should_log(RequestLogLevel.CONTENT)) + + def test_level_above_threshold(self): + """Level above threshold should return False""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_REQUESTS = 1 + mock_envs.FD_LOG_REQUESTS_LEVEL = 1 + self.assertFalse(_should_log(RequestLogLevel.CONTENT)) + self.assertFalse(_should_log(RequestLogLevel.FULL)) + + +class TestTruncate(unittest.TestCase): + """Test _truncate function""" + + def test_short_text_unchanged(self): + """Short text should remain unchanged""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_MAX_LEN = 100 + result = _truncate("short text") + self.assertEqual(result, "short text") + + def test_long_text_truncated(self): + """Long text should be truncated""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_MAX_LEN = 10 + result = _truncate("this is a very long text") + self.assertEqual(result, "this is a ") + self.assertEqual(len(result), 10) + + def test_non_string_converted(self): + """Non-string should be converted""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_MAX_LEN = 100 + result = _truncate(12345) + self.assertEqual(result, "12345") + + +class TestLogRequest(unittest.TestCase): + """Test log_request function""" + + @patch("fastdeploy.logger._request_logger") + def test_log_when_enabled(self, mock_logger): + """Should log when enabled""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_REQUESTS = 1 + mock_envs.FD_LOG_REQUESTS_LEVEL = 0 + mock_envs.FD_LOG_MAX_LEN = 2048 + + log_request(RequestLogLevel.LIFECYCLE, message="test {value}", value="hello") + mock_logger.info.assert_called_once() + call_args = mock_logger.info.call_args[0][0] + self.assertEqual(call_args, "test hello") + + @patch("fastdeploy.logger._request_logger") + def test_no_log_when_disabled(self, mock_logger): + """Should not log when disabled""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_REQUESTS = 0 + mock_envs.FD_LOG_REQUESTS_LEVEL = 3 + + log_request(RequestLogLevel.LIFECYCLE, message="test {value}", value="hello") + mock_logger.info.assert_not_called() + + 
@patch("fastdeploy.logger._request_logger") + def test_no_log_when_level_too_high(self, mock_logger): + """Should not log when level is too high""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_REQUESTS = 1 + mock_envs.FD_LOG_REQUESTS_LEVEL = 0 + + log_request(RequestLogLevel.CONTENT, message="test {value}", value="hello") + mock_logger.info.assert_not_called() + + @patch("fastdeploy.logger._request_logger") + def test_content_level_truncates_content(self, mock_logger): + """CONTENT level should truncate content""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_REQUESTS = 1 + mock_envs.FD_LOG_REQUESTS_LEVEL = 3 + mock_envs.FD_LOG_MAX_LEN = 5 + + log_request(RequestLogLevel.CONTENT, message="content: {data}", data="very long data") + mock_logger.info.assert_called_once() + call_args = mock_logger.info.call_args[0][0] + self.assertEqual(call_args, "content: very ") + + @patch("fastdeploy.logger._request_logger") + def test_lifecycle_level_no_truncation(self, mock_logger): + """LIFECYCLE level should not truncate content""" + with patch("fastdeploy.logger.request_logger.envs") as mock_envs: + mock_envs.FD_LOG_REQUESTS = 1 + mock_envs.FD_LOG_REQUESTS_LEVEL = 3 + mock_envs.FD_LOG_MAX_LEN = 5 + + log_request(RequestLogLevel.LIFECYCLE, message="content: {data}", data="very long data") + mock_logger.info.assert_called_once() + call_args = mock_logger.info.call_args[0][0] + self.assertEqual(call_args, "content: very long data") + + +class TestLogRequestError(unittest.TestCase): + """Test log_request_error function""" + + @patch("fastdeploy.logger._request_logger") + def test_error_with_fields(self, mock_logger): + """Error log with fields should format message""" + log_request_error(message="request {request_id} failed: {error}", request_id="req-123", error="timeout") + mock_logger.error.assert_called_once() + call_args = mock_logger.error.call_args[0][0] + self.assertEqual(call_args, "request req-123 failed: timeout") + + @patch("fastdeploy.logger._request_logger") + def test_error_without_fields(self, mock_logger): + """Error log without fields should not call format""" + log_request_error(message="simple error message") + mock_logger.error.assert_called_once() + call_args = mock_logger.error.call_args[0][0] + self.assertEqual(call_args, "simple error message") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/logger/test_setup_logging.py b/tests/logger/test_setup_logging.py index 0bf02c14e8..c73c7dd5e2 100644 --- a/tests/logger/test_setup_logging.py +++ b/tests/logger/test_setup_logging.py @@ -43,58 +43,18 @@ class TestSetupLogging(unittest.TestCase): delattr(setup_logging, "_configured") def test_log_dir_created(self): + """Log directory should be created""" nested = os.path.join(self.temp_dir, "a", "b", "c") setup_logging(log_dir=nested) self.assertTrue(Path(nested).is_dir()) - def test_default_config_fallback(self): - """Pass a non-existent config_file to trigger default_config""" - fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json") - setup_logging(config_file=fake_cfg) - logger = logging.getLogger("fastdeploy") - self.assertTrue(logger.handlers) - handler_classes = [h.__class__.__name__ for h in logger.handlers] - self.assertIn("TimedRotatingFileHandler", handler_classes) - - def test_debug_level_affects_handlers(self): - """FD_DEBUG=1 should force DEBUG level""" - with patch("fastdeploy.envs.FD_DEBUG", 1): - with patch("logging.config.dictConfig") as mock_cfg: - setup_logging() - 
called_config = mock_cfg.call_args[0][0] - for handler in called_config["handlers"].values(): - self.assertIn("formatter", handler) - self.assertEqual(called_config["handlers"]["console_stdout"]["level"], "DEBUG") - - @patch("logging.config.dictConfig") - def test_custom_config_with_dailyrotating_and_debug(self, mock_dict): - custom_cfg = { - "version": 1, - "handlers": { - "daily": { - "class": "logging.handlers.DailyRotatingFileHandler", - "level": "INFO", - "formatter": "plain", - } - }, - "loggers": {"fastdeploy": {"handlers": ["daily"], "level": "INFO"}}, - } - cfg_path = Path(self.temp_dir) / "cfg.json" - cfg_path.write_text(json.dumps(custom_cfg)) - - with patch("fastdeploy.envs.FD_DEBUG", 1): - setup_logging(config_file=str(cfg_path)) - - config_used = mock_dict.call_args[0][0] - self.assertIn("daily", config_used["handlers"]) - self.assertEqual(config_used["handlers"]["daily"]["level"], "DEBUG") - self.assertIn("backupCount", config_used["handlers"]["daily"]) - def test_configure_once(self): - """Ensure idempotent setup""" - l1 = setup_logging() - l2 = setup_logging() - self.assertIs(l1, l2) + """Ensure idempotent setup - only configures once""" + setup_logging() + self.assertTrue(setup_logging._configured) + # Second call should not raise + setup_logging() + self.assertTrue(setup_logging._configured) def test_envs_priority_used_for_log_dir(self): """When log_dir=None, should use envs.FD_LOG_DIR""" @@ -102,15 +62,45 @@ class TestSetupLogging(unittest.TestCase): setup_logging() self.assertTrue(os.path.exists(self.temp_dir)) - @patch("logging.StreamHandler.emit") - def test_console_colored(self, mock_emit): - setup_logging() - logger = logging.getLogger("fastdeploy") - logger.error("color test") - self.assertTrue(mock_emit.called) + def test_log_dir_stored(self): + """setup_logging should store log_dir for later use""" + setup_logging(log_dir=self.temp_dir) + self.assertEqual(setup_logging._log_dir, self.temp_dir) - @patch("logging.config.dictConfig") - def test_backup_count_merging(self, mock_dict): + def test_no_config_file_no_dictconfig(self): + """When config_file is not provided, dictConfig should not be called""" + with patch("logging.config.dictConfig") as mock_dict: + setup_logging() + self.assertFalse(mock_dict.called) + + def test_config_file_with_dictconfig(self): + """When config_file is provided, dictConfig should be called""" + custom_cfg = { + "version": 1, + "handlers": {}, + "loggers": {}, + } + cfg_path = Path(self.temp_dir) / "cfg.json" + cfg_path.write_text(json.dumps(custom_cfg)) + + with patch("logging.config.dictConfig") as mock_dict: + setup_logging(config_file=str(cfg_path)) + self.assertTrue(mock_dict.called) + + def test_config_file_not_exists_uses_default(self): + """When config_file doesn't exist, use default config""" + fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json") + + with patch("logging.config.dictConfig") as mock_dict: + setup_logging(config_file=fake_cfg) + self.assertTrue(mock_dict.called) + # Should use default config + config_used = mock_dict.call_args[0][0] + self.assertIn("handlers", config_used) + self.assertIn("loggers", config_used) + + def test_backup_count_merging(self): + """backupCount should be merged into handler config""" custom_cfg = { "version": 1, "handlers": {"daily": {"class": "logging.handlers.DailyRotatingFileHandler", "formatter": "plain"}}, @@ -119,33 +109,77 @@ class TestSetupLogging(unittest.TestCase): cfg_path = Path(self.temp_dir) / "cfg.json" cfg_path.write_text(json.dumps(custom_cfg)) - 
setup_logging(config_file=str(cfg_path)) + with patch("logging.config.dictConfig") as mock_dict: + setup_logging(config_file=str(cfg_path)) + config_used = mock_dict.call_args[0][0] + self.assertEqual(config_used["handlers"]["daily"]["backupCount"], 3) - config_used = mock_dict.call_args[0][0] - self.assertEqual(config_used["handlers"]["daily"]["backupCount"], 3) + def test_debug_level_affects_handlers(self): + """FD_DEBUG=1 should force DEBUG level in handlers""" + custom_cfg = { + "version": 1, + "handlers": {"test": {"class": "logging.StreamHandler", "level": "INFO"}}, + "loggers": {}, + } + cfg_path = Path(self.temp_dir) / "cfg.json" + cfg_path.write_text(json.dumps(custom_cfg)) - @patch("logging.config.dictConfig") - def test_error_logs_use_stderr_handler(self, mock_dict): - """ERROR级别日志应该使用stderr输出""" - setup_logging() - config_used = mock_dict.call_args[0][0] - self.assertIn("console_stderr", config_used["handlers"]) - self.assertEqual(config_used["handlers"]["console_stderr"]["stream"], "ext://sys.stderr") - self.assertEqual(config_used["handlers"]["console_stderr"]["level"], "ERROR") + with patch("fastdeploy.envs.FD_DEBUG", 1): + with patch("logging.config.dictConfig") as mock_dict: + setup_logging(config_file=str(cfg_path)) + config_used = mock_dict.call_args[0][0] + self.assertEqual(config_used["handlers"]["test"]["level"], "DEBUG") - @patch("logging.config.dictConfig") - def test_console_stdout_filters_below_error(self, mock_dict): - """console_stdout应该只输出低于ERROR级别的日志""" - setup_logging() - config_used = mock_dict.call_args[0][0] - self.assertIn("console_stdout", config_used["handlers"]) - self.assertIn("below_error", config_used["handlers"]["console_stdout"]["filters"]) - self.assertEqual(config_used["handlers"]["console_stdout"]["stream"], "ext://sys.stdout") + def test_default_config_has_channels(self): + """Default config should have channel loggers configured""" + fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json") + + with patch("logging.config.dictConfig") as mock_dict: + setup_logging(config_file=fake_cfg) + config_used = mock_dict.call_args[0][0] + # Check channel loggers exist + self.assertIn("fastdeploy.main", config_used["loggers"]) + self.assertIn("fastdeploy.request", config_used["loggers"]) + self.assertIn("fastdeploy.console", config_used["loggers"]) + + def test_default_config_has_handlers(self): + """Default config should have file handlers configured""" + fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json") + + with patch("logging.config.dictConfig") as mock_dict: + setup_logging(config_file=fake_cfg) + config_used = mock_dict.call_args[0][0] + # Check handlers exist + self.assertIn("main_file", config_used["handlers"]) + self.assertIn("request_file", config_used["handlers"]) + self.assertIn("error_file", config_used["handlers"]) + self.assertIn("console_stderr", config_used["handlers"]) + + def test_default_config_stderr_handler(self): + """Default config console_stderr should output to stderr""" + fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json") + + with patch("logging.config.dictConfig") as mock_dict: + setup_logging(config_file=fake_cfg) + config_used = mock_dict.call_args[0][0] + self.assertEqual(config_used["handlers"]["console_stderr"]["stream"], "ext://sys.stderr") + self.assertEqual(config_used["handlers"]["console_stderr"]["level"], "ERROR") + + def test_default_config_stdout_filters_below_error(self): + """Default config console_stdout should filter below ERROR level""" + fake_cfg = os.path.join(self.temp_dir, 
"no_such_cfg.json") + + with patch("logging.config.dictConfig") as mock_dict: + setup_logging(config_file=fake_cfg) + config_used = mock_dict.call_args[0][0] + self.assertIn("console_stdout", config_used["handlers"]) + self.assertIn("below_error", config_used["handlers"]["console_stdout"]["filters"]) + self.assertEqual(config_used["handlers"]["console_stdout"]["stream"], "ext://sys.stdout") class TestMaxLevelFilter(unittest.TestCase): def test_filter_allows_below_level(self): - """MaxLevelFilter应该允许低于指定级别的日志通过""" + """MaxLevelFilter should allow logs below specified level""" filter = MaxLevelFilter("ERROR") record = logging.LogRecord( name="test", level=logging.INFO, pathname="", lineno=0, msg="test", args=(), exc_info=None @@ -153,7 +187,7 @@ class TestMaxLevelFilter(unittest.TestCase): self.assertTrue(filter.filter(record)) def test_filter_blocks_at_level(self): - """MaxLevelFilter应该阻止等于指定级别的日志""" + """MaxLevelFilter should block logs at specified level""" filter = MaxLevelFilter("ERROR") record = logging.LogRecord( name="test", level=logging.ERROR, pathname="", lineno=0, msg="test", args=(), exc_info=None @@ -161,7 +195,7 @@ class TestMaxLevelFilter(unittest.TestCase): self.assertFalse(filter.filter(record)) def test_filter_blocks_above_level(self): - """MaxLevelFilter应该阻止高于指定级别的日志""" + """MaxLevelFilter should block logs above specified level""" filter = MaxLevelFilter("ERROR") record = logging.LogRecord( name="test", level=logging.CRITICAL, pathname="", lineno=0, msg="test", args=(), exc_info=None @@ -169,7 +203,7 @@ class TestMaxLevelFilter(unittest.TestCase): self.assertFalse(filter.filter(record)) def test_filter_with_numeric_level(self): - """MaxLevelFilter应该支持数字级别""" + """MaxLevelFilter should support numeric level""" filter = MaxLevelFilter(logging.WARNING) info_record = logging.LogRecord( name="test", level=logging.INFO, pathname="", lineno=0, msg="test", args=(), exc_info=None @@ -181,5 +215,95 @@ class TestMaxLevelFilter(unittest.TestCase): self.assertFalse(filter.filter(warning_record)) +class TestChannelLoggers(unittest.TestCase): + """Test channel logger configuration via get_logger""" + + def setUp(self): + self.temp_dir = tempfile.mkdtemp(prefix="logger_channel_test_") + if hasattr(setup_logging, "_configured"): + delattr(setup_logging, "_configured") + # Clear channel configuration cache + from fastdeploy.logger.logger import FastDeployLogger + + FastDeployLogger._configured_channels = set() + + self.patches = [ + patch("fastdeploy.envs.FD_LOG_DIR", self.temp_dir), + patch("fastdeploy.envs.FD_DEBUG", 0), + patch("fastdeploy.envs.FD_LOG_BACKUP_COUNT", "3"), + patch("fastdeploy.envs.FD_LOG_LEVEL", None), + ] + [p.start() for p in self.patches] + + def tearDown(self): + [p.stop() for p in self.patches] + shutil.rmtree(self.temp_dir, ignore_errors=True) + if hasattr(setup_logging, "_configured"): + delattr(setup_logging, "_configured") + # Clear channel configuration cache + from fastdeploy.logger.logger import FastDeployLogger + + FastDeployLogger._configured_channels = set() + + def test_main_channel_has_handlers(self): + """main channel root logger should have handlers""" + from fastdeploy.logger import get_logger + + get_logger("test", channel="main") + main_channel = logging.getLogger("fastdeploy.main") + self.assertTrue(len(main_channel.handlers) > 0) + + def test_request_channel_has_handlers(self): + """request channel root logger should have handlers""" + from fastdeploy.logger import get_logger + + get_logger("test", channel="request") + request_channel = 
logging.getLogger("fastdeploy.request") + self.assertTrue(len(request_channel.handlers) > 0) + + def test_console_channel_has_stdout_handler(self): + """console channel should have stdout handler""" + from fastdeploy.logger import get_logger + + get_logger("test", channel="console") + console_channel = logging.getLogger("fastdeploy.console") + handler_types = [type(h).__name__ for h in console_channel.handlers] + self.assertIn("StreamHandler", handler_types) + + def test_child_logger_propagates_to_channel(self): + """Child loggers should propagate to channel root logger""" + from fastdeploy.logger import get_logger + + logger = get_logger("child_test", channel="main") + # Child logger should have no direct handlers (propagates to parent) + self.assertEqual(len(logger.handlers), 0) + self.assertEqual(logger.name, "fastdeploy.main.child_test") + + def test_channel_file_mapping(self): + """Each channel should write to correct log file""" + from fastdeploy.logger.logger import FastDeployLogger + + expected_files = { + "main": "fastdeploy.log", + "request": "request.log", + "console": "console.log", + } + self.assertEqual(FastDeployLogger._channel_files, expected_files) + + def test_multiple_loggers_same_channel(self): + """Multiple loggers on same channel should share channel root handlers""" + from fastdeploy.logger import get_logger + + logger1 = get_logger("test1", channel="main") + logger2 = get_logger("test2", channel="main") + + main_channel = logging.getLogger("fastdeploy.main") + # Both child loggers should have no handlers + self.assertEqual(len(logger1.handlers), 0) + self.assertEqual(len(logger2.handlers), 0) + # Channel root should have handlers + self.assertTrue(len(main_channel.handlers) > 0) + + if __name__ == "__main__": unittest.main() diff --git a/tests/output/test_process_batch_output.py b/tests/output/test_process_batch_output.py index 46282cd386..3b4d46e54e 100644 --- a/tests/output/test_process_batch_output.py +++ b/tests/output/test_process_batch_output.py @@ -22,6 +22,7 @@ from unittest.mock import MagicMock, Mock, patch import paddle from fastdeploy.engine.request import RequestMetrics, RequestOutput +from fastdeploy.logger.request_logger import RequestLogLevel from fastdeploy.output.token_processor import TokenProcessor paddle.set_device("cpu") @@ -295,9 +296,9 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase): processor.tokens_counter[task_id] = 0 processor.tokens_counter[task2.request_id] = 0 - # Mock llm_logger to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER + # Mock log_request to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER with ( - patch("fastdeploy.output.token_processor.llm_logger") as mock_logger, + patch("fastdeploy.output.token_processor.log_request") as mock_log_request, patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 0), ): # Call the method @@ -306,7 +307,11 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase): # In speculative decoding mode, when accept_num[i] == PREEMPTED_TOKEN_ID, # the code logs "sync preemption" and continues without triggering abort recycling # This is the expected behavior for speculative decoding mode - mock_logger.info.assert_any_call(f"sync preemption for request_id {task_id} done.") + mock_log_request.assert_any_call( + RequestLogLevel.STAGES, + message="sync preemption for request_id {request_id} done.", + request_id=task_id, + ) # Verify that _recycle_resources was NOT called for the aborted task # (it may be called for other tasks like 
test_request_2 if they receive EOS tokens) for call in processor._recycle_resources.call_args_list: diff --git a/tests/output/test_process_batch_output_use_zmq.py b/tests/output/test_process_batch_output_use_zmq.py index 07826e6f0e..c33ac38c00 100644 --- a/tests/output/test_process_batch_output_use_zmq.py +++ b/tests/output/test_process_batch_output_use_zmq.py @@ -21,6 +21,7 @@ from unittest.mock import MagicMock, patch import numpy as np from fastdeploy.engine.request import CompletionOutput, RequestMetrics, RequestOutput +from fastdeploy.logger.request_logger import RequestLogLevel from fastdeploy.output.token_processor import TokenProcessor from fastdeploy.worker.output import LogprobsLists @@ -171,16 +172,20 @@ class TestTokenProcessorLogprobs(unittest.TestCase): # Mock _recycle_resources to track if it's called self.processor._recycle_resources = MagicMock() - # Mock the llm_logger module and envs.ENABLE_V1_KVCACHE_SCHEDULER + # Mock the log_request function and envs.ENABLE_V1_KVCACHE_SCHEDULER with ( - patch("fastdeploy.output.token_processor.llm_logger") as mock_logger, + patch("fastdeploy.output.token_processor.log_request") as mock_log_request, patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 1), ): # Call the method result = self.processor._process_batch_output_use_zmq([stream_data]) - # Verify the recycling logic was triggered - mock_logger.info.assert_any_call(f"start to recycle abort request_id {task_id}") + # Verify the recycling logic was triggered via log_request + mock_log_request.assert_any_call( + RequestLogLevel.STAGES, + message="start to recycle abort request_id {request_id}", + request_id=task_id, + ) self.processor.resource_manager.recycle_abort_task.assert_called_once_with(task_id) self.assertNotIn(task_id, self.processor.resource_manager.to_be_aborted_req_id_set) self.assertEqual(len(result), 0) # Aborted task is skipped (continue) diff --git a/tests/scheduler/test_dp_scheduler.py b/tests/scheduler/test_dp_scheduler.py index 0e42c4491f..e67d257149 100644 --- a/tests/scheduler/test_dp_scheduler.py +++ b/tests/scheduler/test_dp_scheduler.py @@ -47,9 +47,13 @@ sys.modules["fastdeploy.engine.request"] = Mock() sys.modules["fastdeploy.scheduler"] = Mock() sys.modules["fastdeploy.scheduler.local_scheduler"] = Mock() sys.modules["fastdeploy.scheduler.data"] = Mock() +sys.modules["fastdeploy.logger"] = Mock() +sys.modules["fastdeploy.logger.request_logger"] = Mock() # Mock the get_logger function sys.modules["fastdeploy.utils"].get_logger = Mock(return_value=mock_logger) +# Mock the log_request function +sys.modules["fastdeploy.logger.request_logger"].log_request = Mock() # Mock the Request, RequestOutput, and ScheduledResponse classes @@ -240,8 +244,9 @@ class TestDPLocalScheduler(unittest.TestCase): def test_put_results_with_finished_requests(self): """Test putting results with finished requests.""" - # Reset mock logger - mock_logger.reset_mock() + # Get the mock log_request function + mock_log_request = sys.modules["fastdeploy.logger.request_logger"].log_request + mock_log_request.reset_mock() # Create mock request outputs results = [ @@ -254,13 +259,12 @@ class TestDPLocalScheduler(unittest.TestCase): with patch.object(self.scheduler, "responses_not_empty"): self.scheduler.put_results(results) - # Check that finished requests were logged - the logger should have been called - self.assertTrue(mock_logger.info.called) + # Check that finished requests were logged via log_request + self.assertTrue(mock_log_request.called) # Get the actual call 
arguments to verify the message format - call_args = mock_logger.info.call_args[0][0] - self.assertIn("finished responses", call_args) - self.assertIn("req1", call_args) - self.assertIn("req3", call_args) + call_kwargs = mock_log_request.call_args[1] + self.assertIn("finished responses", call_kwargs.get("message", "")) + self.assertIn("req1", str(call_kwargs.get("request_ids", []))) def test_put_results_with_new_responses(self): """Test putting results with new responses.""" diff --git a/tests/scheduler/test_local_scheduler.py b/tests/scheduler/test_local_scheduler.py index 48ef2844a0..0a57e005d9 100644 --- a/tests/scheduler/test_local_scheduler.py +++ b/tests/scheduler/test_local_scheduler.py @@ -17,10 +17,11 @@ import unittest from unittest.mock import Mock, patch # noqa: F401 from fastdeploy.engine.request import Request, RequestMetrics, RequestOutput +from fastdeploy.logger.request_logger import RequestLogLevel # Real FastDeploy imports from fastdeploy.scheduler.local_scheduler import LocalScheduler -from fastdeploy.utils import envs, scheduler_logger +from fastdeploy.utils import envs class TestLocalScheduler(unittest.TestCase): @@ -221,9 +222,9 @@ class TestLocalScheduler(unittest.TestCase): def test_reset_logs_message(self): """Test that reset logs appropriate message.""" - with patch.object(scheduler_logger, "info") as mock_info: + with patch("fastdeploy.scheduler.local_scheduler.log_request") as mock_log: self.scheduler.reset() - mock_info.assert_called_once_with("Scheduler has been reset") + mock_log.assert_called_once_with(RequestLogLevel.LIFECYCLE, message="Scheduler has been reset") def test_put_requests_duplicate_handling(self): """Test handling of duplicate request IDs.""" @@ -449,12 +450,14 @@ class TestLocalScheduler(unittest.TestCase): # Add request first self.scheduler.put_requests([self.mock_request_1]) - with patch.object(scheduler_logger, "info") as mock_info: + with patch("fastdeploy.scheduler.local_scheduler.log_request") as mock_log_request: mock_output = self._create_test_request_output("req_1", finished=True) self.scheduler.put_results([mock_output]) - # Should log finished response - self._assert_log_contains(mock_info, "finished responses") + # Should log finished response via log_request + mock_log_request.assert_called_once() + call_kwargs = mock_log_request.call_args[1] + self.assertIn("finished responses", call_kwargs.get("message", "")) if __name__ == "__main__":