mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-22 16:07:51 +08:00
[Feature] implement log channel separation and request log level system (#7190)
* feat: implement log channel separation and request log level system * fix: log system improvements based on review * add request_id to error logs, use RequestLogLevel enum, and unify logger implementation from utils to logger module
This commit is contained in:
@@ -25,7 +25,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"FD_LOG_REQUESTS": lambda: int(os.getenv("FD_LOG_REQUESTS", "1")),
|
||||
|
||||
# Request logging detail level (0-3). Higher level means more verbose output.
|
||||
"FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "0")),
|
||||
"FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "2")),
|
||||
|
||||
# Max field length for request logging truncation.
|
||||
"FD_LOG_MAX_LEN": lambda: int(os.getenv("FD_LOG_MAX_LEN", "2048")),
|
||||
|
||||
+40
-7
@@ -5,12 +5,48 @@
|
||||
FastDeploy generates the following log files during deployment. Below is an explanation of each log's purpose.
|
||||
By default, logs are stored in the `log` directory under the execution path. To specify a custom directory, set the environment variable `FD_LOG_DIR`.
|
||||
|
||||
## Log Channel Separation
|
||||
|
||||
FastDeploy separates logs into three channels:
|
||||
|
||||
| Channel | Logger Name | Output Files | Description |
|
||||
|---------|-------------|--------------|-------------|
|
||||
| main | `fastdeploy.main.*` | `fastdeploy.log`, `console.log` | Main logs for system configuration, startup info, etc. |
|
||||
| request | `fastdeploy.request.*` | `request.log` | Request logs for request lifecycle and processing details |
|
||||
| console | `fastdeploy.console.*` | `console.log` | Console logs, output to terminal and console.log |
|
||||
|
||||
## Request Log Levels
|
||||
|
||||
Request logs (`request.log`) support 4 levels, controlled by the environment variable `FD_LOG_REQUESTS_LEVEL`:
|
||||
|
||||
| Level | Enum Name | Description | Example Content |
|
||||
|-------|-----------|-------------|-----------------|
|
||||
| 0 | LIFECYCLE | Lifecycle start/end | Request creation/initialization, completion stats (InputToken/OutputToken/latency), first and last streaming response, request abort |
|
||||
| 1 | STAGES | Processing stages | Semaphore acquire/release, first token time recording, signal handling (preemption/abortion/recovery), cache task, preprocess time, parameter adjustment warnings |
|
||||
| 2 | CONTENT | Content and scheduling | Request parameters, processed request, scheduling info (enqueue/pull/finish), response content (long content is truncated) |
|
||||
| 3 | FULL | Complete raw data | Complete request and response data, raw received request |
|
||||
|
||||
Default level is 2 (CONTENT), which logs request parameters, scheduling info, and response content. Lower levels (0-1) only log critical events, while level 3 includes complete raw data.
|
||||
|
||||
## Log-Related Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `FD_LOG_DIR` | `log` | Log file storage directory |
|
||||
| `FD_LOG_LEVEL` | `INFO` | Log level, supports `INFO` or `DEBUG` |
|
||||
| `FD_LOG_REQUESTS` | `1` | Enable request logging, `0` to disable, `1` to enable |
|
||||
| `FD_LOG_REQUESTS_LEVEL` | `2` | Request log level, range 0-3 |
|
||||
| `FD_LOG_MAX_LEN` | `2048` | Maximum length for L2 level log content (excess is truncated) |
|
||||
| `FD_LOG_BACKUP_COUNT` | `7` | Number of log files to retain |
|
||||
| `FD_DEBUG` | `0` | Debug mode, `1` enables DEBUG log level |
|
||||
|
||||
## Inference Service Logs
|
||||
|
||||
* `fastdeploy.log` : Main log file, records system configuration, startup information, runtime status, etc.
|
||||
* `request.log` : Request log file, records user request lifecycle and processing details
|
||||
* `console.log` : Console log, records model startup time and other information. This log is also printed to the console.
|
||||
* `error.log` : Error log file, records all ERROR and above level logs
|
||||
* `backup_env.*.json` : Records environment variables set during instance startup. The number of files matches the number of GPU cards.
|
||||
* `envlog.*` : Logs environment variables set during instance startup. The number of files matches the number of GPU cards.
|
||||
* `console.log` : Records model startup time and other information. This log is also printed to the console.
|
||||
* `data_processor.log` : Logs input/output data encoding and decoding details.
|
||||
* `fastdeploy.log` : Records configuration information during instance startup, as well as request and response details during runtime.
|
||||
* `workerlog.*` : Tracks model loading progress and inference operator errors. Each GPU card has a corresponding file.
|
||||
* `worker_process.log` : Logs engine inference data for each iteration.
|
||||
* `cache_manager.log` : Records KV Cache logical index allocation for each request and cache hit status.
|
||||
@@ -18,9 +54,6 @@ By default, logs are stored in the `log` directory under the execution path. To
|
||||
* `gpu_worker.log` : Records KV Cache block count information during profiling.
|
||||
* `gpu_model_runner.log` : Contains model details and loading time.
|
||||
|
||||
## Online Inference Client Logs
|
||||
* `api_server.log` : Logs startup parameters and received request information.
|
||||
|
||||
## Scheduler Logs
|
||||
* `scheduler.log` : Records scheduler information, including node status and request allocation details.
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"FD_LOG_REQUESTS": lambda: int(os.getenv("FD_LOG_REQUESTS", "1")),
|
||||
|
||||
# 请求日志详细级别 (0-3)。级别越高输出越详细。
|
||||
"FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "0")),
|
||||
"FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "2")),
|
||||
|
||||
# 请求日志字段截断最大长度。
|
||||
"FD_LOG_MAX_LEN": lambda: int(os.getenv("FD_LOG_MAX_LEN", "2048")),
|
||||
|
||||
+42
-10
@@ -2,15 +2,51 @@
|
||||
|
||||
# 日志说明
|
||||
|
||||
FastDeploy 在部署过程中,会产生如下日志文件,各日志含义说明
|
||||
FastDeploy 在部署过程中,会产生如下日志文件,各日志含义说明。
|
||||
默认日志目录为执行目录下的 `log` 文件夹,若需要指定可设置环境变量 `FD_LOG_DIR`。
|
||||
|
||||
## 日志通道划分
|
||||
|
||||
FastDeploy 将日志分为三个通道:
|
||||
|
||||
| 通道 | Logger 名称 | 输出文件 | 说明 |
|
||||
|------|-------------|----------|------|
|
||||
| main | `fastdeploy.main.*` | `fastdeploy.log`, `console.log` | 主日志,记录系统配置、启动信息等 |
|
||||
| request | `fastdeploy.request.*` | `request.log` | 请求日志,记录请求生命周期和处理细节 |
|
||||
| console | `fastdeploy.console.*` | `console.log` | 控制台日志,输出到终端和 console.log |
|
||||
|
||||
## 请求日志级别
|
||||
|
||||
请求日志 (`request.log`) 支持 4 个级别,通过环境变量 `FD_LOG_REQUESTS_LEVEL` 控制:
|
||||
|
||||
| 级别 | 枚举名 | 说明 | 示例内容 |
|
||||
|------|--------|------|----------|
|
||||
| 0 | LIFECYCLE | 生命周期起止 | 请求创建/初始化、完成统计(InputToken/OutputToken/耗时)、流式响应首次和最后发送、请求中止 |
|
||||
| 1 | STAGES | 处理阶段 | 信号量获取/释放、首 token 时间记录、信号处理(preemption/abortion/recovery)、缓存任务、预处理耗时、参数调整警告 |
|
||||
| 2 | CONTENT | 内容和调度 | 请求参数、处理后的请求、调度信息(入队/拉取/完成)、响应内容(超长内容会被截断) |
|
||||
| 3 | FULL | 完整数据 | 完整的请求和响应数据、原始接收请求 |
|
||||
|
||||
默认级别为 2 (CONTENT),记录请求参数、调度信息和响应内容。较低级别 (0-1) 只记录关键事件,级别 3 则包含完整原始数据。
|
||||
|
||||
## 日志相关环境变量
|
||||
|
||||
| 环境变量 | 默认值 | 说明 |
|
||||
|----------|--------|------|
|
||||
| `FD_LOG_DIR` | `log` | 日志文件存储目录 |
|
||||
| `FD_LOG_LEVEL` | `INFO` | 日志级别,支持 `INFO` 或 `DEBUG` |
|
||||
| `FD_LOG_REQUESTS` | `1` | 是否启用请求日志,`0` 禁用,`1` 启用 |
|
||||
| `FD_LOG_REQUESTS_LEVEL` | `2` | 请求日志级别,范围 0-3 |
|
||||
| `FD_LOG_MAX_LEN` | `2048` | L2 级别日志内容的最大长度(超出部分会被截断) |
|
||||
| `FD_LOG_BACKUP_COUNT` | `7` | 日志文件保留数量 |
|
||||
| `FD_DEBUG` | `0` | 调试模式,`1` 启用时日志级别设为 `DEBUG` |
|
||||
|
||||
## 推理服务日志
|
||||
|
||||
* `fastdeploy.log` : 主日志文件,记录系统配置、启动信息、运行状态等
|
||||
* `request.log` : 请求日志文件,记录用户请求的生命周期和处理细节
|
||||
* `console.log` : 控制台日志,记录模型启动耗时等信息,该日志信息会被打印到控制台
|
||||
* `error.log` : 错误日志文件,记录所有 ERROR 及以上级别的日志
|
||||
* `backup_env.*.json` : 记录当前实例启动时设置的环境变量,文件个数与卡数相同
|
||||
* `envlog.*` : 记录当前实例启动时设置的环境变量,文件个数与卡数相同
|
||||
* `console.log` : 记录模型启动耗时等信息,该日志信息会被打印到控制台
|
||||
* `data_processor.log` : 记录输入数据及输出输出编码解码的内容
|
||||
* `fastdeploy.log` : 记录当前实例启动的各个 config 的信息,运行中记录用户请求的 request 及 response 信息
|
||||
* `workerlog.*` : 记录模型启动加载进度及推理算子报错信息,每个卡对应一个文件
|
||||
* `worker_process.log` : 记录引擎每一轮推理的数据
|
||||
* `cache_manager.log` : 记录每一个请求分配 KV Cache 的逻辑索引,以及当前请求的命中情况
|
||||
@@ -18,9 +54,6 @@ FastDeploy 在部署过程中,会产生如下日志文件,各日志含义说
|
||||
* `gpu_worker.log` : 记录 profile 时计算 KV Cache block 数目的信息
|
||||
* `gpu_model_runner.log` : 当前的模型信息及加载时间
|
||||
|
||||
## 在线推理客户端日志
|
||||
* `api_server.log` : 记录启动参数,及接收到的请求信息
|
||||
|
||||
## 调度器日志
|
||||
* `scheduler.log` : 记录调度器的信息包含当前结点的信息,每条请求分配的信息
|
||||
|
||||
@@ -31,12 +64,11 @@ FastDeploy 在部署过程中,会产生如下日志文件,各日志含义说
|
||||
|
||||
* `cache_queue_manager.log` : 记录启动参数,及接收到的请求信息
|
||||
* `cache_transfer_manager.log` : 记录启动参数,及接收到的请求信息
|
||||
* `cache_queue_manager.log` : 记录启动参数,及接收到的请求信息
|
||||
* `launch_cache_manager.log` : 启动 cache transfer 记录启动参数,报错信息
|
||||
|
||||
## PD 分离相关日志
|
||||
|
||||
* `cache_messager.log` : 记录P 实例使用的传输协议及传输信息
|
||||
* `cache_messager.log` : 记录 P 实例使用的传输协议及传输信息
|
||||
* `splitwise_connector.log` : 记录收到 P/D 发送的数据,及建联信息
|
||||
|
||||
## CudaGraph 相关日志
|
||||
|
||||
@@ -37,6 +37,7 @@ from fastdeploy.entrypoints.openai.utils import DealerConnectionManager
|
||||
from fastdeploy.input.preprocess import InputPreprocessor
|
||||
from fastdeploy.inter_communicator import IPCSignal
|
||||
from fastdeploy.inter_communicator.zmq_client import ZmqIpcClient
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.utils import EngineError, envs, llm_logger
|
||||
|
||||
@@ -562,7 +563,7 @@ class AsyncLLM(EngineServiceClient):
|
||||
llm_logger.info(f"Request {conn_request_id} generator exit (outer)")
|
||||
return
|
||||
except Exception as e:
|
||||
llm_logger.error(f"Request {conn_request_id} failed: {e}")
|
||||
log_request_error(message="Request {request_id} failed: {error}", request_id=conn_request_id, error=e)
|
||||
raise EngineError(str(e), error_code=500) from e
|
||||
finally:
|
||||
# Ensure request_map/request_num are cleaned up
|
||||
@@ -584,7 +585,7 @@ class AsyncLLM(EngineServiceClient):
|
||||
await self.connection_manager.cleanup_request(request_id)
|
||||
llm_logger.info(f"Aborted request {request_id}")
|
||||
except Exception as e:
|
||||
llm_logger.error(f"Failed to abort request {request_id}: {e}")
|
||||
log_request_error(message="Failed to abort request {request_id}: {error}", request_id=request_id, error=e)
|
||||
|
||||
async def shutdown(self):
|
||||
"""
|
||||
|
||||
+45
-11
@@ -44,6 +44,11 @@ from fastdeploy.engine.common_engine import (
|
||||
from fastdeploy.engine.expert_service import start_data_parallel_service
|
||||
from fastdeploy.engine.request import Request
|
||||
from fastdeploy.inter_communicator import EngineWorkerQueue, IPCSignal
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.platforms import current_platform
|
||||
from fastdeploy.utils import EngineError, console_logger, envs, llm_logger
|
||||
@@ -285,7 +290,7 @@ class LLMEngine:
|
||||
# Create Request struct after processing
|
||||
request = Request.from_dict(task)
|
||||
request.metrics.scheduler_recv_req_time = time.time()
|
||||
llm_logger.info(f"Receive request {request}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Receive request {request}", request=request)
|
||||
request.metrics.preprocess_start_time = time.time()
|
||||
|
||||
request.prompt_token_ids_len = len(request.prompt_token_ids)
|
||||
@@ -304,12 +309,20 @@ class LLMEngine:
|
||||
f"Input text is too long, length of prompt token({input_ids_len}) "
|
||||
f"+ min_dec_len ({min_tokens}) >= max_model_len "
|
||||
)
|
||||
llm_logger.error(error_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] error: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=error_msg,
|
||||
)
|
||||
raise EngineError(error_msg, error_code=400)
|
||||
|
||||
if input_ids_len > self.cfg.model_config.max_model_len:
|
||||
error_msg = f"Length of input token({input_ids_len}) exceeds the limit max_model_len({self.cfg.model_config.max_model_len})."
|
||||
llm_logger.error(error_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] error: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=error_msg,
|
||||
)
|
||||
raise EngineError(error_msg, error_code=400)
|
||||
|
||||
if request.get("stop_seqs_len") is not None:
|
||||
@@ -320,7 +333,11 @@ class LLMEngine:
|
||||
f"Length of stop ({stop_seqs_len}) exceeds the limit max_stop_seqs_num({max_stop_seqs_num})."
|
||||
"Please reduce the number of stop or set a lager max_stop_seqs_num by `FD_MAX_STOP_SEQS_NUM`"
|
||||
)
|
||||
llm_logger.error(error_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] error: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=error_msg,
|
||||
)
|
||||
raise EngineError(error_msg, error_code=400)
|
||||
stop_seqs_max_len = envs.FD_STOP_SEQS_MAX_LEN
|
||||
for single_stop_seq_len in stop_seqs_len:
|
||||
@@ -329,7 +346,11 @@ class LLMEngine:
|
||||
f"Length of stop_seqs({single_stop_seq_len}) exceeds the limit stop_seqs_max_len({stop_seqs_max_len})."
|
||||
"Please reduce the length of stop sequences or set a larger stop_seqs_max_len by `FD_STOP_SEQS_MAX_LEN`"
|
||||
)
|
||||
llm_logger.error(error_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] error: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=error_msg,
|
||||
)
|
||||
raise EngineError(error_msg, error_code=400)
|
||||
|
||||
if self._has_guided_input(request):
|
||||
@@ -342,14 +363,22 @@ class LLMEngine:
|
||||
request, err_msg = self.guided_decoding_checker.schema_format(request)
|
||||
|
||||
if err_msg is not None:
|
||||
llm_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] error: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=err_msg,
|
||||
)
|
||||
raise EngineError(err_msg, error_code=400)
|
||||
|
||||
request.metrics.preprocess_end_time = time.time()
|
||||
request.metrics.scheduler_recv_req_time = time.time()
|
||||
self.engine.scheduler.put_requests([request])
|
||||
llm_logger.info(f"Cache task with request_id ({request.get('request_id')})")
|
||||
llm_logger.debug(f"cache task: {request}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="Cache task with request_id ({request_id})",
|
||||
request_id=request.get("request_id"),
|
||||
)
|
||||
log_request(RequestLogLevel.FULL, message="cache task: {request}", request=request)
|
||||
|
||||
def _worker_processes_ready(self):
|
||||
"""
|
||||
@@ -717,11 +746,16 @@ class LLMEngine:
|
||||
Yields:
|
||||
dict: The generated response.
|
||||
"""
|
||||
llm_logger.info(f"Starting generation for prompt: {prompts}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Starting generation for prompt: {prompts}", prompts=prompts)
|
||||
try:
|
||||
req_id = self._format_and_add_data(prompts)
|
||||
except Exception as e:
|
||||
llm_logger.error(f"Error happened while adding request, details={e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] error while adding request: {error}, {traceback}",
|
||||
request_id=prompts.get("request_id"),
|
||||
error=str(e),
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
raise EngineError(str(e), error_code=400)
|
||||
|
||||
# Get the result of the current request
|
||||
@@ -740,7 +774,7 @@ class LLMEngine:
|
||||
output = self.engine.data_processor.process_response_dict(
|
||||
result.to_dict(), stream=False, include_stop_str_in_output=False, direct_decode=not stream
|
||||
)
|
||||
llm_logger.debug(f"Generate result: {output}")
|
||||
log_request(RequestLogLevel.FULL, message="Generate result: {output}", output=output)
|
||||
if not stream:
|
||||
yield output
|
||||
else:
|
||||
|
||||
@@ -39,7 +39,11 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
StructuralTagResponseFormat,
|
||||
ToolCall,
|
||||
)
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.worker.output import (
|
||||
LogprobsLists,
|
||||
PromptLogprobs,
|
||||
@@ -313,15 +317,13 @@ class Request:
|
||||
), "The parameter `raw_request` is not supported now, please use completion api instead."
|
||||
for key, value in req.metadata.items():
|
||||
setattr(request, key, value)
|
||||
from fastdeploy.utils import api_server_logger
|
||||
|
||||
api_server_logger.warning("The parameter metadata is obsolete.")
|
||||
log_request(RequestLogLevel.STAGES, message="The parameter metadata is obsolete.")
|
||||
|
||||
return request
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, d: dict):
|
||||
data_processor_logger.debug(f"{d}")
|
||||
log_request(RequestLogLevel.FULL, message="{request}", request=d)
|
||||
sampling_params: SamplingParams = None
|
||||
pooling_params: PoolingParams = None
|
||||
metrics: RequestMetrics = None
|
||||
@@ -352,8 +354,11 @@ class Request:
|
||||
ImagePosition(**mm_pos) if not isinstance(mm_pos, ImagePosition) else mm_pos
|
||||
)
|
||||
except Exception as e:
|
||||
data_processor_logger.error(
|
||||
f"Convert mm_positions to ImagePosition error: {e}, {str(traceback.format_exc())}"
|
||||
log_request_error(
|
||||
message="request[{request_id}] Convert mm_positions to ImagePosition error: {error}, {traceback}",
|
||||
request_id=d.get("request_id"),
|
||||
error=str(e),
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
return cls(
|
||||
request_id=d["request_id"],
|
||||
|
||||
@@ -23,6 +23,11 @@ from fastapi.responses import Response, StreamingResponse
|
||||
|
||||
from fastdeploy.engine.args_utils import EngineArgs
|
||||
from fastdeploy.engine.engine import LLMEngine
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.utils import (
|
||||
FlexibleArgumentParser,
|
||||
api_server_logger,
|
||||
@@ -61,7 +66,7 @@ async def generate(request: dict):
|
||||
"""
|
||||
generate stream api
|
||||
"""
|
||||
api_server_logger.info(f"Receive request: {request}")
|
||||
log_request(RequestLogLevel.FULL, message="Receive request: {request}", request=request)
|
||||
stream = request.get("stream", 0)
|
||||
|
||||
if not stream:
|
||||
@@ -72,7 +77,11 @@ async def generate(request: dict):
|
||||
output = result
|
||||
except Exception as e:
|
||||
# 记录完整的异常堆栈信息
|
||||
api_server_logger.error(f"Error during generation: {e!s}", exc_info=True)
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error during generation: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=str(e),
|
||||
)
|
||||
# 返回结构化的错误消息并终止流
|
||||
output = {"error": str(e), "error_type": e.__class__.__name__}
|
||||
return output
|
||||
@@ -84,7 +93,11 @@ async def generate(request: dict):
|
||||
yield f"data: {json.dumps(result)}\n\n"
|
||||
except Exception as e:
|
||||
# 记录完整的异常堆栈信息
|
||||
api_server_logger.error(f"Error during generation: {e!s}", exc_info=True)
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error during generation: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=str(e),
|
||||
)
|
||||
# 返回结构化的错误消息并终止流
|
||||
error_msg = {"error": str(e), "error_type": e.__class__.__name__}
|
||||
yield f"data: {json.dumps(error_msg)}\n\n"
|
||||
|
||||
@@ -49,6 +49,11 @@ from fastdeploy.inter_communicator import (
|
||||
RearrangeExpertStatus,
|
||||
ZmqIpcClient,
|
||||
)
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.platforms import current_platform
|
||||
from fastdeploy.trace.constants import LoggingEventName
|
||||
@@ -362,12 +367,16 @@ class EngineClient:
|
||||
|
||||
if "messages" in task:
|
||||
task["messages"] = None
|
||||
api_server_logger.info(f"task['max_tokens']:{task['max_tokens']}")
|
||||
main_process_metrics.request_params_max_tokens.observe(task["max_tokens"])
|
||||
main_process_metrics.prompt_tokens_total.inc(input_ids_len)
|
||||
main_process_metrics.request_prompt_tokens.observe(input_ids_len)
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"add_requests error: {e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] add_requests error: {error}, {traceback}",
|
||||
request_id=task.get("request_id"),
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
raise EngineError(str(e), error_code=400)
|
||||
|
||||
if input_ids_len + min_tokens >= self.max_model_len:
|
||||
@@ -375,14 +384,18 @@ class EngineClient:
|
||||
f"Input text is too long, input_ids_len ({input_ids_len}) "
|
||||
f"+ min_tokens({min_tokens}) >= max_model_len({self.max_model_len})"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {error_msg}", request_id=task.get("request_id"), error_msg=error_msg
|
||||
)
|
||||
raise EngineError(error_msg, error_code=400)
|
||||
|
||||
if input_ids_len > self.max_model_len:
|
||||
error_msg = (
|
||||
f"Length of input token({input_ids_len}) exceeds the limit max_model_len({self.max_model_len})."
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {error_msg}", request_id=task.get("request_id"), error_msg=error_msg
|
||||
)
|
||||
raise EngineError(error_msg, error_code=400)
|
||||
|
||||
if "stop_seqs_len" in task and task["stop_seqs_len"]:
|
||||
@@ -393,7 +406,9 @@ class EngineClient:
|
||||
f"Length of stop ({stop_seqs_len}) exceeds the limit max_stop_seqs_num({max_stop_seqs_num})."
|
||||
"Please reduce the number of stop or set a lager max_stop_seqs_num by `FD_MAX_STOP_SEQS_NUM`"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {error_msg}", request_id=task.get("request_id"), error_msg=error_msg
|
||||
)
|
||||
raise EngineError(error_msg, error_code=400)
|
||||
stop_seqs_max_len = envs.FD_STOP_SEQS_MAX_LEN
|
||||
for single_stop_seq_len in stop_seqs_len:
|
||||
@@ -402,18 +417,28 @@ class EngineClient:
|
||||
f"Length of stop_seqs({single_stop_seq_len}) exceeds the limit stop_seqs_max_len({stop_seqs_max_len})."
|
||||
"Please reduce the length of stop sequences or set a larger stop_seqs_max_len by `FD_STOP_SEQS_MAX_LEN`"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {error_msg}",
|
||||
request_id=task.get("request_id"),
|
||||
error_msg=error_msg,
|
||||
)
|
||||
raise EngineError(error_msg, error_code=400)
|
||||
|
||||
task["metrics"]["preprocess_end_time"] = time.time()
|
||||
preprocess_cost_time = task["metrics"]["preprocess_end_time"] - task["metrics"]["preprocess_start_time"]
|
||||
api_server_logger.info(
|
||||
f"Cache request with request_id ({task.get('request_id')}), "
|
||||
f"preprocess time cost {preprocess_cost_time}"
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Cache request with request_id ({request_id}), preprocess time cost {preprocess_cost_time}",
|
||||
request_id=task.get("request_id"),
|
||||
preprocess_cost_time=preprocess_cost_time,
|
||||
)
|
||||
|
||||
self.valid_parameters(task)
|
||||
api_server_logger.debug(f"Receive task: {task}")
|
||||
log_request(
|
||||
level=RequestLogLevel.FULL,
|
||||
message="Receive task: {task}",
|
||||
task=task,
|
||||
)
|
||||
n = task.get("n", 1)
|
||||
try:
|
||||
request_id_idx = task.get("request_id")
|
||||
@@ -433,7 +458,12 @@ class EngineClient:
|
||||
tracing.TraceSpanName.PREPROCESSING, task.get("request_id").split("_")[0], thread_finish_flag=True
|
||||
)
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"zmq_client send task error: {e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] zmq_client send task error: {error}, {traceback}",
|
||||
request_id=task.get("request_id"),
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
raise EngineError(str(e), error_code=400)
|
||||
|
||||
def _send_task(self, task):
|
||||
@@ -455,8 +485,11 @@ class EngineClient:
|
||||
|
||||
if data.get("max_tokens") is not None:
|
||||
if data["max_tokens"] < 1 or data["max_tokens"] >= self.max_model_len:
|
||||
api_server_logger.error(
|
||||
f"req_id:{data['request_id']}, max_tokens must be defined [1, {self.max_model_len}), but now it's {data['max_tokens']}."
|
||||
log_request_error(
|
||||
message="req_id:{request_id}, max_tokens must be defined [1, {max_model_len}), but now it's {max_tokens}.",
|
||||
request_id=data["request_id"],
|
||||
max_model_len=self.max_model_len,
|
||||
max_tokens=data["max_tokens"],
|
||||
)
|
||||
raise ValueError(
|
||||
f"max_tokens can be defined [1, {self.max_model_len}), but now it's {data['max_tokens']}."
|
||||
@@ -467,14 +500,18 @@ class EngineClient:
|
||||
raise ParameterError("reasoning_max_tokens", "reasoning_max_tokens must be greater than 0")
|
||||
if data["reasoning_max_tokens"] > data["max_tokens"]:
|
||||
data["reasoning_max_tokens"] = data["max_tokens"]
|
||||
api_server_logger.warning(
|
||||
f"req_id: {data['request_id']}, reasoning_max_tokens exceeds max_tokens, the value of reasoning_max_tokens will be adjusted to {data['max_tokens']}"
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="req_id: {request_id}, reasoning_max_tokens exceeds max_tokens, the value of reasoning_max_tokens will be adjusted to {max_tokens}",
|
||||
request_id=data["request_id"],
|
||||
max_tokens=data["max_tokens"],
|
||||
)
|
||||
if data.get("reasoning_effort") is not None:
|
||||
data["reasoning_max_tokens"] = None
|
||||
api_server_logger.warning(
|
||||
f"req_id: {data['request_id']}, reasoning_max_tokens and reasoning_effort are both set, "
|
||||
f"enable_thinking will be disabled."
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="req_id: {request_id}, reasoning_max_tokens and reasoning_effort are both set, enable_thinking will be disabled.",
|
||||
request_id=data["request_id"],
|
||||
)
|
||||
|
||||
if data.get("response_max_tokens") is not None:
|
||||
@@ -493,7 +530,9 @@ class EngineClient:
|
||||
is_chat = True
|
||||
if not self.enable_logprob:
|
||||
err_msg = "Logprobs is disabled, please enable it in startup config."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ParameterError("logprobs", err_msg)
|
||||
top_logprobs = data.get("top_logprobs")
|
||||
elif isinstance(logprobs, int):
|
||||
@@ -506,11 +545,15 @@ class EngineClient:
|
||||
max_logprobs = self.ori_vocab_size
|
||||
if max_logprobs < -1:
|
||||
err_msg = f"Invalid 'max_logprobs': must be >= -1, got {max_logprobs}."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("max_logprobs", err_msg)
|
||||
if max_logprobs > self.ori_vocab_size:
|
||||
err_msg = f"Invalid 'max_logprobs': must be <= vocab_size {self.ori_vocab_size}, got {max_logprobs}."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("max_logprobs", err_msg)
|
||||
|
||||
prompt_logprobs = data.get("prompt_logprobs", None)
|
||||
@@ -518,41 +561,55 @@ class EngineClient:
|
||||
if prompt_logprobs is not None:
|
||||
if not self.enable_logprob:
|
||||
err_msg = "`enable_logprob` is disabled, please enable it in startup config."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ParameterError("prompt_logprobs", err_msg)
|
||||
|
||||
if not envs.FD_USE_GET_SAVE_OUTPUT_V1:
|
||||
err_msg = "prompt_logprobs is not support when FD_USE_GET_SAVE_OUTPUT_V1 is disabled."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ParameterError("prompt_logprobs", err_msg)
|
||||
|
||||
if self.enable_prefix_caching:
|
||||
err_msg = "prompt_logprobs is not support when prefix caching is enabled."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ParameterError("prompt_logprobs", err_msg)
|
||||
|
||||
if prompt_logprobs == -1 and self.ori_vocab_size > max_logprobs:
|
||||
err_msg = f"The requested value of ({self.ori_vocab_size}) for prompt_logprobs (-1) exceeds the maximum allowed value of ({max_logprobs})"
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("prompt_logprobs", err_msg)
|
||||
|
||||
if prompt_logprobs < -1:
|
||||
err_msg = (
|
||||
f"prompt_logprobs must be a non-negative value or -1; the current value is {prompt_logprobs}."
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("prompt_logprobs", err_msg)
|
||||
|
||||
if prompt_logprobs > max_logprobs:
|
||||
err_msg = f"Number of prompt_logprobs requested ({prompt_logprobs}) exceeds maximum allowed value ({max_logprobs})."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("prompt_logprobs", err_msg)
|
||||
|
||||
# enable_logprob
|
||||
if top_logprobs is not None:
|
||||
if not self.enable_logprob:
|
||||
err_msg = "Logprobs is disabled, please enable it in startup config."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ParameterError("top_logprobs" if is_chat else "logprobs", err_msg)
|
||||
|
||||
if not isinstance(top_logprobs, int):
|
||||
@@ -560,28 +617,38 @@ class EngineClient:
|
||||
err_msg = (
|
||||
f"Invalid type for {'top_logprobs' if is_chat else 'logprobs'}: expected int but got {err_type}."
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ParameterError("top_logprobs" if is_chat else "logprobs", err_msg)
|
||||
|
||||
if top_logprobs > max_logprobs:
|
||||
err_msg = f"Number of {'top_logprobs' if is_chat else 'logprobs'} requested ({top_logprobs}) exceeds maximum allowed value ({max_logprobs})."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("top_logprobs" if is_chat else "logprobs", err_msg)
|
||||
|
||||
if not envs.FD_USE_GET_SAVE_OUTPUT_V1:
|
||||
if top_logprobs < 0 or top_logprobs > max_logprobs:
|
||||
err_msg = f"{'top_logprobs' if is_chat else 'logprobs'} must be between 0 and {max_logprobs}; the current value is {top_logprobs}."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("top_logprobs" if is_chat else "logprobs", err_msg)
|
||||
else:
|
||||
if top_logprobs == -1 and self.ori_vocab_size > max_logprobs:
|
||||
err_msg = f"The requested value of ({self.ori_vocab_size}) for {'top_logprobs' if is_chat else 'logprobs'} (-1) exceeds the maximum allowed value of ({max_logprobs})"
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("top_logprobs" if is_chat else "logprobs", err_msg)
|
||||
|
||||
if top_logprobs < -1:
|
||||
err_msg = f"{'top_logprobs' if is_chat else 'logprobs'} must be a non-negative value or -1; the current value is {top_logprobs}."
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {err_msg}", request_id=data.get("request_id"), err_msg=err_msg
|
||||
)
|
||||
raise ValueError("top_logprobs" if is_chat else "logprobs", err_msg)
|
||||
|
||||
def check_health(self, time_interval_threashold=30):
|
||||
@@ -617,12 +684,21 @@ class EngineClient:
|
||||
return response
|
||||
except asyncio.TimeoutError:
|
||||
error_response = ControlResponse(request_id, 500, "Timeout waiting for control method response")
|
||||
api_server_logger.error(f"Control request timed out: {error_response}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Control request timed out: {error_response}",
|
||||
request_id=request_id,
|
||||
error_response=error_response,
|
||||
)
|
||||
return error_response
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
api_server_logger.error(f"Unknown error in control method: {str(e)}\n{traceback.format_exc()}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Unknown error in control method: {error}\n{traceback}",
|
||||
request_id=request_id,
|
||||
error=str(e),
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
error_response = ControlResponse(request_id, 500, str(e))
|
||||
return error_response
|
||||
|
||||
@@ -1024,7 +1100,11 @@ class EngineClient:
|
||||
|
||||
async def abort(self, request_id, n=1) -> None:
|
||||
if envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE:
|
||||
api_server_logger.info(f"abort request_id:{request_id}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="abort request_id: {request_id}",
|
||||
request_id=request_id,
|
||||
)
|
||||
if n <= 0:
|
||||
api_server_logger.warning("Abort function called with non-positive n: %d. No requests aborted.", n)
|
||||
return
|
||||
@@ -1044,7 +1124,11 @@ class EngineClient:
|
||||
}
|
||||
self._send_task(data)
|
||||
|
||||
api_server_logger.info("Aborted request(s) %s.", ",".join(request_ids))
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Aborted request(s) {request_ids}.",
|
||||
request_ids=",".join(request_ids),
|
||||
)
|
||||
|
||||
def process_messages(self, messages):
|
||||
for message in messages:
|
||||
|
||||
@@ -36,6 +36,7 @@ from fastdeploy.entrypoints.chat_utils import load_chat_template
|
||||
from fastdeploy.entrypoints.openai.protocol import ChatCompletionToolsParam
|
||||
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
|
||||
from fastdeploy.input.utils import validate_model_path
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.utils import (
|
||||
deprecated_kwargs_warning,
|
||||
llm_logger,
|
||||
@@ -139,7 +140,11 @@ class LLM:
|
||||
continue
|
||||
self.req_output[request_id].add(result)
|
||||
except Exception as e:
|
||||
llm_logger.error(f"Unexpected error happened: {e}, {traceback.format_exc()!s}")
|
||||
log_request_error(
|
||||
message="Unexpected error happened: {error}, {traceback}",
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
|
||||
def generate(
|
||||
self,
|
||||
@@ -437,7 +442,11 @@ class LLM:
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
llm_logger.error(f"Error building sample logprobs from LogprobsLists: {e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="Error building sample logprobs from LogprobsLists: {error}, {traceback}",
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
|
||||
def _build_prompt_logprobs(
|
||||
self,
|
||||
|
||||
@@ -73,6 +73,11 @@ from fastdeploy.entrypoints.openai.v1.serving_completion import (
|
||||
OpenAIServingCompletion as OpenAIServingCompletionV1,
|
||||
)
|
||||
from fastdeploy.envs import environment_variables
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import get_filtered_metrics
|
||||
from fastdeploy.utils import (
|
||||
ExceptionHandler,
|
||||
@@ -325,7 +330,11 @@ async def connection_manager():
|
||||
await asyncio.wait_for(connection_semaphore.acquire(), timeout=0.001)
|
||||
yield
|
||||
except asyncio.TimeoutError:
|
||||
api_server_logger.info(f"Reach max request concurrency, semaphore status: {connection_semaphore.status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Reach max request concurrency, semaphore status: {status}",
|
||||
status=connection_semaphore.status(),
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=429, detail=f"Too many requests,current max concurrency is {args.max_concurrency}"
|
||||
)
|
||||
@@ -545,7 +554,7 @@ async def create_chat_completion(request: ChatCompletionRequest, req: Request):
|
||||
"""
|
||||
Create a chat completion for the provided prompt and parameters.
|
||||
"""
|
||||
api_server_logger.debug(f"Chat Received request: {request.model_dump_json()}")
|
||||
log_request(RequestLogLevel.FULL, message="Chat Received request: {request}", request=request.model_dump_json())
|
||||
if envs.TRACES_ENABLE:
|
||||
if req.headers:
|
||||
headers = dict(req.headers)
|
||||
@@ -572,7 +581,11 @@ async def create_chat_completion(request: ChatCompletionRequest, req: Request):
|
||||
return StreamingResponse(content=wrapped_generator(), media_type="text/event-stream")
|
||||
|
||||
except HTTPException as e:
|
||||
api_server_logger.error(f"Error in chat completion: {str(e)}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in chat completion: {error}",
|
||||
request_id=getattr(request, "request_id", None),
|
||||
error=str(e),
|
||||
)
|
||||
return JSONResponse(status_code=e.status_code, content={"detail": e.detail})
|
||||
|
||||
|
||||
@@ -582,7 +595,9 @@ async def create_completion(request: CompletionRequest, req: Request):
|
||||
"""
|
||||
Create a completion for the provided prompt and parameters.
|
||||
"""
|
||||
api_server_logger.info(f"Completion Received request: {request.model_dump_json()}")
|
||||
log_request(
|
||||
RequestLogLevel.FULL, message="Completion Received request: {request}", request=request.model_dump_json()
|
||||
)
|
||||
if envs.TRACES_ENABLE:
|
||||
if req.headers:
|
||||
headers = dict(req.headers)
|
||||
|
||||
@@ -31,6 +31,7 @@ from pydantic import (
|
||||
)
|
||||
|
||||
from fastdeploy.engine.pooling_params import PoolingParams
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
from fastdeploy.worker.output import PromptLogprobs, SpeculateMetrics
|
||||
|
||||
|
||||
@@ -758,9 +759,7 @@ class ChatCompletionRequest(BaseModel):
|
||||
), "The parameter `raw_request` is not supported now, please use completion api instead."
|
||||
for key, value in self.metadata.items():
|
||||
req_dict[key] = value
|
||||
from fastdeploy.utils import api_server_logger
|
||||
|
||||
api_server_logger.warning("The parameter metadata is obsolete.")
|
||||
log_request(RequestLogLevel.STAGES, message="The parameter metadata is obsolete.")
|
||||
for key, value in self.dict().items():
|
||||
if value is not None:
|
||||
req_dict[key] = value
|
||||
|
||||
@@ -44,6 +44,11 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.trace.constants import LoggingEventName
|
||||
from fastdeploy.trace.trace_logger import print as trace_print
|
||||
@@ -112,14 +117,16 @@ class OpenAIServingChat:
|
||||
err_msg = (
|
||||
f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
if self.models:
|
||||
is_supported, request.model = self.models.is_supported_model(request.model)
|
||||
if not is_supported:
|
||||
err_msg = f"Unsupported model: [{request.model}], support [{', '.join([x.name for x in self.models.model_paths])}] or default"
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg
|
||||
)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR, code=ErrorCode.MODEL_NOT_SUPPORT)
|
||||
)
|
||||
@@ -129,7 +136,11 @@ class OpenAIServingChat:
|
||||
await self.engine_client.semaphore.acquire()
|
||||
else:
|
||||
await asyncio.wait_for(self.engine_client.semaphore.acquire(), timeout=self.max_waiting_time)
|
||||
api_server_logger.info(f"current {self.engine_client.semaphore.status()}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="semaphore status: {status}",
|
||||
status=self.engine_client.semaphore.status(),
|
||||
)
|
||||
|
||||
if request.request_id is not None:
|
||||
request_id = request.request_id
|
||||
@@ -141,7 +152,11 @@ class OpenAIServingChat:
|
||||
request_id = f"chatcmpl-{uuid.uuid4()}"
|
||||
tracing.trace_req_start(rid=request_id, trace_content=request.trace_context, role="FastDeploy")
|
||||
del request.trace_context
|
||||
api_server_logger.info(f"create chat completion request: {request_id}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="create chat completion request: {request_id}",
|
||||
request_id=request_id,
|
||||
)
|
||||
prompt_tokens = None
|
||||
max_tokens = None
|
||||
try:
|
||||
@@ -156,14 +171,19 @@ class OpenAIServingChat:
|
||||
if isinstance(prompt_token_ids, np.ndarray):
|
||||
prompt_token_ids = prompt_token_ids.tolist()
|
||||
except ParameterError as e:
|
||||
api_server_logger.error(f"request[{request_id}] generator error: {str(e)}, {e.message}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] generator error: {error}, {error_message}",
|
||||
request_id=request_id,
|
||||
error=str(e),
|
||||
error_message=e.message,
|
||||
)
|
||||
self.engine_client.semaphore.release()
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=str(e.message), type=ErrorType.INVALID_REQUEST_ERROR, param=e.param)
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"request[{request_id}] generator error: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
self.engine_client.semaphore.release()
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR))
|
||||
|
||||
@@ -178,12 +198,12 @@ class OpenAIServingChat:
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"request[{request_id}]full generator error: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", 1 if request.n is None else request.n)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR, code=ErrorCode.CLIENT_ABORTED)
|
||||
)
|
||||
@@ -192,13 +212,13 @@ class OpenAIServingChat:
|
||||
f"request[{request_id}] waiting error: {str(e)}, {str(traceback.format_exc())}, "
|
||||
f"max waiting time: {self.max_waiting_time}"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, type=ErrorType.TIMEOUT_ERROR, code=ErrorCode.TIMEOUT)
|
||||
)
|
||||
|
||||
def _create_streaming_error_response(self, message: str) -> str:
|
||||
api_server_logger.error(message)
|
||||
log_request_error(message=message)
|
||||
error_response = ErrorResponse(error=ErrorInfo(message=message, type=ErrorType.INTERNAL_ERROR))
|
||||
return error_response.model_dump_json()
|
||||
|
||||
@@ -249,7 +269,9 @@ class OpenAIServingChat:
|
||||
choices=[],
|
||||
model=model_name,
|
||||
)
|
||||
api_server_logger.info(f"create chat completion request: {request_id}")
|
||||
log_request(
|
||||
RequestLogLevel.LIFECYCLE, message="create chat completion request: {request_id}", request_id=request_id
|
||||
)
|
||||
|
||||
try:
|
||||
dealer, response_queue = await self.engine_client.connection_manager.get_connection(
|
||||
@@ -372,7 +394,12 @@ class OpenAIServingChat:
|
||||
completion_tokens_details=CompletionTokenUsageInfo(reasoning_tokens=0),
|
||||
)
|
||||
yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n"
|
||||
api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Chat Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}",
|
||||
request_id=request_id,
|
||||
completion_tokens=0,
|
||||
)
|
||||
first_iteration = False
|
||||
|
||||
output = res["outputs"]
|
||||
@@ -497,7 +524,14 @@ class OpenAIServingChat:
|
||||
chunk.choices = choices
|
||||
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
if res["finished"]:
|
||||
api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Chat Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}",
|
||||
request_id=request_id,
|
||||
finish_reason=choice.finish_reason,
|
||||
completion_tokens=previous_num_tokens[idx],
|
||||
logprobs=logprobs_res,
|
||||
)
|
||||
choices = []
|
||||
|
||||
if include_usage:
|
||||
@@ -525,7 +559,7 @@ class OpenAIServingChat:
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", 1 if request.n is None else request.n)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
except Exception as e:
|
||||
error_data = self._create_streaming_error_response(
|
||||
f"request[{request_id}] generate stream error: {str(e)}, {str(traceback.format_exc())}"
|
||||
@@ -536,7 +570,12 @@ class OpenAIServingChat:
|
||||
tracing.trace_req_finish(request_id)
|
||||
await self.engine_client.connection_manager.cleanup_request(request_id)
|
||||
self.engine_client.semaphore.release()
|
||||
api_server_logger.info(f"release {request_id} {self.engine_client.semaphore.status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="release {request_id} {status}",
|
||||
request_id=request_id,
|
||||
status=self.engine_client.semaphore.status(),
|
||||
)
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
async def chat_completion_full_generator(
|
||||
@@ -704,7 +743,9 @@ class OpenAIServingChat:
|
||||
tracing.trace_req_finish(request_id)
|
||||
await self.engine_client.connection_manager.cleanup_request(request_id)
|
||||
self.engine_client.semaphore.release()
|
||||
api_server_logger.info(f"release {self.engine_client.semaphore.status()}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES, message="release {status}", status=self.engine_client.semaphore.status()
|
||||
)
|
||||
|
||||
num_prompt_tokens = len(prompt_token_ids)
|
||||
num_generated_tokens = sum(previous_num_tokens)
|
||||
@@ -731,7 +772,7 @@ class OpenAIServingChat:
|
||||
choices=choices,
|
||||
usage=usage,
|
||||
)
|
||||
api_server_logger.info(f"Chat response: {res.model_dump_json()}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Chat response: {response}", response=res.model_dump_json())
|
||||
return res
|
||||
|
||||
async def _create_chat_completion_choice(
|
||||
@@ -904,7 +945,7 @@ class OpenAIServingChat:
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return None
|
||||
|
||||
def _build_prompt_logprobs(
|
||||
|
||||
@@ -41,6 +41,11 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
PromptTokenUsageInfo,
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.trace.constants import LoggingEventName
|
||||
from fastdeploy.trace.trace_logger import print as trace_print
|
||||
from fastdeploy.utils import (
|
||||
@@ -91,13 +96,15 @@ class OpenAIServingCompletion:
|
||||
err_msg = (
|
||||
f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
if self.models:
|
||||
is_supported, request.model = self.models.is_supported_model(request.model)
|
||||
if not is_supported:
|
||||
err_msg = f"Unsupported model: [{request.model}], support [{', '.join([x.name for x in self.models.model_paths])}] or default"
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg
|
||||
)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR, code=ErrorCode.MODEL_NOT_SUPPORT)
|
||||
)
|
||||
@@ -110,7 +117,11 @@ class OpenAIServingCompletion:
|
||||
request_id = f"cmpl-{request.user}-{uuid.uuid4()}"
|
||||
else:
|
||||
request_id = f"cmpl-{uuid.uuid4()}"
|
||||
api_server_logger.info(f"Initialize request {request_id}: {request}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Initialize request {request_id}",
|
||||
request_id=request_id,
|
||||
)
|
||||
tracing.trace_req_start(rid=request_id, trace_content=request.trace_context, role="FastDeploy")
|
||||
del request.trace_context
|
||||
request_prompt_ids = None
|
||||
@@ -147,15 +158,20 @@ class OpenAIServingCompletion:
|
||||
else:
|
||||
raise ValueError("Prompt type must be one of: str, list[str], list[int], list[list[int]]")
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion create_completion: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
error_msg = f"request[{request_id}] create_completion: {e}, {str(traceback.format_exc())}"
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
if request_prompt_ids is not None:
|
||||
request_prompts = request_prompt_ids
|
||||
|
||||
num_choices = len(request_prompts) * (1 if request.n is None else request.n)
|
||||
api_server_logger.info(f"Start preprocessing request: req_id={request_id}), num_choices={num_choices}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="Start preprocessing request: req_id={request_id}), num_choices={num_choices}",
|
||||
request_id=request_id,
|
||||
num_choices=num_choices,
|
||||
)
|
||||
prompt_batched_token_ids = []
|
||||
prompt_tokens_list = []
|
||||
max_tokens_list = []
|
||||
@@ -169,7 +185,7 @@ class OpenAIServingCompletion:
|
||||
f"OpenAIServingCompletion waiting error: {e}, {str(traceback.format_exc())}, "
|
||||
f"max waiting time: {self.max_waiting_time}"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message="request[{request_id}] {error}", request_id=request_id, error=error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, code=ErrorCode.TIMEOUT, type=ErrorType.TIMEOUT_ERROR)
|
||||
)
|
||||
@@ -188,14 +204,19 @@ class OpenAIServingCompletion:
|
||||
max_tokens_list.append(current_req_dict.get("max_tokens"))
|
||||
del current_req_dict
|
||||
except ParameterError as e:
|
||||
api_server_logger.error(f"OpenAIServingCompletion format error: {e}, {e.message}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] format error: {error}, {error_message}",
|
||||
request_id=request_id,
|
||||
error=e,
|
||||
error_message=e.message,
|
||||
)
|
||||
self.engine_client.semaphore.release()
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(code="400", message=str(e.message), type="invalid_request", param=e.param)
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion format error: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
error_msg = f"request[{request_id}] format error: {e}, {str(traceback.format_exc())}"
|
||||
log_request_error(message=error_msg)
|
||||
self.engine_client.semaphore.release()
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=str(e), code=ErrorCode.INVALID_VALUE, type=ErrorType.INVALID_REQUEST_ERROR)
|
||||
@@ -226,20 +247,20 @@ class OpenAIServingCompletion:
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = (
|
||||
f"OpenAIServingCompletion completion_full_generator error: {e}, {str(traceback.format_exc())}"
|
||||
f"request[{request_id}] completion_full_generator error: {e}, {str(traceback.format_exc())}"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", num_choices)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR, code=ErrorCode.CLIENT_ABORTED)
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion create_completion error: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
error_msg = f"request[{request_id}] create_completion error: {e}, {str(traceback.format_exc())}"
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
async def completion_full_generator(
|
||||
@@ -368,10 +389,16 @@ class OpenAIServingCompletion:
|
||||
prompt_tokens_list=prompt_tokens_list,
|
||||
max_tokens_list=max_tokens_list,
|
||||
)
|
||||
api_server_logger.info(f"Completion response: {res.model_dump_json()}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT, message="Completion response: {response}", response=res.model_dump_json()
|
||||
)
|
||||
return res
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
|
||||
log_request_error(
|
||||
message="request[{request_id}] error in completion_full_generator: {error}",
|
||||
request_id=request_id,
|
||||
error=e,
|
||||
)
|
||||
finally:
|
||||
trace_print(LoggingEventName.POSTPROCESSING_END, request_id, getattr(request, "user", ""))
|
||||
tracing.trace_req_finish(request_id)
|
||||
@@ -514,8 +541,11 @@ class OpenAIServingCompletion:
|
||||
],
|
||||
)
|
||||
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
api_server_logger.info(
|
||||
f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}"
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}",
|
||||
request_id=request_id,
|
||||
completion_tokens=0,
|
||||
)
|
||||
first_iteration[idx] = False
|
||||
|
||||
@@ -592,8 +622,11 @@ class OpenAIServingCompletion:
|
||||
if send_idx == 0 and not request.return_token_ids:
|
||||
chunk_temp = chunk
|
||||
chunk_temp.choices = choices
|
||||
api_server_logger.info(
|
||||
f"Completion Streaming response send_idx 0: {chunk_temp.model_dump_json()}"
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}",
|
||||
request_id=request_id,
|
||||
completion_tokens=output_tokens[idx],
|
||||
)
|
||||
del chunk_temp
|
||||
|
||||
@@ -646,14 +679,26 @@ class OpenAIServingCompletion:
|
||||
metrics=res["metrics"] if request.collect_metrics else None,
|
||||
)
|
||||
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}",
|
||||
request_id=request_id,
|
||||
finish_reason=chunk.choices[-1].finish_reason if chunk.choices else None,
|
||||
completion_tokens=output_tokens[idx],
|
||||
logprobs=logprobs_res,
|
||||
)
|
||||
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", num_choices)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] error in completion_stream_generator: {error}, {traceback}",
|
||||
request_id=request_id,
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
yield f"data: {ErrorResponse(error=ErrorInfo(message=str(e), code='400', type=ErrorType.INTERNAL_ERROR)).model_dump_json(exclude_unset=True)}\n\n"
|
||||
finally:
|
||||
trace_print(LoggingEventName.POSTPROCESSING_END, request_id, getattr(request, "user", ""))
|
||||
@@ -887,7 +932,11 @@ class OpenAIServingCompletion:
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in _build_logprobs_response: {str(e)}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="Error in _build_logprobs_response: {error}, {traceback}",
|
||||
error=str(e),
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
return None
|
||||
|
||||
def _build_prompt_logprobs(
|
||||
|
||||
@@ -35,7 +35,7 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.entrypoints.openai.serving_engine import ServeContext, ZmqOpenAIServing
|
||||
from fastdeploy.utils import api_server_logger
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
|
||||
|
||||
def _get_embedding(
|
||||
@@ -140,7 +140,12 @@ class OpenAIServingEmbedding(ZmqOpenAIServing):
|
||||
@override
|
||||
def _build_response(self, ctx: ServeContext, request_output: dict):
|
||||
"""Generate final embedding response"""
|
||||
api_server_logger.info(f"[{ctx.request_id}] Embedding RequestOutput received:{request_output}")
|
||||
log_request(
|
||||
level=RequestLogLevel.CONTENT,
|
||||
message="[{request_id}] Embedding RequestOutput received:{request_output}",
|
||||
request_id=ctx.request_id,
|
||||
request_output=request_output,
|
||||
)
|
||||
|
||||
base = PoolingRequestOutput.from_dict(request_output)
|
||||
embedding_res = EmbeddingRequestOutput.from_base(base)
|
||||
|
||||
@@ -33,6 +33,11 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
InvalidParameterException,
|
||||
)
|
||||
from fastdeploy.envs import FD_SUPPORT_MAX_CONNECTIONS
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.utils import ErrorCode, ErrorType, StatefulSemaphore, api_server_logger
|
||||
|
||||
RequestT = TypeVar("RequestT")
|
||||
@@ -96,13 +101,18 @@ class OpenAIServing(ABC, Generic[RequestT]):
|
||||
is_supported, adjusted_name = self.models.is_supported_model(model_name)
|
||||
if not is_supported:
|
||||
err_msg = f"Unsupported model: [{model_name}]"
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(message=err_msg)
|
||||
return is_supported, adjusted_name
|
||||
|
||||
async def _acquire_semaphore(self, request_id: str) -> bool:
|
||||
"""Acquire engine client semaphore with timeout"""
|
||||
try:
|
||||
api_server_logger.info(f"Acquire request:{request_id} status:{self._get_semaphore().status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Acquire request:{request_id} status:{status}",
|
||||
request_id=request_id,
|
||||
status=self._get_semaphore().status(),
|
||||
)
|
||||
if self.max_waiting_time < 0:
|
||||
await self._get_semaphore().acquire()
|
||||
else:
|
||||
@@ -111,13 +121,18 @@ class OpenAIServing(ABC, Generic[RequestT]):
|
||||
except asyncio.TimeoutError:
|
||||
self._release_semaphore(request_id)
|
||||
error_msg = f"Request waiting timeout, request:{request_id} max waiting time:{self.max_waiting_time}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return False
|
||||
|
||||
def _release_semaphore(self, request_id: str) -> None:
|
||||
"""Release engine client semaphore"""
|
||||
self._get_semaphore().release()
|
||||
api_server_logger.info(f"Release request:{request_id} status:{self._get_semaphore().status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Release request:{request_id} status:{status}",
|
||||
request_id=request_id,
|
||||
status=self._get_semaphore().status(),
|
||||
)
|
||||
|
||||
def _create_error_response(
|
||||
self,
|
||||
@@ -128,7 +143,7 @@ class OpenAIServing(ABC, Generic[RequestT]):
|
||||
) -> ErrorResponse:
|
||||
"""Create standardized error response"""
|
||||
traceback.print_exc()
|
||||
api_server_logger.error(message)
|
||||
log_request_error(message=message)
|
||||
return ErrorResponse(error=ErrorInfo(message=message, type=error_type, code=code, param=param))
|
||||
|
||||
def _generate_request_id(self, request: RequestT) -> str:
|
||||
@@ -193,7 +208,12 @@ class OpenAIServing(ABC, Generic[RequestT]):
|
||||
|
||||
request_id = self._generate_request_id(request)
|
||||
ctx.request_id = request_id
|
||||
api_server_logger.info(f"Initialize request {request_id}: {request}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Initialize request {request_id}: {request}",
|
||||
request_id=request_id,
|
||||
request=request,
|
||||
)
|
||||
|
||||
# Step 2: Semaphore acquisition
|
||||
if not await self._acquire_semaphore(request_id):
|
||||
@@ -252,7 +272,12 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
request_dicts = self._request_to_batch_dicts(ctx)
|
||||
ctx.preprocess_requests = request_dicts
|
||||
for request_dict in request_dicts:
|
||||
api_server_logger.info(f"batch add request_id: {request_dict['request_id']}, request: {request_dict}")
|
||||
log_request(
|
||||
level=RequestLogLevel.CONTENT,
|
||||
message="batch add request_id: {request_id}, request: {request}",
|
||||
request_id=request_dict["request_id"],
|
||||
request=request_dict,
|
||||
)
|
||||
await self.engine_client.format_and_add_data(request_dict)
|
||||
|
||||
def _process_chat_template_kwargs(self, request_dict):
|
||||
@@ -283,7 +308,11 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
while num_choices > 0:
|
||||
request_output_dicts = await asyncio.wait_for(request_output_queue.get(), timeout=60)
|
||||
for request_output_dict in request_output_dicts:
|
||||
api_server_logger.debug(f"Received RequestOutput: {request_output_dict}")
|
||||
log_request(
|
||||
level=RequestLogLevel.FULL,
|
||||
message="Received RequestOutput: {request_output}",
|
||||
request_output=request_output_dict,
|
||||
)
|
||||
if request_output_dict["finished"] is True:
|
||||
num_choices -= 1
|
||||
yield request_output_dict
|
||||
@@ -301,7 +330,12 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
async def _acquire_semaphore(self, request_id: str) -> bool:
|
||||
"""Acquire engine client semaphore with timeout"""
|
||||
try:
|
||||
api_server_logger.info(f"Acquire request:{request_id} status:{self._get_semaphore().status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Acquire request:{request_id} status:{status}",
|
||||
request_id=request_id,
|
||||
status=self._get_semaphore().status(),
|
||||
)
|
||||
if self.max_waiting_time < 0:
|
||||
await self._get_semaphore().acquire()
|
||||
else:
|
||||
@@ -310,14 +344,19 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
except asyncio.TimeoutError:
|
||||
self._release_semaphore(request_id)
|
||||
error_msg = f"Request waiting timeout, request:{request_id} max waiting time:{self.max_waiting_time}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return False
|
||||
|
||||
@override
|
||||
def _release_semaphore(self, request_id: str) -> None:
|
||||
"""Release engine client semaphore"""
|
||||
self._get_semaphore().release()
|
||||
api_server_logger.info(f"Release request:{request_id} status:{self._get_semaphore().status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Release request:{request_id} status:{status}",
|
||||
request_id=request_id,
|
||||
status=self._get_semaphore().status(),
|
||||
)
|
||||
|
||||
@override
|
||||
def _check_master(self) -> bool:
|
||||
|
||||
@@ -24,7 +24,8 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
ModelList,
|
||||
ModelPermission,
|
||||
)
|
||||
from fastdeploy.utils import ErrorType, api_server_logger, get_host_ip
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.utils import ErrorType, get_host_ip
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -86,7 +87,7 @@ class OpenAIServingModels:
|
||||
err_msg = (
|
||||
f"Only master node can accept models request, please send request to master node: {self.master_ip}"
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(message=err_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
model_infos = [
|
||||
ModelInfo(
|
||||
|
||||
@@ -28,7 +28,7 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.entrypoints.openai.serving_engine import ServeContext, ZmqOpenAIServing
|
||||
from fastdeploy.utils import api_server_logger
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
|
||||
|
||||
class OpenAIServingReward(ZmqOpenAIServing):
|
||||
@@ -77,7 +77,7 @@ class OpenAIServingReward(ZmqOpenAIServing):
|
||||
response: ChatRewardResponse = None
|
||||
generators: AsyncGenerator[ChatRewardResponse, None] = self.handle(ctx)
|
||||
async for r in generators:
|
||||
api_server_logger.info(f"engine pooling result:{r}")
|
||||
log_request(RequestLogLevel.CONTENT, message="engine pooling result: {result}", result=r)
|
||||
r.data[0].index = idx
|
||||
idx += 1
|
||||
if response is None or isinstance(r, ErrorResponse):
|
||||
@@ -93,7 +93,12 @@ class OpenAIServingReward(ZmqOpenAIServing):
|
||||
@override
|
||||
def _build_response(self, ctx: ServeContext, request_output: dict):
|
||||
"""Generate final reward response"""
|
||||
api_server_logger.info(f"[{ctx.request_id}] Reward RequestOutput received:{request_output}")
|
||||
log_request(
|
||||
level=RequestLogLevel.CONTENT,
|
||||
message="Reward RequestOutput received: request_id={request_id}, output={request_output}",
|
||||
request_id=ctx.request_id,
|
||||
request_output=request_output,
|
||||
)
|
||||
|
||||
base = PoolingRequestOutput.from_dict(request_output)
|
||||
reward_res = RewardRequestOutput.from_base(base)
|
||||
|
||||
@@ -41,6 +41,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import (
|
||||
ToolParser,
|
||||
ToolParserManager,
|
||||
)
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
|
||||
@@ -184,7 +185,9 @@ class Ernie45VLThinkingToolParser(ToolParser):
|
||||
continue
|
||||
|
||||
if not function_call_arr:
|
||||
data_processor_logger.error("No valid tool calls found")
|
||||
log_request_error(
|
||||
message="request[{request_id}] No valid tool calls found", request_id=request.request_id
|
||||
)
|
||||
return ExtractedToolCallInformation(tools_called=False, content=model_output)
|
||||
|
||||
tool_calls = []
|
||||
@@ -226,7 +229,11 @@ class Ernie45VLThinkingToolParser(ToolParser):
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
data_processor_logger.error(f"Error in extracting tool call from response: {str(e)}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in extracting tool call from response: {error}",
|
||||
request_id=request.request_id,
|
||||
error=str(e),
|
||||
)
|
||||
return ExtractedToolCallInformation(tools_called=False, tool_calls=None, content=model_output)
|
||||
|
||||
def extract_tool_calls_streaming(
|
||||
@@ -343,7 +350,11 @@ class Ernie45VLThinkingToolParser(ToolParser):
|
||||
)
|
||||
return delta
|
||||
except Exception as e:
|
||||
data_processor_logger.error(f"Error in streaming tool call extraction: {str(e)}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in streaming tool call extraction: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=str(e),
|
||||
)
|
||||
return None
|
||||
if "</tool_call>" in self.buffer:
|
||||
end_pos = self.buffer.find("</tool_call>")
|
||||
@@ -354,5 +365,9 @@ class Ernie45VLThinkingToolParser(ToolParser):
|
||||
return delta
|
||||
|
||||
except Exception as e:
|
||||
data_processor_logger.error(f"Error in streaming tool call extraction: {str(e)}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in streaming tool call extraction: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=str(e),
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -41,6 +41,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import (
|
||||
ToolParser,
|
||||
ToolParserManager,
|
||||
)
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.utils import data_processor_logger as logger
|
||||
|
||||
|
||||
@@ -254,8 +255,11 @@ class ErnieX1ToolParser(ToolParser):
|
||||
logger.debug("Skipping text %s - no arguments", delta_text)
|
||||
delta = None
|
||||
|
||||
elif cur_arguments is None and prev_arguments is not None:
|
||||
logger.error("should be impossible to have arguments reset " "mid-call. skipping streaming anything.")
|
||||
elif not cur_arguments and prev_arguments:
|
||||
log_request_error(
|
||||
message="request[{request_id}] should be impossible to have arguments reset mid-call. skipping streaming anything.",
|
||||
request_id=request.request_id,
|
||||
)
|
||||
delta = None
|
||||
|
||||
elif cur_arguments is not None and prev_arguments is None:
|
||||
|
||||
@@ -45,8 +45,12 @@ from fastdeploy.entrypoints.openai.v1.serving_base import (
|
||||
ServingResponseContext,
|
||||
)
|
||||
from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.utils import api_server_logger
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
@@ -178,7 +182,7 @@ class OpenAIServingChat(OpenAiServingBase):
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return None
|
||||
|
||||
@override
|
||||
@@ -302,7 +306,14 @@ class OpenAIServingChat(OpenAiServingBase):
|
||||
max_tokens = request.max_completion_tokens or request.max_tokens
|
||||
choice_completion_tokens = response_ctx.choice_completion_tokens_dict[output.index]
|
||||
choice.finish_reason = self._calc_finish_reason(request_output, max_tokens, choice_completion_tokens)
|
||||
api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Chat Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}",
|
||||
request_id=request_id,
|
||||
finish_reason=choice.finish_reason,
|
||||
completion_tokens=choice_completion_tokens,
|
||||
logprobs=choice.logprobs,
|
||||
)
|
||||
|
||||
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
if request_output.finished and response_ctx.remain_choices == 0:
|
||||
@@ -339,7 +350,11 @@ class OpenAIServingChat(OpenAiServingBase):
|
||||
res = ChatCompletionResponse(
|
||||
id=ctx.request_id, model=request.model, choices=choices, created=ctx.created_time, usage=response_ctx.usage
|
||||
)
|
||||
api_server_logger.info(f"Chat response: {res.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.CONTENT,
|
||||
message="Chat response: {response}",
|
||||
response=res.model_dump_json(),
|
||||
)
|
||||
return res
|
||||
|
||||
async def _create_chat_completion_choice(
|
||||
|
||||
@@ -38,7 +38,12 @@ from fastdeploy.entrypoints.openai.v1.serving_base import (
|
||||
ServeContext,
|
||||
ServingResponseContext,
|
||||
)
|
||||
from fastdeploy.utils import ErrorType, api_server_logger
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.utils import ErrorType
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
@@ -94,7 +99,7 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
raise ValueError("Prompt type must be one of: str, list[str], list[int], list[list[int]]")
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion create_completion: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
if request_prompt_ids is not None:
|
||||
@@ -199,7 +204,11 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in _build_logprobs_response: {str(e)}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="Error in _build_logprobs_response: {error}, {traceback}",
|
||||
error=str(e),
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
return None
|
||||
|
||||
async def _build_stream_response(
|
||||
@@ -271,9 +280,21 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
choice.finish_reason = self._calc_finish_reason(
|
||||
request_output, request.max_tokens, choice_completion_tokens
|
||||
)
|
||||
api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}",
|
||||
request_id=request_id,
|
||||
finish_reason=choice.finish_reason,
|
||||
completion_tokens=choice_completion_tokens,
|
||||
logprobs=choice.logprobs,
|
||||
)
|
||||
if send_idx == 0 and not request.return_token_ids:
|
||||
api_server_logger.info(f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}",
|
||||
request_id=request_id,
|
||||
completion_tokens=response_ctx.choice_completion_tokens_dict[output.index],
|
||||
)
|
||||
yield f"data: {chunk.model_dump_json()}\n\n"
|
||||
if request_output.finished and response_ctx.remain_choices == 0:
|
||||
if include_usage:
|
||||
@@ -287,7 +308,12 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in completion_stream_generator: {error}, {traceback}",
|
||||
request_id=request_id,
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
yield f"data: {ErrorResponse(error=ErrorInfo(message=str(e), code='400', type=ErrorType.INTERNAL_ERROR)).model_dump_json(exclude_unset=True)}\n\n"
|
||||
|
||||
async def _build_full_response(
|
||||
@@ -321,10 +347,18 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
choices=choices,
|
||||
usage=response_ctx.usage,
|
||||
)
|
||||
api_server_logger.info(f"Completion response: {res.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.FULL,
|
||||
message="Completion response: {response}",
|
||||
response=res.model_dump_json(),
|
||||
)
|
||||
return res
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in completion_full_generator: {error}",
|
||||
request_id=ctx.request_id,
|
||||
error=e,
|
||||
)
|
||||
return self._create_error_response(str(e))
|
||||
|
||||
def build_completion_choice(
|
||||
|
||||
+1
-1
@@ -43,7 +43,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
# Request logging master switch. Set to 0 to disable request logging.
|
||||
"FD_LOG_REQUESTS": lambda: int(os.getenv("FD_LOG_REQUESTS", "1")),
|
||||
# Request logging detail level (0-3). Higher level means more verbose output.
|
||||
"FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "0")),
|
||||
"FD_LOG_REQUESTS_LEVEL": lambda: int(os.getenv("FD_LOG_REQUESTS_LEVEL", "2")),
|
||||
# Max field length for request logging truncation.
|
||||
"FD_LOG_MAX_LEN": lambda: int(os.getenv("FD_LOG_MAX_LEN", "2048")),
|
||||
# Unified trace mode: off, local, otel, all.
|
||||
|
||||
@@ -55,6 +55,7 @@ from paddleformers.transformers import Llama3Tokenizer, LlamaTokenizer
|
||||
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.input.utils import process_stop_token_ids
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
_SAMPLING_EPS = 1e-5
|
||||
@@ -164,7 +165,12 @@ class BaseTextProcessor(ABC):
|
||||
req_id = request.get("request_id", None) if isinstance(request, dict) else None
|
||||
tokens = self.tokenizer.tokenize(spliced_message)
|
||||
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
|
||||
data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}")
|
||||
log_request(
|
||||
level=1,
|
||||
message="req_id:{req_id}, token_ids: {token_ids}",
|
||||
req_id=req_id,
|
||||
token_ids=token_ids,
|
||||
)
|
||||
return token_ids
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -359,7 +365,7 @@ class BaseTextProcessor(ABC):
|
||||
|
||||
def process_request_dict(self, request, max_model_len=None, **kwargs):
|
||||
"""Unified request pre-processing shared by all processors."""
|
||||
data_processor_logger.info(f"Start processing request dict: {request}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Start processing request dict: {request}", request=request)
|
||||
request = self._apply_default_parameters(request)
|
||||
if not request.get("eos_token_ids"):
|
||||
request["eos_token_ids"] = self.eos_token_ids
|
||||
@@ -440,7 +446,7 @@ class BaseTextProcessor(ABC):
|
||||
if request.get("response_max_tokens") is not None and request.get("enable_thinking") is False:
|
||||
request["max_tokens"] = min(request["response_max_tokens"], request["max_tokens"])
|
||||
|
||||
data_processor_logger.info(f"Processed request dict: {request}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Processed request dict: {request}", request=request)
|
||||
return request
|
||||
|
||||
def _apply_reasoning_parser(self, request):
|
||||
@@ -481,7 +487,12 @@ class BaseTextProcessor(ABC):
|
||||
if seq != self.tokenizer.eos_token_id:
|
||||
stop_seqs.append(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(seq)))
|
||||
stop_seqs, stop_seqs_len = self.pad_batch_data(stop_seqs, pad_id=-1, return_seq_len=True, return_array=False)
|
||||
data_processor_logger.debug(f"processed stop_seqs: {stop_seqs}, {stop_seqs_len}")
|
||||
log_request(
|
||||
level=3,
|
||||
message="processed stop_seqs: {stop_seqs}, {stop_seqs_len}",
|
||||
stop_seqs=stop_seqs,
|
||||
stop_seqs_len=stop_seqs_len,
|
||||
)
|
||||
return stop_seqs, stop_seqs_len
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
@@ -607,14 +618,20 @@ class BaseTextProcessor(ABC):
|
||||
prompt_token_ids = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(prompt))
|
||||
if len(prompt_token_ids) != 1:
|
||||
if not add_prefix_space:
|
||||
data_processor_logger.warning(
|
||||
f"bad_words: '{prompt}' tokenises to {len(prompt_token_ids)} tokens, skipping"
|
||||
log_request(
|
||||
level=1,
|
||||
message="bad_words: '{prompt}' tokenises to {num_tokens} tokens, skipping",
|
||||
prompt=prompt,
|
||||
num_tokens=len(prompt_token_ids),
|
||||
)
|
||||
continue
|
||||
if prompt_token_ids[0] > self.tokenizer.vocab_size:
|
||||
if not add_prefix_space:
|
||||
data_processor_logger.warning(
|
||||
f"bad_words: '{prompt}' token id {prompt_token_ids[0]} > vocab_size, skipping"
|
||||
log_request(
|
||||
level=1,
|
||||
message="bad_words: '{prompt}' token id {token_id} > vocab_size, skipping",
|
||||
prompt=prompt,
|
||||
token_id=prompt_token_ids[0],
|
||||
)
|
||||
continue
|
||||
if prompt_token_ids not in token_ids:
|
||||
|
||||
@@ -22,6 +22,7 @@ from paddleformers.generation import GenerationConfig
|
||||
|
||||
from fastdeploy.input.ernie4_5_processor import Ernie4_5Processor
|
||||
from fastdeploy.input.utils import IDS_TYPE_FLAG, process_stop_token_ids
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
from .process import DataProcessor
|
||||
@@ -294,7 +295,7 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
|
||||
if request.get("response_max_tokens") is not None and request.get("enable_thinking") is False:
|
||||
request["max_tokens"] = min(request["response_max_tokens"], request["max_tokens"])
|
||||
|
||||
data_processor_logger.info(f"Processed request {request}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Processed request: {request}", request=request)
|
||||
return request
|
||||
|
||||
def append_completion_tokens(self, multimodal_inputs, completion_token_ids):
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
import numpy as np
|
||||
|
||||
from fastdeploy.input.text_processor import DataProcessor as TextProcessor
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
from .process import DataProcessor
|
||||
@@ -261,7 +262,7 @@ class Qwen3VLProcessor(TextProcessor):
|
||||
request["max_tokens"] = max(1, max_tokens)
|
||||
else:
|
||||
request["max_tokens"] = min(max_tokens, request["max_tokens"])
|
||||
data_processor_logger.info(f"Processed request {request}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Processed request: {request}", request=request)
|
||||
|
||||
return request
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ import numpy as np
|
||||
|
||||
from fastdeploy.input.text_processor import DataProcessor as TextProcessor
|
||||
from fastdeploy.input.utils import process_stop_token_ids
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
from .process import DataProcessor
|
||||
@@ -262,7 +263,7 @@ class QwenVLProcessor(TextProcessor):
|
||||
else:
|
||||
self.model_status_dict[request["request_id"]] = model_status
|
||||
request["enable_thinking"] = model_status == "think_start"
|
||||
data_processor_logger.info(f"Processed request {request}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Processed request: {request}", request=request)
|
||||
|
||||
return request
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ from typing import Any, Optional, Union
|
||||
import httpx
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
|
||||
from fastdeploy.logger.request_logger import log_request, log_request_error
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
|
||||
@@ -161,7 +162,12 @@ class AsyncTokenizerClient:
|
||||
|
||||
except httpx.RequestError as e:
|
||||
# Network error, keep polling
|
||||
data_processor_logger.debug(f"Request error while polling tokenize task {task_tag}: {e}")
|
||||
log_request(
|
||||
level=3,
|
||||
message="Request error while polling tokenize task {task_tag}: {error}",
|
||||
task_tag=task_tag,
|
||||
error=str(e),
|
||||
)
|
||||
|
||||
# 超时检测
|
||||
if asyncio.get_event_loop().time() - start_time > self.max_wait:
|
||||
@@ -193,10 +199,15 @@ class AsyncTokenizerClient:
|
||||
raise RuntimeError(f"Tokenize task creation failed, {resp.json().get('message')}")
|
||||
return resp.json().get("result")
|
||||
except Exception as e:
|
||||
data_processor_logger.error(f"Attempt to decode_request {attempt + 1} failed: {e}")
|
||||
log_request_error(
|
||||
message="Attempt to decode_request {attempt} failed: {error}",
|
||||
attempt=attempt + 1,
|
||||
error=str(e),
|
||||
)
|
||||
if attempt == self.max_retries - 1:
|
||||
data_processor_logger.error(
|
||||
f"Max retries of decode_request reached. Giving up. request is {request}"
|
||||
log_request_error(
|
||||
message="Max retries of decode_request reached. Giving up. request is {request}",
|
||||
request=str(request),
|
||||
)
|
||||
await asyncio.sleep(1)
|
||||
except httpx.RequestError as e:
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
"""
|
||||
FastDeploy Logging Module
|
||||
|
||||
Unified logging management module providing:
|
||||
- Formatters: ColoredFormatter, CustomFormatter
|
||||
- Handlers: DailyRotatingFileHandler, IntervalRotatingFileHandler, LazyFileHandler
|
||||
- Logger: FastDeployLogger
|
||||
- Request Logger: log_request, log_request_error, RequestLogLevel
|
||||
- Setup: setup_logging
|
||||
- Pre-defined logger instances (lazy-loaded)
|
||||
|
||||
Usage:
|
||||
from fastdeploy.logger import get_logger, llm_logger, console_logger
|
||||
from fastdeploy.logger import log_request, log_request_error, RequestLogLevel
|
||||
"""
|
||||
|
||||
# Formatters
|
||||
from fastdeploy.logger.formatters import ColoredFormatter, CustomFormatter
|
||||
|
||||
# Handlers
|
||||
from fastdeploy.logger.handlers import (
|
||||
DailyRotatingFileHandler,
|
||||
IntervalRotatingFileHandler,
|
||||
LazyFileHandler,
|
||||
)
|
||||
|
||||
# Logger
|
||||
from fastdeploy.logger.logger import FastDeployLogger
|
||||
|
||||
# Request logger
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
|
||||
# Setup
|
||||
from fastdeploy.logger.setup_logging import setup_logging
|
||||
|
||||
|
||||
def get_logger(name, file_name=None, without_formater=False, print_to_console=False, channel=None):
|
||||
"""Convenience function to get a logger instance"""
|
||||
return FastDeployLogger().get_logger(name, file_name, without_formater, print_to_console, channel=channel)
|
||||
|
||||
|
||||
# Pre-defined logger instance configs (name, file_name, without_formater, print_to_console, channel)
|
||||
_LOGGER_CONFIGS = {
|
||||
"llm_logger": ("fastdeploy", None, False, False, "main"),
|
||||
"data_processor_logger": ("data_processor", None, False, False, "main"),
|
||||
"scheduler_logger": ("scheduler", None, False, False, "main"),
|
||||
"api_server_logger": ("api_server", None, False, False, "main"),
|
||||
"console_logger": (None, None, False, False, "console"),
|
||||
"spec_logger": ("speculate", "speculate.log", False, False, None),
|
||||
"zmq_client_logger": ("zmq_client", "comm.log", False, False, None),
|
||||
"router_logger": ("router", "comm.log", False, False, None),
|
||||
"fmq_logger": ("fmq", "comm.log", False, False, None),
|
||||
"obj_logger": ("obj", "obj.log", False, False, None),
|
||||
"register_manager_logger": ("register_manager", "register_manager.log", False, False, None),
|
||||
"_request_logger": ("request", None, False, False, "request"),
|
||||
}
|
||||
|
||||
_logger_cache = {}
|
||||
|
||||
|
||||
def __getattr__(name):
|
||||
"""Lazy-load pre-defined logger instances"""
|
||||
if name in _LOGGER_CONFIGS:
|
||||
if name not in _logger_cache:
|
||||
cfg = _LOGGER_CONFIGS[name]
|
||||
_logger_cache[name] = get_logger(cfg[0], cfg[1], cfg[2], cfg[3], cfg[4])
|
||||
return _logger_cache[name]
|
||||
if name == "trace_logger":
|
||||
if name not in _logger_cache:
|
||||
_logger_cache[name] = FastDeployLogger().get_trace_logger("trace", "trace.log")
|
||||
return _logger_cache[name]
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
|
||||
__all__ = [
|
||||
# Formatters
|
||||
"ColoredFormatter",
|
||||
"CustomFormatter",
|
||||
# Handlers
|
||||
"DailyRotatingFileHandler",
|
||||
"IntervalRotatingFileHandler",
|
||||
"LazyFileHandler",
|
||||
# Logger
|
||||
"FastDeployLogger",
|
||||
"get_logger",
|
||||
# Request logger
|
||||
"RequestLogLevel",
|
||||
"log_request",
|
||||
"log_request_error",
|
||||
# Setup
|
||||
"setup_logging",
|
||||
# Pre-defined logger instances (lazy-loaded)
|
||||
"llm_logger",
|
||||
"data_processor_logger",
|
||||
"scheduler_logger",
|
||||
"api_server_logger",
|
||||
"console_logger",
|
||||
"spec_logger",
|
||||
"zmq_client_logger",
|
||||
"trace_logger",
|
||||
"router_logger",
|
||||
"fmq_logger",
|
||||
"obj_logger",
|
||||
"register_manager_logger",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Log configuration parsing module
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
|
||||
def resolve_log_level(raw_level=None, debug_enabled=None) -> str:
|
||||
"""
|
||||
Resolve log level configuration
|
||||
|
||||
Priority: FD_LOG_LEVEL > FD_DEBUG
|
||||
"""
|
||||
raw = os.getenv("FD_LOG_LEVEL") if raw_level is None else raw_level
|
||||
# Handle None or string "None" case
|
||||
if raw and str(raw).upper() != "NONE":
|
||||
level = raw.upper()
|
||||
if level not in {"INFO", "DEBUG"}:
|
||||
raise ValueError(f"Unsupported FD_LOG_LEVEL: {raw}")
|
||||
return level
|
||||
debug = os.getenv("FD_DEBUG", "0") if debug_enabled is None else str(debug_enabled)
|
||||
return "DEBUG" if debug == "1" else "INFO"
|
||||
|
||||
|
||||
def resolve_request_logging_defaults() -> dict[str, int]:
|
||||
"""
|
||||
Resolve request logging default configuration
|
||||
"""
|
||||
return {
|
||||
"enabled": int(os.getenv("FD_LOG_REQUESTS", "1")),
|
||||
"level": int(os.getenv("FD_LOG_REQUESTS_LEVEL", "2")),
|
||||
"max_len": int(os.getenv("FD_LOG_MAX_LEN", "2048")),
|
||||
}
|
||||
+107
-17
@@ -20,21 +20,36 @@ This module provides the get_logger method to uniformly manage logging behavior
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.logger.formatters import CustomFormatter
|
||||
from fastdeploy.logger.config import resolve_log_level
|
||||
from fastdeploy.logger.formatters import ColoredFormatter, CustomFormatter
|
||||
from fastdeploy.logger.handlers import DailyRotatingFileHandler, LazyFileHandler
|
||||
from fastdeploy.logger.setup_logging import setup_logging
|
||||
|
||||
# Standard log format
|
||||
_LOG_FORMAT = "%(levelname)-8s %(asctime)s %(process)-5s %(filename)s[line:%(lineno)d] %(message)s"
|
||||
|
||||
|
||||
class FastDeployLogger:
|
||||
_instance = None
|
||||
_initialized = False
|
||||
_lock = threading.RLock()
|
||||
|
||||
# Channel to file mapping
|
||||
_channel_files = {
|
||||
"main": "fastdeploy.log",
|
||||
"request": "request.log",
|
||||
"console": "console.log",
|
||||
}
|
||||
|
||||
# Cache for channel loggers that have been configured
|
||||
_configured_channels = set()
|
||||
|
||||
def __new__(cls):
|
||||
"""Singleton pattern implementation"""
|
||||
if cls._instance is None:
|
||||
@@ -50,7 +65,7 @@ class FastDeployLogger:
|
||||
setup_logging()
|
||||
self._initialized = True
|
||||
|
||||
def get_logger(self, name, file_name=None, without_formater=False, print_to_console=False):
|
||||
def get_logger(self, name, file_name=None, without_formater=False, print_to_console=False, channel=None):
|
||||
"""
|
||||
Get logger (compatible with the original interface)
|
||||
|
||||
@@ -59,7 +74,14 @@ class FastDeployLogger:
|
||||
file_name: Log file name (for compatibility)
|
||||
without_formater: Whether to not use a formatter
|
||||
print_to_console: Whether to print to console
|
||||
channel: Log channel (main, request, console)
|
||||
"""
|
||||
# If channel is specified, use channel-based logging
|
||||
if channel is not None:
|
||||
if not self._initialized:
|
||||
self._initialize()
|
||||
return self._get_channel_logger(name, channel)
|
||||
|
||||
# If only one parameter is provided, use the new unified naming convention
|
||||
if file_name is None and not without_formater and not print_to_console:
|
||||
# Lazy initialization
|
||||
@@ -74,27 +96,95 @@ class FastDeployLogger:
|
||||
"""
|
||||
New unified way to get logger
|
||||
"""
|
||||
if name is None:
|
||||
return logging.getLogger("fastdeploy")
|
||||
return self._get_channel_logger(name, "main")
|
||||
|
||||
# Handle __main__ special case
|
||||
if name == "__main__":
|
||||
def _get_channel_logger(self, name, channel):
|
||||
"""
|
||||
Get logger through channel with manual handler setup.
|
||||
|
||||
Uses manual addHandler instead of dictConfig for better performance.
|
||||
Handlers are attached to the channel root logger (fastdeploy.{channel}),
|
||||
and child loggers propagate to it.
|
||||
|
||||
Args:
|
||||
name: logger name
|
||||
channel: log channel (main, request, console)
|
||||
"""
|
||||
# Get or create the channel root logger (all handlers go here)
|
||||
channel_root_name = f"fastdeploy.{channel}"
|
||||
channel_logger = logging.getLogger(channel_root_name)
|
||||
|
||||
# Configure the channel root logger once
|
||||
if channel not in self._configured_channels:
|
||||
self._configured_channels.add(channel)
|
||||
|
||||
log_dir = envs.FD_LOG_DIR
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
|
||||
# Resolve log level (priority: FD_LOG_LEVEL > FD_DEBUG)
|
||||
log_level = resolve_log_level()
|
||||
channel_logger.setLevel(logging.DEBUG if log_level == "DEBUG" else logging.INFO)
|
||||
|
||||
# Create formatters
|
||||
file_formatter = logging.Formatter(_LOG_FORMAT)
|
||||
console_formatter = ColoredFormatter(_LOG_FORMAT)
|
||||
|
||||
# Clear existing handlers
|
||||
for handler in channel_logger.handlers[:]:
|
||||
channel_logger.removeHandler(handler)
|
||||
|
||||
# Create file handler for this channel
|
||||
file_name = self._channel_files.get(channel, f"{channel}.log")
|
||||
log_file = os.path.join(log_dir, file_name)
|
||||
backup_count = int(envs.FD_LOG_BACKUP_COUNT)
|
||||
|
||||
file_handler = LazyFileHandler(log_file, backupCount=backup_count)
|
||||
file_handler.setFormatter(file_formatter)
|
||||
channel_logger.addHandler(file_handler)
|
||||
|
||||
# Error file handler (all channels write errors to error.log)
|
||||
error_log_file = os.path.join(log_dir, "error.log")
|
||||
error_file_handler = LazyFileHandler(
|
||||
filename=error_log_file, backupCount=backup_count, level=logging.ERROR
|
||||
)
|
||||
error_file_handler.setFormatter(file_formatter)
|
||||
channel_logger.addHandler(error_file_handler)
|
||||
|
||||
# Stderr handler for ERROR level (all channels output errors to stderr)
|
||||
stderr_handler = logging.StreamHandler(sys.stderr)
|
||||
stderr_handler.setLevel(logging.ERROR)
|
||||
stderr_handler.setFormatter(console_formatter)
|
||||
channel_logger.addHandler(stderr_handler)
|
||||
|
||||
# Console stdout handler for console channel only
|
||||
if channel == "console":
|
||||
stdout_handler = logging.StreamHandler(sys.stdout)
|
||||
stdout_handler.setLevel(logging.DEBUG if log_level == "DEBUG" else logging.INFO)
|
||||
stdout_handler.setFormatter(console_formatter)
|
||||
# Filter to exclude ERROR and above (they go to stderr)
|
||||
stdout_handler.addFilter(lambda record: record.levelno < logging.ERROR)
|
||||
channel_logger.addHandler(stdout_handler)
|
||||
|
||||
channel_logger.propagate = False
|
||||
|
||||
# Determine the actual logger name and return the appropriate logger
|
||||
if name is None or name == "fastdeploy":
|
||||
return channel_logger
|
||||
elif name == "__main__":
|
||||
import __main__
|
||||
|
||||
# Get the __file__ attribute of the main module
|
||||
if hasattr(__main__, "__file__"):
|
||||
# Get the main module file name
|
||||
base_name = Path(__main__.__file__).stem
|
||||
# Create logger with prefix
|
||||
return logging.getLogger(f"fastdeploy.main.{base_name}")
|
||||
return logging.getLogger("fastdeploy.main")
|
||||
|
||||
# If already in fastdeploy namespace, use directly
|
||||
if name.startswith("fastdeploy.") or name == "fastdeploy":
|
||||
return logging.getLogger(name)
|
||||
logger_name = f"{channel_root_name}.{base_name}"
|
||||
else:
|
||||
return channel_logger
|
||||
elif name.startswith("fastdeploy."):
|
||||
logger_name = name
|
||||
else:
|
||||
# Add fastdeploy prefix for other cases
|
||||
return logging.getLogger(f"fastdeploy.{name}")
|
||||
logger_name = f"{channel_root_name}.{name}"
|
||||
|
||||
# Child loggers propagate to channel_logger (which has handlers)
|
||||
return logging.getLogger(logger_name)
|
||||
|
||||
def get_trace_logger(self, name, file_name, without_formater=False, print_to_console=False):
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Request logging module
|
||||
|
||||
Provides leveled request logging with L0-L3 levels:
|
||||
- L0: Critical lifecycle events (creation, completion, abort)
|
||||
- L1: Processing stage details
|
||||
- L2: Request/response content (truncated)
|
||||
- L3: Full data
|
||||
"""
|
||||
|
||||
from enum import IntEnum
|
||||
|
||||
from fastdeploy import envs
|
||||
|
||||
|
||||
class RequestLogLevel(IntEnum):
|
||||
"""Request log level"""
|
||||
|
||||
LIFECYCLE = 0 # Lifecycle start/end: creation, completion, abort
|
||||
STAGES = 1 # Processing stages: semaphore, first token, signal handling
|
||||
CONTENT = 2 # Content and scheduling: request params, scheduling, response
|
||||
FULL = 3 # Complete raw data
|
||||
|
||||
|
||||
def _should_log(level: int) -> bool:
|
||||
"""Check if this level should be logged"""
|
||||
if int(envs.FD_LOG_REQUESTS) == 0:
|
||||
return False
|
||||
return int(level) <= int(envs.FD_LOG_REQUESTS_LEVEL)
|
||||
|
||||
|
||||
def _truncate(value):
|
||||
"""Truncate long content"""
|
||||
text = str(value)
|
||||
max_len = int(envs.FD_LOG_MAX_LEN)
|
||||
if len(text) <= max_len:
|
||||
return text
|
||||
return text[:max_len]
|
||||
|
||||
|
||||
def log_request(level: int, message: str, **fields):
|
||||
"""
|
||||
Log request message
|
||||
|
||||
Args:
|
||||
level: Log level (0-3)
|
||||
message: Log message template, supports {field} formatting
|
||||
**fields: Message fields
|
||||
"""
|
||||
if not _should_log(level):
|
||||
return
|
||||
|
||||
from fastdeploy.logger import _request_logger
|
||||
|
||||
if not fields:
|
||||
_request_logger.info(message, stacklevel=2)
|
||||
return
|
||||
|
||||
payload = fields
|
||||
# CONTENT level content needs to be truncated
|
||||
if int(level) == int(RequestLogLevel.CONTENT):
|
||||
payload = {key: _truncate(value) for key, value in fields.items()}
|
||||
|
||||
_request_logger.info(message.format(**payload), stacklevel=2)
|
||||
|
||||
|
||||
def log_request_error(message: str, **fields):
|
||||
"""
|
||||
Log request error message
|
||||
|
||||
Args:
|
||||
message: Log message template, supports {field} formatting
|
||||
**fields: Message fields
|
||||
"""
|
||||
from fastdeploy.logger import _request_logger
|
||||
|
||||
if fields:
|
||||
_request_logger.error(message.format(**fields), stacklevel=2)
|
||||
else:
|
||||
_request_logger.error(message, stacklevel=2)
|
||||
+123
-100
@@ -14,7 +14,19 @@
|
||||
"""
|
||||
|
||||
"""
|
||||
配置日志系统
|
||||
Setup logging system
|
||||
|
||||
This module handles logging initialization:
|
||||
- Basic setup: log directory creation
|
||||
- Optional: load external JSON config file via dictConfig
|
||||
|
||||
Channel-based logger configuration is handled by FastDeployLogger._get_channel_logger()
|
||||
using manual addHandler for better performance.
|
||||
|
||||
Log channels:
|
||||
- main: Main logs -> fastdeploy.log
|
||||
- request: Request logs -> request.log
|
||||
- console: Console logs -> console.log + terminal
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -27,54 +39,29 @@ from fastdeploy import envs
|
||||
|
||||
|
||||
class MaxLevelFilter(logging.Filter):
|
||||
"""过滤低于指定级别的日志记录。
|
||||
"""Filter log records below specified level.
|
||||
|
||||
用于将 INFO/DEBUG 路由到 stdout,ERROR/CRITICAL 路由到 stderr。
|
||||
Used to route INFO/DEBUG to stdout, ERROR/CRITICAL to stderr.
|
||||
"""
|
||||
|
||||
def __init__(self, level):
|
||||
super().__init__()
|
||||
self.level = logging._nameToLevel.get(level, level)
|
||||
self.level = logging._nameToLevel.get(level, level) if isinstance(level, str) else level
|
||||
|
||||
def filter(self, record):
|
||||
return record.levelno < self.level
|
||||
|
||||
|
||||
def setup_logging(log_dir=None, config_file=None):
|
||||
"""
|
||||
设置FastDeploy的日志配置
|
||||
|
||||
Args:
|
||||
log_dir: 日志文件存储目录,如果不提供则使用环境变量
|
||||
config_file: JSON配置文件路径,如果不提供则使用默认配置
|
||||
"""
|
||||
|
||||
# 避免重复配置
|
||||
if getattr(setup_logging, "_configured", False):
|
||||
return logging.getLogger("fastdeploy")
|
||||
|
||||
# 使用环境变量中的日志目录,如果没有则使用传入的参数或默认值
|
||||
if log_dir is None:
|
||||
log_dir = getattr(envs, "FD_LOG_DIR", "log")
|
||||
|
||||
# 确保日志目录存在
|
||||
Path(log_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 从环境变量获取日志级别和备份数量
|
||||
is_debug = int(getattr(envs, "FD_DEBUG", 0))
|
||||
FASTDEPLOY_LOGGING_LEVEL = "DEBUG" if is_debug else "INFO"
|
||||
backup_count = int(getattr(envs, "FD_LOG_BACKUP_COUNT", 7))
|
||||
|
||||
# 定义日志输出格式
|
||||
def _build_default_config(log_dir, log_level, backup_count):
|
||||
"""Build default logging configuration for dictConfig"""
|
||||
_FORMAT = "%(levelname)-8s %(asctime)s %(process)-5s %(filename)s[line:%(lineno)d] %(message)s"
|
||||
|
||||
# 默认配置
|
||||
default_config = {
|
||||
return {
|
||||
"version": 1,
|
||||
"disable_existing_loggers": False,
|
||||
"filters": {
|
||||
"below_error": {
|
||||
"()": "fastdeploy.logger.setup_logging.MaxLevelFilter",
|
||||
"()": MaxLevelFilter,
|
||||
"level": "ERROR",
|
||||
}
|
||||
},
|
||||
@@ -91,99 +78,135 @@ def setup_logging(log_dir=None, config_file=None):
|
||||
},
|
||||
},
|
||||
"handlers": {
|
||||
# 控制台标准输出,用于 INFO/DEBUG(低于 ERROR 级别)
|
||||
# Console stdout for INFO/DEBUG (below ERROR level)
|
||||
"console_stdout": {
|
||||
"class": "logging.StreamHandler",
|
||||
"level": FASTDEPLOY_LOGGING_LEVEL,
|
||||
"level": log_level,
|
||||
"filters": ["below_error"],
|
||||
"formatter": "colored",
|
||||
"stream": "ext://sys.stdout",
|
||||
},
|
||||
# 控制台错误输出,用于 ERROR/CRITICAL
|
||||
# Console stderr for ERROR/CRITICAL
|
||||
"console_stderr": {
|
||||
"class": "logging.StreamHandler",
|
||||
"level": "ERROR",
|
||||
"formatter": "colored",
|
||||
"stream": "ext://sys.stderr",
|
||||
},
|
||||
# 默认错误日志,保留最新1个小时的日志,位置在log/error.log
|
||||
# Main log file
|
||||
"main_file": {
|
||||
"class": "fastdeploy.logger.handlers.LazyFileHandler",
|
||||
"level": log_level,
|
||||
"formatter": "standard",
|
||||
"filename": os.path.join(log_dir, "fastdeploy.log"),
|
||||
"backupCount": backup_count,
|
||||
},
|
||||
# Console log file
|
||||
"console_file": {
|
||||
"class": "fastdeploy.logger.handlers.LazyFileHandler",
|
||||
"level": log_level,
|
||||
"formatter": "standard",
|
||||
"filename": os.path.join(log_dir, "console.log"),
|
||||
"backupCount": backup_count,
|
||||
},
|
||||
# Request log file
|
||||
"request_file": {
|
||||
"class": "fastdeploy.logger.handlers.LazyFileHandler",
|
||||
"level": log_level,
|
||||
"formatter": "standard",
|
||||
"filename": os.path.join(log_dir, "request.log"),
|
||||
"backupCount": backup_count,
|
||||
},
|
||||
# Error log file
|
||||
"error_file": {
|
||||
"class": "logging.handlers.TimedRotatingFileHandler",
|
||||
"class": "fastdeploy.logger.handlers.LazyFileHandler",
|
||||
"level": "ERROR",
|
||||
"formatter": "standard",
|
||||
"filename": os.path.join(log_dir, "error.log"),
|
||||
"when": "H",
|
||||
"interval": 1,
|
||||
"backupCount": 1,
|
||||
},
|
||||
# 全量日志,保留最新1小时的日志,位置在log/default.log
|
||||
"default_file": {
|
||||
"class": "logging.handlers.TimedRotatingFileHandler",
|
||||
"level": FASTDEPLOY_LOGGING_LEVEL,
|
||||
"formatter": "standard",
|
||||
"filename": os.path.join(log_dir, "default.log"),
|
||||
"when": "H",
|
||||
"interval": 1,
|
||||
"backupCount": 1,
|
||||
},
|
||||
# 错误日志归档,保留7天内的日志,每隔1小时一个文件,形式如:FastDeploy/log/2025-08-14/error_2025-08-14-18.log
|
||||
"error_archive": {
|
||||
"class": "fastdeploy.logger.handlers.IntervalRotatingFileHandler",
|
||||
"level": "ERROR",
|
||||
"formatter": "standard",
|
||||
"filename": os.path.join(log_dir, "error.log"),
|
||||
"backupDays": 7,
|
||||
"interval": 1,
|
||||
"encoding": "utf-8",
|
||||
},
|
||||
# 全量日志归档,保留7天内的日志,每隔1小时一个文件,形式如:FastDeploy/log/2025-08-14/default_2025-08-14-18.log
|
||||
"default_archive": {
|
||||
"class": "fastdeploy.logger.handlers.IntervalRotatingFileHandler",
|
||||
"level": FASTDEPLOY_LOGGING_LEVEL,
|
||||
"formatter": "standard",
|
||||
"filename": os.path.join(log_dir, "default.log"),
|
||||
"backupDays": 7,
|
||||
"interval": 1,
|
||||
"encoding": "utf-8",
|
||||
"backupCount": backup_count,
|
||||
},
|
||||
},
|
||||
"loggers": {
|
||||
# 默认日志记录器,全局共享
|
||||
# Default logger
|
||||
"fastdeploy": {
|
||||
"level": "DEBUG",
|
||||
"handlers": [
|
||||
"console_stdout",
|
||||
"console_stderr",
|
||||
"error_file",
|
||||
"default_file",
|
||||
"error_archive",
|
||||
"default_archive",
|
||||
],
|
||||
"handlers": ["main_file", "console_file", "error_file", "console_stderr"],
|
||||
"propagate": False,
|
||||
}
|
||||
},
|
||||
# Main channel
|
||||
"fastdeploy.main": {
|
||||
"level": "DEBUG",
|
||||
"handlers": ["main_file", "console_file", "error_file", "console_stderr"],
|
||||
"propagate": False,
|
||||
},
|
||||
# Request channel - only output to request.log and error.log
|
||||
"fastdeploy.request": {
|
||||
"level": "DEBUG",
|
||||
"handlers": ["request_file", "error_file", "console_stderr"],
|
||||
"propagate": False,
|
||||
},
|
||||
# Console channel - output to console.log and terminal
|
||||
"fastdeploy.console": {
|
||||
"level": "DEBUG",
|
||||
"handlers": ["console_file", "console_stdout", "error_file", "console_stderr"],
|
||||
"propagate": False,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
# 如果提供了配置文件,则加载配置文件
|
||||
if config_file and os.path.exists(config_file):
|
||||
with open(config_file, "r", encoding="utf-8") as f:
|
||||
config = json.load(f)
|
||||
|
||||
# 合并环境变量配置到用户配置中,环境变量的优先级高于自定义的优先级
|
||||
if "handlers" in config:
|
||||
for handler_name, handler_config in config["handlers"].items():
|
||||
if "backupCount" not in handler_config and "DailyRotating" in handler_config.get("class", ""):
|
||||
handler_config["backupCount"] = backup_count
|
||||
if handler_config.get("level") == "INFO" and is_debug:
|
||||
handler_config["level"] = "DEBUG"
|
||||
else:
|
||||
config = default_config
|
||||
def setup_logging(log_dir=None, config_file=None):
|
||||
"""
|
||||
Setup FastDeploy logging configuration.
|
||||
|
||||
# 应用日志配置
|
||||
logging.config.dictConfig(config)
|
||||
This function:
|
||||
1. Ensures the log directory exists
|
||||
2. Optionally loads external JSON config file via dictConfig
|
||||
|
||||
# 避免重复加载
|
||||
Note: Channel-based loggers (get_logger with channel param) use manual addHandler
|
||||
for better performance, independent of dictConfig.
|
||||
|
||||
Args:
|
||||
log_dir: Log file storage directory, uses environment variable if not provided
|
||||
config_file: Optional JSON config file path for dictConfig
|
||||
"""
|
||||
# Avoid duplicate configuration
|
||||
if getattr(setup_logging, "_configured", False):
|
||||
return
|
||||
|
||||
# Use log directory from environment variable, or use provided parameter or default value
|
||||
if log_dir is None:
|
||||
log_dir = getattr(envs, "FD_LOG_DIR", "log")
|
||||
|
||||
# Ensure log directory exists
|
||||
Path(log_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Store log_dir for later use
|
||||
setup_logging._log_dir = log_dir
|
||||
|
||||
# If config_file is provided, use dictConfig to load it
|
||||
if config_file is not None:
|
||||
is_debug = int(getattr(envs, "FD_DEBUG", 0))
|
||||
log_level = "DEBUG" if is_debug else "INFO"
|
||||
backup_count = int(getattr(envs, "FD_LOG_BACKUP_COUNT", 7))
|
||||
|
||||
if os.path.exists(config_file):
|
||||
with open(config_file, "r", encoding="utf-8") as f:
|
||||
config = json.load(f)
|
||||
|
||||
# Merge environment variable config into user config
|
||||
if "handlers" in config:
|
||||
for handler_config in config["handlers"].values():
|
||||
if "backupCount" not in handler_config and "DailyRotating" in handler_config.get("class", ""):
|
||||
handler_config["backupCount"] = backup_count
|
||||
if handler_config.get("level") == "INFO" and log_level == "DEBUG":
|
||||
handler_config["level"] = "DEBUG"
|
||||
else:
|
||||
# Config file not found, use default config
|
||||
config = _build_default_config(log_dir, log_level, backup_count)
|
||||
|
||||
# Apply logging configuration via dictConfig
|
||||
logging.config.dictConfig(config)
|
||||
|
||||
# Mark as configured
|
||||
setup_logging._configured = True
|
||||
|
||||
# 返回fastdeploy的logger
|
||||
return logging.getLogger("fastdeploy")
|
||||
|
||||
@@ -40,6 +40,11 @@ from fastdeploy.engine.request import (
|
||||
SpeculateMetrics,
|
||||
)
|
||||
from fastdeploy.inter_communicator import ZmqIpcServer
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.platforms import current_platform
|
||||
from fastdeploy.spec_decode import SpecMethod
|
||||
@@ -224,7 +229,11 @@ class TokenProcessor:
|
||||
for token_id in token_id_list:
|
||||
recovery_stop = token_id == RECOVERY_STOP_SIGNAL
|
||||
if recovery_stop:
|
||||
llm_logger.info(f"recovery stop signal found at task {task_id}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="recovery stop signal found at task {request_id}",
|
||||
request_id=task_id,
|
||||
)
|
||||
self.tokens_counter[task_id] += 1
|
||||
if token_id != RECOVERY_STOP_SIGNAL:
|
||||
result.outputs.token_ids.append(token_id)
|
||||
@@ -252,12 +261,25 @@ class TokenProcessor:
|
||||
|
||||
# Print combined log with all required information
|
||||
ttft = task.metrics.first_token_time if task.metrics.first_token_time else 0
|
||||
llm_logger.info(
|
||||
f"Request={task_id}, InputToken={task.prompt_token_ids_len}, "
|
||||
f"CachedDetail={cached_detail}, OutputToken={self.tokens_counter[task_id]}, "
|
||||
f"TokenRatio={token_ratio:.2f}, TTFT={ttft:.2f}, "
|
||||
f"E2E={e2e_time:.2f}, IsPrefill={is_prefill}, RecoveryStop={recovery_stop}, "
|
||||
f"PreemptedCount={getattr(task.metrics, 'preempted_count', 0)}"
|
||||
log_request(
|
||||
RequestLogLevel.LIFECYCLE,
|
||||
message=(
|
||||
"Request={request_id}, InputToken={input_tokens}, "
|
||||
"CachedDetail={cached_detail}, OutputToken={output_tokens}, "
|
||||
"TokenRatio={token_ratio}, TTFT={ttft}, "
|
||||
"E2E={e2e_time}, IsPrefill={is_prefill}, RecoveryStop={recovery_stop}, "
|
||||
"PreemptedCount={preempted_count}"
|
||||
),
|
||||
request_id=task_id,
|
||||
input_tokens=task.prompt_token_ids_len,
|
||||
cached_detail=cached_detail,
|
||||
output_tokens=self.tokens_counter[task_id],
|
||||
token_ratio=f"{token_ratio:.2f}",
|
||||
ttft=f"{ttft:.2f}",
|
||||
e2e_time=f"{e2e_time:.2f}",
|
||||
is_prefill=is_prefill,
|
||||
recovery_stop=recovery_stop,
|
||||
preempted_count=getattr(task.metrics, "preempted_count", 0),
|
||||
)
|
||||
|
||||
main_process_metrics.request_token_ratio.observe(token_ratio)
|
||||
@@ -289,13 +311,21 @@ class TokenProcessor:
|
||||
task_id in self.resource_manager.to_be_aborted_req_id_set
|
||||
and token_ids[-1] == PREEMPTED_TOKEN_ID
|
||||
):
|
||||
llm_logger.info(f"start to recycle abort request_id {task_id}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="start to recycle abort request_id {request_id}",
|
||||
request_id=task_id,
|
||||
)
|
||||
self.resource_manager.recycle_abort_task(task_id)
|
||||
if (
|
||||
task_id in self.resource_manager.to_be_rescheduled_request_id_set
|
||||
and token_ids[-1] == PREEMPTED_TOKEN_ID
|
||||
):
|
||||
llm_logger.info(f"sync preemption for request_id {task_id} done.")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="sync preemption for request_id {request_id} done.",
|
||||
request_id=task_id,
|
||||
)
|
||||
self.resource_manager.reschedule_preempt_task(task_id)
|
||||
continue
|
||||
if self.cfg.scheduler_config.splitwise_role == "decode":
|
||||
@@ -351,12 +381,20 @@ class TokenProcessor:
|
||||
result.outputs.logprob = float(logprobs_list.logprobs[0][0])
|
||||
result.outputs.top_logprobs = logprobs_list
|
||||
except Exception as e:
|
||||
llm_logger.warning(f"Failed to parse logprobs from StreamTransferData: {e}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="Failed to parse logprobs from StreamTransferData: {error}",
|
||||
error=str(e),
|
||||
)
|
||||
if getattr(stream_data, "prompt_logprobs", None) is not None:
|
||||
try:
|
||||
result.prompt_logprobs = stream_data.prompt_logprobs
|
||||
except Exception as e:
|
||||
llm_logger.warning(f"Failed to parse prompt_logprobs from StreamTransferData: {e}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="Failed to parse prompt_logprobs from StreamTransferData: {error}",
|
||||
error=str(e),
|
||||
)
|
||||
if self.tokens_counter[task_id] == 0:
|
||||
if task.messages is not None:
|
||||
result.prompt = task.messages
|
||||
@@ -394,7 +432,9 @@ class TokenProcessor:
|
||||
batch_result = self._process_batch_output_use_zmq(receive_datas)
|
||||
self.postprocess(batch_result)
|
||||
except Exception as e:
|
||||
llm_logger.error(f"Receive message:{receive_datas}, error:{e}")
|
||||
log_request_error(
|
||||
message="Receive message:{receive_datas}, error:{error}", receive_datas=receive_datas, error=e
|
||||
)
|
||||
continue
|
||||
|
||||
def process_sampling_results(self):
|
||||
@@ -511,7 +551,11 @@ class TokenProcessor:
|
||||
else:
|
||||
self.cached_generated_tokens.put_results(batch_result)
|
||||
except Exception as e:
|
||||
llm_logger.error(f"Error in TokenProcessor's postprocess: {e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="Error in TokenProcessor's postprocess: {error}, {traceback}",
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
|
||||
def _recycle_resources(self, task_id, index, task, result=None, is_prefill=False):
|
||||
"""
|
||||
@@ -524,14 +568,21 @@ class TokenProcessor:
|
||||
finished_task_ids = self.engine_worker_queue.get_finished_req()
|
||||
if len(finished_task_ids) > 0:
|
||||
for finished_task_id in finished_task_ids:
|
||||
llm_logger.info(f"finished_task_id: {finished_task_id}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="finished_task_id: {finished_task_id}",
|
||||
finished_task_id=finished_task_id,
|
||||
)
|
||||
self.prefill_result_status[finished_task_id[0]] = finished_task_id[1]
|
||||
if task_id in self.prefill_result_status:
|
||||
if self.prefill_result_status[task_id] != "finished":
|
||||
result.error_code = 400
|
||||
result.error_message = f"{task_id} failed to {self.prefill_result_status[task_id]}"
|
||||
llm_logger.info(
|
||||
f"wait for sending cache, request_id: {task_id}, cost seconds: {time.time()-start_time:.5f}"
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="wait for sending cache, request_id: {request_id}, cost seconds: {cost_seconds}",
|
||||
request_id=task_id,
|
||||
cost_seconds=f"{time.time()-start_time:.5f}",
|
||||
)
|
||||
result.metrics.send_request_output_to_decode_time = time.time()
|
||||
self.split_connector.send_first_token(task.disaggregate_info, [result])
|
||||
@@ -755,7 +806,11 @@ class TokenProcessor:
|
||||
if self.cfg.speculative_config.method:
|
||||
self._record_speculative_decoding_accept_num_per_request(task_id, accept_num[i])
|
||||
if accept_num[i] == PREEMPTED_TOKEN_ID: # in MTP, means preemption has happened in worker
|
||||
llm_logger.info(f"sync preemption for request_id {task_id} done.")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="sync preemption for request_id {request_id} done.",
|
||||
request_id=task_id,
|
||||
)
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||
if task_id in self.resource_manager.to_be_aborted_req_id_set:
|
||||
self.resource_manager.recycle_abort_task(task_id)
|
||||
@@ -765,7 +820,11 @@ class TokenProcessor:
|
||||
if accept_num[i] == -3:
|
||||
recovery_stop = True
|
||||
if recovery_stop:
|
||||
llm_logger.info(f"recovery stop signal found at task {task_id}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="recovery stop signal found at task {request_id}",
|
||||
request_id=task_id,
|
||||
)
|
||||
token_ids = [RECOVERY_STOP_SIGNAL]
|
||||
elif self.use_logprobs:
|
||||
token_ids = tokens[i][:, 0].tolist()[: accept_num[i]]
|
||||
@@ -785,7 +844,11 @@ class TokenProcessor:
|
||||
token_ids = [token_id]
|
||||
recovery_stop = token_id == RECOVERY_STOP_SIGNAL
|
||||
if recovery_stop:
|
||||
llm_logger.info(f"recovery stop signal found at task {task_id}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="recovery stop signal found at task {request_id}",
|
||||
request_id=task_id,
|
||||
)
|
||||
if not recovery_stop and token_id < 0:
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
|
||||
if (
|
||||
@@ -793,12 +856,20 @@ class TokenProcessor:
|
||||
and token_id == PREEMPTED_TOKEN_ID
|
||||
):
|
||||
self.resource_manager.recycle_abort_task(task_id)
|
||||
llm_logger.info(f"sync abortion for request_id {task_id} done.")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="sync abortion for request_id {request_id} done.",
|
||||
request_id=task_id,
|
||||
)
|
||||
if (
|
||||
task_id in self.resource_manager.to_be_rescheduled_request_id_set
|
||||
and token_id == PREEMPTED_TOKEN_ID
|
||||
):
|
||||
llm_logger.info(f"sync preemption for request_id {task_id} done.")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="sync preemption for request_id {request_id} done.",
|
||||
request_id=task_id,
|
||||
)
|
||||
self.resource_manager.reschedule_preempt_task(task_id)
|
||||
continue
|
||||
if self.cfg.scheduler_config.splitwise_role == "decode":
|
||||
@@ -826,7 +897,11 @@ class TokenProcessor:
|
||||
task.metrics.record_recv_first_token()
|
||||
task.metrics.cal_cost_time()
|
||||
metrics = copy.copy(task.metrics)
|
||||
llm_logger.info(f"task:{task.request_id} start recode first token")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="task:{request_id} start recode first token",
|
||||
request_id=task.request_id,
|
||||
)
|
||||
self._record_first_token_metrics(task, current_time)
|
||||
|
||||
tracing.trace_report_span(
|
||||
@@ -932,12 +1007,26 @@ class TokenProcessor:
|
||||
# Print combined log with all required information
|
||||
ttft = task.metrics.first_token_time if task.metrics.first_token_time else 0
|
||||
ttft_s = ttft + task.metrics.time_in_queue
|
||||
llm_logger.info(
|
||||
f"Request={task_id}, InputToken={task.prompt_token_ids_len}, "
|
||||
f"CachedDetail={cached_detail}, OutputToken={self.tokens_counter[task_id]}, "
|
||||
f"TokenRatio={token_ratio:.2f}, TTFT={ttft:.2f}, TTFT_S={ttft_s:.2f}, "
|
||||
f"E2E={e2e_time:.2f}, IsPrefill={is_prefill}, RecoveryStop={recovery_stop}, "
|
||||
f"PreemptedCount={getattr(task.metrics, 'preempted_count', 0)}"
|
||||
log_request(
|
||||
RequestLogLevel.LIFECYCLE,
|
||||
message=(
|
||||
"Request={request_id}, InputToken={input_tokens}, "
|
||||
"CachedDetail={cached_detail}, OutputToken={output_tokens}, "
|
||||
"TokenRatio={token_ratio}, TTFT={ttft}, TTFT_S={ttft_s}, "
|
||||
"E2E={e2e_time}, IsPrefill={is_prefill}, RecoveryStop={recovery_stop}, "
|
||||
"PreemptedCount={preempted_count}"
|
||||
),
|
||||
request_id=task_id,
|
||||
input_tokens=task.prompt_token_ids_len,
|
||||
cached_detail=cached_detail,
|
||||
output_tokens=self.tokens_counter[task_id],
|
||||
token_ratio=f"{token_ratio:.2f}",
|
||||
ttft=f"{ttft:.2f}",
|
||||
ttft_s=f"{ttft_s:.2f}",
|
||||
e2e_time=f"{e2e_time:.2f}",
|
||||
is_prefill=is_prefill,
|
||||
recovery_stop=recovery_stop,
|
||||
preempted_count=getattr(task.metrics, "preempted_count", 0),
|
||||
)
|
||||
|
||||
main_process_metrics.request_token_ratio.observe(token_ratio)
|
||||
@@ -946,8 +1035,11 @@ class TokenProcessor:
|
||||
self._compute_speculative_status(result)
|
||||
if not is_prefill:
|
||||
self._record_completion_metrics(task, current_time)
|
||||
llm_logger.info(f"task {task_id} received eos token. Recycling.")
|
||||
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="task {request_id} received eos token. Recycling.",
|
||||
request_id=task_id,
|
||||
)
|
||||
if (
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
and self.cfg.cache_config.enable_prefix_caching
|
||||
@@ -957,7 +1049,11 @@ class TokenProcessor:
|
||||
task
|
||||
) # when enable prefix caching, cache kv cache for output tokens
|
||||
self._recycle_resources(task_id, i, task, result, is_prefill)
|
||||
llm_logger.info(f"eos token {task_id} Recycle end.")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="eos token {request_id} Recycle end.",
|
||||
request_id=task_id,
|
||||
)
|
||||
break
|
||||
|
||||
llm_logger.debug(f"get response from infer: {result}")
|
||||
|
||||
@@ -21,6 +21,7 @@ import time
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from fastdeploy.engine.request import Request, RequestOutput
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
from fastdeploy.scheduler.data import ScheduledResponse
|
||||
from fastdeploy.scheduler.local_scheduler import LocalScheduler
|
||||
from fastdeploy.utils import get_logger
|
||||
@@ -58,7 +59,11 @@ class DPLocalScheduler(LocalScheduler):
|
||||
|
||||
finished_responses = [response.request_id for response in responses if response.finished]
|
||||
if len(finished_responses) > 0:
|
||||
self.scheduler_logger.info(f"Scheduler has received some finished responses: {finished_responses}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has received some finished responses: {request_ids}",
|
||||
request_ids=finished_responses,
|
||||
)
|
||||
|
||||
with self.mutex:
|
||||
self.batch_responses_per_step.append([response.raw for response in responses])
|
||||
@@ -146,8 +151,10 @@ class DPLocalScheduler(LocalScheduler):
|
||||
self.ids_read_cursor += 1
|
||||
|
||||
if len(requests) > 0:
|
||||
self.scheduler_logger.info(
|
||||
f"Scheduler has pulled some request: {[request.request_id for request in requests]}"
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has pulled some request: {request_ids}",
|
||||
request_ids=[request.request_id for request in requests],
|
||||
)
|
||||
|
||||
return requests
|
||||
@@ -195,7 +202,11 @@ class DPScheduler:
|
||||
def _put_requests_to_local(self):
|
||||
while True:
|
||||
request = self.request_queues.get()
|
||||
self.scheduler_logger.info(f"Receive request from puller, request_id: {request.request_id}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Receive request from puller, request_id: {request_id}",
|
||||
request_id=request.request_id,
|
||||
)
|
||||
self._scheduler.put_requests([request])
|
||||
|
||||
def _get_response_from_local(self):
|
||||
|
||||
@@ -25,6 +25,11 @@ import crcmod
|
||||
from redis import ConnectionPool
|
||||
|
||||
from fastdeploy.engine.request import Request, RequestOutput
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.scheduler import utils
|
||||
from fastdeploy.scheduler.data import ScheduledRequest, ScheduledResponse
|
||||
from fastdeploy.scheduler.storage import AdaptedRedis
|
||||
@@ -370,7 +375,11 @@ class GlobalScheduler:
|
||||
rem_amount=0,
|
||||
ttl=self.ttl,
|
||||
)
|
||||
scheduler_logger.info(f"Scheduler has enqueued some requests: {requests}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has enqueued some requests: {request_ids}",
|
||||
request_ids=[request.request_id for request in requests],
|
||||
)
|
||||
|
||||
if duplicate:
|
||||
scheduler_logger.warning(
|
||||
@@ -573,7 +582,9 @@ class GlobalScheduler:
|
||||
self.stolen_requests[request.request_id] = request
|
||||
continue
|
||||
|
||||
scheduler_logger.error(f"Scheduler has received a duplicate request from others: {request}")
|
||||
log_request_error(
|
||||
message="Scheduler has received a duplicate request from others: {request}", request=request
|
||||
)
|
||||
|
||||
requests: List[Request] = [request.raw for request in scheduled_requests]
|
||||
if len(remaining_request) > 0:
|
||||
@@ -603,7 +614,11 @@ class GlobalScheduler:
|
||||
)
|
||||
|
||||
if len(requests) > 0:
|
||||
scheduler_logger.info(f"Scheduler has pulled some request: {[request.request_id for request in requests]}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has pulled some request: {request_ids}",
|
||||
request_ids=[request.request_id for request in requests],
|
||||
)
|
||||
return requests
|
||||
|
||||
def _put_results_worker(self, tasks: List[Task]):
|
||||
@@ -649,7 +664,9 @@ class GlobalScheduler:
|
||||
stolen_responses[response_queue_name].append(response.serialize())
|
||||
continue
|
||||
|
||||
scheduler_logger.error(f"Scheduler has received a non-existent response from engine: {[response]}")
|
||||
log_request_error(
|
||||
message="Scheduler has received a non-existent response from engine: {response}", response=[response]
|
||||
)
|
||||
|
||||
with self.mutex:
|
||||
for request_id, responses in local_responses.items():
|
||||
@@ -664,7 +681,11 @@ class GlobalScheduler:
|
||||
self.local_response_not_empty.notify_all()
|
||||
|
||||
if len(finished_request_ids) > 0:
|
||||
scheduler_logger.info(f"Scheduler has received some finished responses: {finished_request_ids}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has received some finished responses: {request_ids}",
|
||||
request_ids=finished_request_ids,
|
||||
)
|
||||
|
||||
for response_queue_name, responses in stolen_responses.items():
|
||||
self.client.rpush(response_queue_name, *responses, ttl=self.ttl)
|
||||
@@ -793,7 +814,11 @@ class GlobalScheduler:
|
||||
|
||||
if finished:
|
||||
del self.local_responses[request_id]
|
||||
scheduler_logger.info(f"Scheduler has pulled a finished response: {[request_id]}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has pulled a finished response: {request_ids}",
|
||||
request_ids=[request_id],
|
||||
)
|
||||
return results
|
||||
|
||||
def reset(self):
|
||||
@@ -824,7 +849,7 @@ class GlobalScheduler:
|
||||
self.client.zrem(self._load_table_name(), self.name)
|
||||
self.local_responses = dict()
|
||||
self.stolen_requests = dict()
|
||||
scheduler_logger.info("Scheduler has been reset")
|
||||
log_request(RequestLogLevel.LIFECYCLE, message="Scheduler has been reset")
|
||||
|
||||
def update_config(self, load_shards_num: Optional[int], reallocate: Optional[bool]):
|
||||
"""
|
||||
|
||||
@@ -19,6 +19,7 @@ import time
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from fastdeploy.engine.request import Request, RequestOutput
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
from fastdeploy.scheduler.data import ScheduledRequest, ScheduledResponse
|
||||
from fastdeploy.utils import envs, scheduler_logger
|
||||
|
||||
@@ -116,7 +117,7 @@ class LocalScheduler:
|
||||
self.ids = list()
|
||||
self.requests = dict()
|
||||
self.responses = dict()
|
||||
scheduler_logger.info("Scheduler has been reset")
|
||||
log_request(RequestLogLevel.LIFECYCLE, message="Scheduler has been reset")
|
||||
|
||||
def _recycle(self, request_id: Optional[str] = None):
|
||||
"""
|
||||
@@ -191,7 +192,12 @@ class LocalScheduler:
|
||||
|
||||
self.ids += valid_ids
|
||||
self.requests_not_empty.notify_all()
|
||||
scheduler_logger.info(f"Scheduler has enqueued some requests: {valid_ids}")
|
||||
if len(valid_ids) > 0:
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has enqueued some requests: {request_ids}",
|
||||
request_ids=valid_ids,
|
||||
)
|
||||
|
||||
if len(duplicated_ids) > 0:
|
||||
scheduler_logger.warning(f"Scheduler has received some duplicated requests: {duplicated_ids}")
|
||||
@@ -300,7 +306,11 @@ class LocalScheduler:
|
||||
scheduler_logger.debug(f"Scheduler has put all just-pulled request into the queue: {len(batch_ids)}")
|
||||
|
||||
if len(requests) > 0:
|
||||
scheduler_logger.info(f"Scheduler has pulled some request: {[request.request_id for request in requests]}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has pulled some request: {request_ids}",
|
||||
request_ids=[request.request_id for request in requests],
|
||||
)
|
||||
|
||||
return requests
|
||||
|
||||
@@ -316,7 +326,11 @@ class LocalScheduler:
|
||||
|
||||
finished_responses = [response.request_id for response in responses if response.finished]
|
||||
if len(finished_responses) > 0:
|
||||
scheduler_logger.info(f"Scheduler has received some finished responses: {finished_responses}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has received some finished responses: {request_ids}",
|
||||
request_ids=finished_responses,
|
||||
)
|
||||
|
||||
with self.mutex:
|
||||
self.batch_responses_per_step.append([response.raw for response in responses])
|
||||
@@ -381,7 +395,11 @@ class LocalScheduler:
|
||||
|
||||
if finished:
|
||||
self._recycle(request_id)
|
||||
scheduler_logger.info(f"Scheduler has pulled a finished response: {[request_id]}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT,
|
||||
message="Scheduler has pulled a finished response: {request_ids}",
|
||||
request_ids=[request_id],
|
||||
)
|
||||
|
||||
if results:
|
||||
scheduler_logger.debug(f"get responses, {results}")
|
||||
|
||||
@@ -33,6 +33,7 @@ from fastdeploy.engine.request import (
|
||||
RequestMetrics,
|
||||
RequestOutput,
|
||||
)
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.utils import scheduler_logger as logger
|
||||
|
||||
|
||||
@@ -240,7 +241,12 @@ class NodeInfo:
|
||||
for req_id, pairs in self.reqs.items():
|
||||
load, arrival_time = pairs
|
||||
if cur_time - arrival_time > ttl:
|
||||
logger.error(f"InferScheduler Expire Reqs({req_id}), arrival({arrival_time}), ttl({ttl})")
|
||||
log_request_error(
|
||||
message="InferScheduler Expire Reqs({req_id}), arrival({arrival_time}), ttl({ttl})",
|
||||
req_id=req_id,
|
||||
arrival_time=arrival_time,
|
||||
ttl=ttl,
|
||||
)
|
||||
expire_reqs.add((req_id, load))
|
||||
for req_id, load in expire_reqs:
|
||||
if req_id in self.reqs:
|
||||
@@ -378,7 +384,7 @@ class ResultReader:
|
||||
)
|
||||
self.data.appendleft(result)
|
||||
|
||||
logger.error(f"Req({req_id}) is expired({self.ttl})")
|
||||
log_request_error(message="Req({req_id}) is expired({ttl})", req_id=req_id, ttl=self.ttl)
|
||||
expired_reqs.add(req_id)
|
||||
continue
|
||||
keys.append(req_id)
|
||||
@@ -511,7 +517,11 @@ class APIScheduler:
|
||||
except IndexError:
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.error(f"APIScheduler Schedule req error: {e!s}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="APIScheduler Schedule req error: {error}, {traceback}",
|
||||
error=str(e),
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
|
||||
def schedule(self, req, pnodes, dnodes, mnodes, group=""):
|
||||
"""
|
||||
@@ -841,7 +851,11 @@ class InferScheduler:
|
||||
|
||||
req = self.reqs_queue.popleft()
|
||||
if cur_time - req.metrics.arrival_time > self.ttl:
|
||||
logger.error(f"req({req.request_id}) is expired({self.ttl}) when InferScheduler Get Requests")
|
||||
log_request_error(
|
||||
message="req({request_id}) is expired({ttl}) when InferScheduler Get Requests",
|
||||
request_id=req.request_id,
|
||||
ttl=self.ttl,
|
||||
)
|
||||
self.node.finish_req(req.request_id)
|
||||
continue
|
||||
current_prefill_tokens += req.prompt_token_ids_len
|
||||
|
||||
+54
-158
@@ -16,15 +16,12 @@
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import codecs
|
||||
import hashlib
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -36,8 +33,6 @@ from enum import Enum
|
||||
from functools import cache
|
||||
from http import HTTPStatus
|
||||
from importlib.metadata import PackageNotFoundError, distribution
|
||||
from logging.handlers import BaseRotatingHandler
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal, TypeVar, Union
|
||||
|
||||
import numpy as np
|
||||
@@ -53,7 +48,7 @@ from typing_extensions import TypeIs, assert_never
|
||||
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.entrypoints.openai.protocol import ErrorInfo, ErrorResponse
|
||||
from fastdeploy.logger.logger import FastDeployLogger
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.worker.output import PromptLogprobs
|
||||
|
||||
T = TypeVar("T")
|
||||
@@ -193,6 +188,19 @@ class ExceptionHandler:
|
||||
loc = first_error.get("loc", [])
|
||||
param = loc[-1] if loc else None
|
||||
message = first_error.get("msg", str(exc))
|
||||
|
||||
# Try to extract request_id from request body
|
||||
request_id = None
|
||||
try:
|
||||
body = await request.body()
|
||||
if body:
|
||||
import json
|
||||
|
||||
body_json = json.loads(body)
|
||||
request_id = body_json.get("request_id")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
err = ErrorResponse(
|
||||
error=ErrorInfo(
|
||||
message=message,
|
||||
@@ -201,7 +209,13 @@ class ExceptionHandler:
|
||||
param=param,
|
||||
)
|
||||
)
|
||||
api_server_logger.error(f"invalid_request_error: {request.url} {param} {message}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] invalid_request_error: {url} {param} {msg}",
|
||||
request_id=request_id or "unknown",
|
||||
url=str(request.url),
|
||||
param=param,
|
||||
msg=message,
|
||||
)
|
||||
return JSONResponse(content=err.model_dump(), status_code=HTTPStatus.BAD_REQUEST)
|
||||
|
||||
|
||||
@@ -224,139 +238,18 @@ class ErrorCode(str, Enum):
|
||||
CLIENT_ABORTED = "client_aborted"
|
||||
|
||||
|
||||
class ColoredFormatter(logging.Formatter):
|
||||
"""自定义日志格式器,用于控制台输出带颜色"""
|
||||
# Backward compatibility: logger classes have been moved to fastdeploy.logger module
|
||||
# Use lazy import to avoid circular dependencies
|
||||
def __getattr__(name):
|
||||
if name == "ColoredFormatter":
|
||||
from fastdeploy.logger.formatters import ColoredFormatter
|
||||
|
||||
COLOR_CODES = {
|
||||
logging.WARNING: 33, # 黄色
|
||||
logging.ERROR: 31, # 红色
|
||||
logging.CRITICAL: 31, # 红色
|
||||
}
|
||||
return ColoredFormatter
|
||||
elif name == "DailyRotatingFileHandler":
|
||||
from fastdeploy.logger.handlers import DailyRotatingFileHandler
|
||||
|
||||
def format(self, record):
|
||||
color_code = self.COLOR_CODES.get(record.levelno, 0)
|
||||
prefix = f"\033[{color_code}m"
|
||||
suffix = "\033[0m"
|
||||
message = super().format(record)
|
||||
if color_code:
|
||||
message = f"{prefix}{message}{suffix}"
|
||||
return message
|
||||
|
||||
|
||||
class DailyRotatingFileHandler(BaseRotatingHandler):
|
||||
"""
|
||||
like `logging.TimedRotatingFileHandler`, but this class support multi-process
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
filename,
|
||||
backupCount=0,
|
||||
encoding="utf-8",
|
||||
delay=False,
|
||||
utc=False,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
初始化 RotatingFileHandler 对象。
|
||||
|
||||
Args:
|
||||
filename (str): 日志文件的路径,可以是相对路径或绝对路径。
|
||||
backupCount (int, optional, default=0): 保存的备份文件数量,默认为 0,表示不保存备份文件。
|
||||
encoding (str, optional, default='utf-8'): 编码格式,默认为 'utf-8'。
|
||||
delay (bool, optional, default=False): 是否延迟写入,默认为 False,表示立即写入。
|
||||
utc (bool, optional, default=False): 是否使用 UTC 时区,默认为 False,表示不使用 UTC 时区。
|
||||
kwargs (dict, optional): 其他参数将被传递给 BaseRotatingHandler 类的 init 方法。
|
||||
|
||||
Raises:
|
||||
TypeError: 如果 filename 不是 str 类型。
|
||||
ValueError: 如果 backupCount 小于等于 0。
|
||||
"""
|
||||
self.backup_count = backupCount
|
||||
self.utc = utc
|
||||
self.suffix = "%Y-%m-%d"
|
||||
self.base_log_path = Path(filename)
|
||||
self.base_filename = self.base_log_path.name
|
||||
self.current_filename = self._compute_fn()
|
||||
self.current_log_path = self.base_log_path.with_name(self.current_filename)
|
||||
BaseRotatingHandler.__init__(self, filename, "a", encoding, delay)
|
||||
|
||||
def shouldRollover(self, record):
|
||||
"""
|
||||
check scroll through the log
|
||||
"""
|
||||
if self.current_filename != self._compute_fn():
|
||||
return True
|
||||
return False
|
||||
|
||||
def doRollover(self):
|
||||
"""
|
||||
scroll log
|
||||
"""
|
||||
if self.stream:
|
||||
self.stream.close()
|
||||
self.stream = None
|
||||
|
||||
self.current_filename = self._compute_fn()
|
||||
self.current_log_path = self.base_log_path.with_name(self.current_filename)
|
||||
|
||||
if not self.delay:
|
||||
self.stream = self._open()
|
||||
|
||||
self.delete_expired_files()
|
||||
|
||||
def _compute_fn(self):
|
||||
"""
|
||||
Calculate the log file name corresponding current time
|
||||
"""
|
||||
return self.base_filename + "." + time.strftime(self.suffix, time.localtime())
|
||||
|
||||
def _open(self):
|
||||
"""
|
||||
open new log file
|
||||
"""
|
||||
if self.encoding is None:
|
||||
stream = open(str(self.current_log_path), self.mode)
|
||||
else:
|
||||
stream = codecs.open(str(self.current_log_path), self.mode, self.encoding)
|
||||
|
||||
if self.base_log_path.exists():
|
||||
try:
|
||||
if not self.base_log_path.is_symlink() or os.readlink(self.base_log_path) != self.current_filename:
|
||||
os.remove(self.base_log_path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
try:
|
||||
os.symlink(self.current_filename, str(self.base_log_path))
|
||||
except OSError:
|
||||
pass
|
||||
return stream
|
||||
|
||||
def delete_expired_files(self):
|
||||
"""
|
||||
delete expired log files
|
||||
"""
|
||||
if self.backup_count <= 0:
|
||||
return
|
||||
|
||||
file_names = os.listdir(str(self.base_log_path.parent))
|
||||
result = []
|
||||
prefix = self.base_filename + "."
|
||||
plen = len(prefix)
|
||||
for file_name in file_names:
|
||||
if file_name[:plen] == prefix:
|
||||
suffix = file_name[plen:]
|
||||
if re.match(r"^\d{4}-\d{2}-\d{2}(\.\w+)?$", suffix):
|
||||
result.append(file_name)
|
||||
if len(result) < self.backup_count:
|
||||
result = []
|
||||
else:
|
||||
result.sort()
|
||||
result = result[: len(result) - self.backup_count]
|
||||
|
||||
for file_name in result:
|
||||
os.remove(str(self.base_log_path.with_name(file_name)))
|
||||
return DailyRotatingFileHandler
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
|
||||
def chunk_list(lst: list[T], chunk_size: int):
|
||||
@@ -1054,12 +947,6 @@ def parse_quantization(value: str):
|
||||
return {"quantization": value}
|
||||
|
||||
|
||||
# 日志使用全局访问点(兼容原有使用方式)
|
||||
def get_logger(name, file_name=None, without_formater=False, print_to_console=False):
|
||||
"""全局函数包装器,保持向后兼容"""
|
||||
return FastDeployLogger().get_logger(name, file_name, without_formater, print_to_console)
|
||||
|
||||
|
||||
def check_download_links(bos_client, links, timeout=1):
|
||||
"""
|
||||
check bos download links
|
||||
@@ -1156,20 +1043,6 @@ def download_from_bos(bos_client, bos_links, retry: int = 0):
|
||||
break
|
||||
|
||||
|
||||
llm_logger = get_logger("fastdeploy", "fastdeploy.log")
|
||||
data_processor_logger = get_logger("data_processor", "data_processor.log")
|
||||
scheduler_logger = get_logger("scheduler", "scheduler.log")
|
||||
api_server_logger = get_logger("api_server", "api_server.log")
|
||||
console_logger = get_logger("console", "console.log", print_to_console=True)
|
||||
spec_logger = get_logger("speculate", "speculate.log")
|
||||
zmq_client_logger = get_logger("zmq_client", "zmq_client.log")
|
||||
trace_logger = FastDeployLogger().get_trace_logger("trace", "trace.log")
|
||||
router_logger = get_logger("router", "router.log")
|
||||
fmq_logger = get_logger("fmq", "fmq.log")
|
||||
obj_logger = get_logger("obj", "obj.log") # debug内存问题
|
||||
register_manager_logger = get_logger("register_manager", "register_manager.log")
|
||||
|
||||
|
||||
def parse_type(return_type: Callable[[str], T]) -> Callable[[str], T]:
|
||||
|
||||
def _parse_type(val: str) -> T:
|
||||
@@ -1338,3 +1211,26 @@ def all_gather_values(value: int | float | bool, group: paddle.distributed.commu
|
||||
paddle.distributed.all_gather(_global, _local, group)
|
||||
_results = [_type(t.item()) for t in _global]
|
||||
return _results
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Logger re-export (backward compatibility)
|
||||
# Actual implementation is in fastdeploy.logger module, re-exported here to
|
||||
# support existing import patterns
|
||||
# NOTE: Must be at the end of file to avoid circular imports
|
||||
# =============================================================================
|
||||
from fastdeploy.logger import ( # noqa: F401
|
||||
api_server_logger,
|
||||
console_logger,
|
||||
data_processor_logger,
|
||||
fmq_logger,
|
||||
get_logger,
|
||||
llm_logger,
|
||||
obj_logger,
|
||||
register_manager_logger,
|
||||
router_logger,
|
||||
scheduler_logger,
|
||||
spec_logger,
|
||||
trace_logger,
|
||||
zmq_client_logger,
|
||||
)
|
||||
|
||||
@@ -756,8 +756,8 @@ class TestAsyncLLMOpenAIServingCompletionPreprocess(unittest.IsolatedAsyncioTest
|
||||
expected_id = f"{request_id}_{i}" if request_id else f"_{i}"
|
||||
self.assertEqual(ctx.preprocess_requests[i]["request_id"], expected_id)
|
||||
|
||||
@patch("fastdeploy.entrypoints.openai.v1.serving_completion.api_server_logger")
|
||||
async def test_preprocess_exception_logging(self, mock_logger):
|
||||
@patch("fastdeploy.entrypoints.openai.v1.serving_completion.log_request_error")
|
||||
async def test_preprocess_exception_logging(self, mock_log_request_error):
|
||||
"""Test _preprocess logs exceptions properly"""
|
||||
# Setup - create a request that will cause an exception
|
||||
request = CompletionRequest(model="test_model", prompt="dummy", max_tokens=50)
|
||||
@@ -771,11 +771,10 @@ class TestAsyncLLMOpenAIServingCompletionPreprocess(unittest.IsolatedAsyncioTest
|
||||
|
||||
# Assert
|
||||
self.assertIsInstance(result, ErrorResponse)
|
||||
mock_logger.error.assert_called_once()
|
||||
error_log = mock_logger.error.call_args[0][0]
|
||||
self.assertIn("OpenAIServingCompletion create_completion", error_log)
|
||||
self.assertIn("ValueError", error_log)
|
||||
self.assertIn("Traceback", error_log) # Changed from "traceback" to "Traceback"
|
||||
mock_log_request_error.assert_called_once()
|
||||
error_msg = mock_log_request_error.call_args[1].get("message", "")
|
||||
self.assertIn("OpenAIServingCompletion create_completion", error_msg)
|
||||
self.assertIn("ValueError", error_msg)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -206,9 +206,9 @@ class TestEngineClientAbort(unittest.TestCase):
|
||||
mock_send_task.assert_called_once_with(expected_data)
|
||||
|
||||
@patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
|
||||
@patch("fastdeploy.entrypoints.engine_client.api_server_logger")
|
||||
@patch("fastdeploy.entrypoints.engine_client.log_request")
|
||||
@patch.object(EngineClient, "_send_task")
|
||||
def test_abort_logging(self, mock_send_task, mock_logger):
|
||||
def test_abort_logging(self, mock_send_task, mock_log_request):
|
||||
"""Test that abort method logs correctly"""
|
||||
request_id = "test_request"
|
||||
n = 2
|
||||
@@ -216,18 +216,16 @@ class TestEngineClientAbort(unittest.TestCase):
|
||||
# Run the abort method
|
||||
self.loop.run_until_complete(self.engine_client.abort(request_id, n=n))
|
||||
|
||||
# Verify info log was called twice
|
||||
self.assertEqual(mock_logger.info.call_count, 2)
|
||||
# Verify log_request was called twice
|
||||
self.assertEqual(mock_log_request.call_count, 2)
|
||||
|
||||
# Verify the first log message (abort start)
|
||||
first_call = mock_logger.info.call_args_list[0]
|
||||
self.assertEqual(first_call[0][0], "abort request_id:test_request")
|
||||
first_call = mock_log_request.call_args_list[0]
|
||||
self.assertIn("abort request_id", first_call[1].get("message", ""))
|
||||
|
||||
# Verify the second log message (abort completion with request IDs)
|
||||
second_call = mock_logger.info.call_args_list[1]
|
||||
expected_log_message = "Aborted request(s) %s."
|
||||
self.assertEqual(second_call[0][0], expected_log_message)
|
||||
self.assertEqual(second_call[0][1], "test_request_0,test_request_1")
|
||||
second_call = mock_log_request.call_args_list[1]
|
||||
self.assertIn("Aborted request(s)", second_call[1].get("message", ""))
|
||||
|
||||
@patch("fastdeploy.entrypoints.engine_client.envs.FD_ENABLE_REQUEST_DISCONNECT_STOP_INFERENCE", True)
|
||||
@patch("fastdeploy.entrypoints.engine_client.api_server_logger")
|
||||
|
||||
@@ -843,11 +843,11 @@ class TestEngineClientValidParameters(unittest.TestCase):
|
||||
"""Test valid_parameters adjusts reasoning_max_tokens when needed."""
|
||||
data = {"max_tokens": 50, "reasoning_max_tokens": 100, "request_id": "test-id"} # Larger than max_tokens
|
||||
|
||||
with patch("fastdeploy.entrypoints.engine_client.api_server_logger") as mock_logger:
|
||||
with patch("fastdeploy.entrypoints.engine_client.log_request") as mock_log_request:
|
||||
self.engine_client.valid_parameters(data)
|
||||
|
||||
self.assertEqual(data["reasoning_max_tokens"], 50)
|
||||
mock_logger.warning.assert_called_once()
|
||||
mock_log_request.assert_called_once()
|
||||
|
||||
def test_valid_parameters_reasoning_max_tokens_with_reasoning_effort(self):
|
||||
"""Test valid_parameters when both reasoning_max_tokens and reasoning_effort are set."""
|
||||
@@ -858,14 +858,13 @@ class TestEngineClientValidParameters(unittest.TestCase):
|
||||
"request_id": "test-id",
|
||||
}
|
||||
|
||||
with patch("fastdeploy.entrypoints.engine_client.api_server_logger") as mock_logger:
|
||||
with patch("fastdeploy.entrypoints.engine_client.log_request") as mock_log_request:
|
||||
self.engine_client.valid_parameters(data)
|
||||
|
||||
# When reasoning_effort is set, reasoning_max_tokens should be set to None
|
||||
self.assertIsNone(data["reasoning_max_tokens"])
|
||||
mock_logger.warning.assert_called_once()
|
||||
warning_call = mock_logger.warning.call_args[0][0]
|
||||
self.assertIn("reasoning_max_tokens and reasoning_effort are both set", warning_call)
|
||||
# log_request is called once: for reasoning_effort conflict (reasoning_max_tokens=50 < max_tokens=100)
|
||||
mock_log_request.assert_called_once()
|
||||
|
||||
def test_valid_parameters_temperature_zero_adjustment(self):
|
||||
"""Test valid_parameters adjusts zero temperature."""
|
||||
@@ -1846,7 +1845,7 @@ def test_add_requests_objgraph_and_error_paths(minimal_engine_client):
|
||||
with (
|
||||
patch(
|
||||
"fastdeploy.entrypoints.engine_client.os.getenv",
|
||||
side_effect=lambda k: "1" if k == "FD_ENABLE_OBJGRAPH_DEBUG" else None,
|
||||
side_effect=lambda k, default=None: "1" if k == "FD_ENABLE_OBJGRAPH_DEBUG" else default,
|
||||
),
|
||||
patch("fastdeploy.entrypoints.engine_client._has_objgraph", True),
|
||||
patch("fastdeploy.entrypoints.engine_client._has_psutil", False),
|
||||
|
||||
@@ -148,7 +148,9 @@ def test_receive_output_merges():
|
||||
assert first.added is True
|
||||
|
||||
|
||||
def test_receive_output_logs_exception(caplog):
|
||||
def test_receive_output_logs_exception():
|
||||
from unittest.mock import patch
|
||||
|
||||
llm = _make_llm(_make_engine())
|
||||
calls = iter([RuntimeError("boom"), SystemExit()])
|
||||
|
||||
@@ -159,9 +161,14 @@ def test_receive_output_logs_exception(caplog):
|
||||
return nxt
|
||||
|
||||
llm.llm_engine._get_generated_result = _get_generated_result
|
||||
with pytest.raises(SystemExit):
|
||||
llm._receive_output()
|
||||
assert "Unexpected error happened" in caplog.text
|
||||
with patch("fastdeploy.entrypoints.llm.log_request_error") as mock_log:
|
||||
with pytest.raises(SystemExit):
|
||||
llm._receive_output()
|
||||
mock_log.assert_called_once()
|
||||
call_kwargs = mock_log.call_args[1]
|
||||
assert "Unexpected error happened" in call_kwargs.get(
|
||||
"message", mock_log.call_args[0][0] if mock_log.call_args[0] else ""
|
||||
)
|
||||
|
||||
|
||||
def test_generate_and_chat_branches():
|
||||
|
||||
@@ -264,7 +264,7 @@ class TestSampleFramesQwen(unittest.TestCase):
|
||||
|
||||
def test_fps_warning_when_nframes_exceeds_total(self):
|
||||
"""fps so high that computed num_frames > total → warning logged."""
|
||||
with self.assertLogs(level="WARNING"):
|
||||
with self.assertLogs(logger="fastdeploy.main", level="WARNING"):
|
||||
sample_frames_qwen(2, 4, 100, {"num_of_frame": 10, "fps": 1.0}, fps=100.0)
|
||||
|
||||
def test_divisible_by_4_correction(self):
|
||||
|
||||
@@ -46,8 +46,12 @@ class LoggerTests(unittest.TestCase):
|
||||
shutil.rmtree(self.tmp_dir, ignore_errors=True)
|
||||
|
||||
def test_unified_logger(self):
|
||||
"""Test _get_unified_logger through instance"""
|
||||
test_cases = [(None, "fastdeploy"), ("module", "fastdeploy.module"), ("fastdeploy.utils", "fastdeploy.utils")]
|
||||
"""Test _get_unified_logger through instance (uses main channel)"""
|
||||
test_cases = [
|
||||
(None, "fastdeploy.main"),
|
||||
("module", "fastdeploy.main.module"),
|
||||
("fastdeploy.utils", "fastdeploy.utils"), # 已有 fastdeploy. 前缀的保持不变
|
||||
]
|
||||
|
||||
for name, expected in test_cases:
|
||||
with self.subTest(name=name):
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from fastdeploy.logger.config import resolve_log_level, resolve_request_logging_defaults
|
||||
|
||||
|
||||
class TestResolveLogLevel(unittest.TestCase):
|
||||
"""测试 resolve_log_level 函数"""
|
||||
|
||||
def test_explicit_info_level(self):
|
||||
"""显式设置 INFO 级别"""
|
||||
result = resolve_log_level(raw_level="INFO")
|
||||
self.assertEqual(result, "INFO")
|
||||
|
||||
def test_explicit_debug_level(self):
|
||||
"""显式设置 DEBUG 级别"""
|
||||
result = resolve_log_level(raw_level="DEBUG")
|
||||
self.assertEqual(result, "DEBUG")
|
||||
|
||||
def test_case_insensitive(self):
|
||||
"""级别名称应该大小写不敏感"""
|
||||
self.assertEqual(resolve_log_level(raw_level="info"), "INFO")
|
||||
self.assertEqual(resolve_log_level(raw_level="debug"), "DEBUG")
|
||||
|
||||
def test_invalid_level_raises(self):
|
||||
"""无效级别应该抛出 ValueError"""
|
||||
with self.assertRaises(ValueError) as ctx:
|
||||
resolve_log_level(raw_level="INVALID")
|
||||
self.assertIn("Unsupported FD_LOG_LEVEL", str(ctx.exception))
|
||||
|
||||
def test_debug_enabled_fallback(self):
|
||||
"""FD_DEBUG=1 应该返回 DEBUG"""
|
||||
result = resolve_log_level(raw_level=None, debug_enabled=1)
|
||||
self.assertEqual(result, "DEBUG")
|
||||
|
||||
def test_debug_disabled_fallback(self):
|
||||
"""FD_DEBUG=0 应该返回 INFO"""
|
||||
result = resolve_log_level(raw_level=None, debug_enabled=0)
|
||||
self.assertEqual(result, "INFO")
|
||||
|
||||
def test_env_fd_log_level_priority(self):
|
||||
"""FD_LOG_LEVEL 环境变量优先级高于 FD_DEBUG"""
|
||||
with patch.dict("os.environ", {"FD_LOG_LEVEL": "INFO", "FD_DEBUG": "1"}):
|
||||
result = resolve_log_level()
|
||||
self.assertEqual(result, "INFO")
|
||||
|
||||
def test_env_fd_debug_fallback(self):
|
||||
"""无 FD_LOG_LEVEL 时使用 FD_DEBUG"""
|
||||
with patch.dict("os.environ", {"FD_DEBUG": "1"}, clear=True):
|
||||
result = resolve_log_level()
|
||||
self.assertEqual(result, "DEBUG")
|
||||
|
||||
|
||||
class TestResolveRequestLoggingDefaults(unittest.TestCase):
|
||||
"""测试 resolve_request_logging_defaults 函数"""
|
||||
|
||||
def test_default_values(self):
|
||||
"""默认值测试"""
|
||||
with patch.dict("os.environ", {}, clear=True):
|
||||
result = resolve_request_logging_defaults()
|
||||
self.assertEqual(result["enabled"], 1)
|
||||
self.assertEqual(result["level"], 2)
|
||||
self.assertEqual(result["max_len"], 2048)
|
||||
|
||||
def test_custom_values(self):
|
||||
"""自定义值测试"""
|
||||
with patch.dict(
|
||||
"os.environ", {"FD_LOG_REQUESTS": "0", "FD_LOG_REQUESTS_LEVEL": "2", "FD_LOG_MAX_LEN": "1024"}
|
||||
):
|
||||
result = resolve_request_logging_defaults()
|
||||
self.assertEqual(result["enabled"], 0)
|
||||
self.assertEqual(result["level"], 2)
|
||||
self.assertEqual(result["max_len"], 1024)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,176 @@
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
_should_log,
|
||||
_truncate,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
|
||||
|
||||
class TestRequestLogLevel(unittest.TestCase):
|
||||
"""Test RequestLogLevel enum"""
|
||||
|
||||
def test_level_values(self):
|
||||
"""Test level values"""
|
||||
self.assertEqual(int(RequestLogLevel.LIFECYCLE), 0)
|
||||
self.assertEqual(int(RequestLogLevel.STAGES), 1)
|
||||
self.assertEqual(int(RequestLogLevel.CONTENT), 2)
|
||||
self.assertEqual(int(RequestLogLevel.FULL), 3)
|
||||
|
||||
|
||||
class TestShouldLog(unittest.TestCase):
|
||||
"""Test _should_log function"""
|
||||
|
||||
def test_disabled_returns_false(self):
|
||||
"""FD_LOG_REQUESTS=0 should return False"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_REQUESTS = 0
|
||||
mock_envs.FD_LOG_REQUESTS_LEVEL = 3
|
||||
self.assertFalse(_should_log(RequestLogLevel.LIFECYCLE))
|
||||
|
||||
def test_level_within_threshold(self):
|
||||
"""Level within threshold should return True"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_REQUESTS = 1
|
||||
mock_envs.FD_LOG_REQUESTS_LEVEL = 2
|
||||
self.assertTrue(_should_log(RequestLogLevel.LIFECYCLE))
|
||||
self.assertTrue(_should_log(RequestLogLevel.STAGES))
|
||||
self.assertTrue(_should_log(RequestLogLevel.CONTENT))
|
||||
|
||||
def test_level_above_threshold(self):
|
||||
"""Level above threshold should return False"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_REQUESTS = 1
|
||||
mock_envs.FD_LOG_REQUESTS_LEVEL = 1
|
||||
self.assertFalse(_should_log(RequestLogLevel.CONTENT))
|
||||
self.assertFalse(_should_log(RequestLogLevel.FULL))
|
||||
|
||||
|
||||
class TestTruncate(unittest.TestCase):
|
||||
"""Test _truncate function"""
|
||||
|
||||
def test_short_text_unchanged(self):
|
||||
"""Short text should remain unchanged"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_MAX_LEN = 100
|
||||
result = _truncate("short text")
|
||||
self.assertEqual(result, "short text")
|
||||
|
||||
def test_long_text_truncated(self):
|
||||
"""Long text should be truncated"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_MAX_LEN = 10
|
||||
result = _truncate("this is a very long text")
|
||||
self.assertEqual(result, "this is a ")
|
||||
self.assertEqual(len(result), 10)
|
||||
|
||||
def test_non_string_converted(self):
|
||||
"""Non-string should be converted"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_MAX_LEN = 100
|
||||
result = _truncate(12345)
|
||||
self.assertEqual(result, "12345")
|
||||
|
||||
|
||||
class TestLogRequest(unittest.TestCase):
|
||||
"""Test log_request function"""
|
||||
|
||||
@patch("fastdeploy.logger._request_logger")
|
||||
def test_log_when_enabled(self, mock_logger):
|
||||
"""Should log when enabled"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_REQUESTS = 1
|
||||
mock_envs.FD_LOG_REQUESTS_LEVEL = 0
|
||||
mock_envs.FD_LOG_MAX_LEN = 2048
|
||||
|
||||
log_request(RequestLogLevel.LIFECYCLE, message="test {value}", value="hello")
|
||||
mock_logger.info.assert_called_once()
|
||||
call_args = mock_logger.info.call_args[0][0]
|
||||
self.assertEqual(call_args, "test hello")
|
||||
|
||||
@patch("fastdeploy.logger._request_logger")
|
||||
def test_no_log_when_disabled(self, mock_logger):
|
||||
"""Should not log when disabled"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_REQUESTS = 0
|
||||
mock_envs.FD_LOG_REQUESTS_LEVEL = 3
|
||||
|
||||
log_request(RequestLogLevel.LIFECYCLE, message="test {value}", value="hello")
|
||||
mock_logger.info.assert_not_called()
|
||||
|
||||
@patch("fastdeploy.logger._request_logger")
|
||||
def test_no_log_when_level_too_high(self, mock_logger):
|
||||
"""Should not log when level is too high"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_REQUESTS = 1
|
||||
mock_envs.FD_LOG_REQUESTS_LEVEL = 0
|
||||
|
||||
log_request(RequestLogLevel.CONTENT, message="test {value}", value="hello")
|
||||
mock_logger.info.assert_not_called()
|
||||
|
||||
@patch("fastdeploy.logger._request_logger")
|
||||
def test_content_level_truncates_content(self, mock_logger):
|
||||
"""CONTENT level should truncate content"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_REQUESTS = 1
|
||||
mock_envs.FD_LOG_REQUESTS_LEVEL = 3
|
||||
mock_envs.FD_LOG_MAX_LEN = 5
|
||||
|
||||
log_request(RequestLogLevel.CONTENT, message="content: {data}", data="very long data")
|
||||
mock_logger.info.assert_called_once()
|
||||
call_args = mock_logger.info.call_args[0][0]
|
||||
self.assertEqual(call_args, "content: very ")
|
||||
|
||||
@patch("fastdeploy.logger._request_logger")
|
||||
def test_lifecycle_level_no_truncation(self, mock_logger):
|
||||
"""LIFECYCLE level should not truncate content"""
|
||||
with patch("fastdeploy.logger.request_logger.envs") as mock_envs:
|
||||
mock_envs.FD_LOG_REQUESTS = 1
|
||||
mock_envs.FD_LOG_REQUESTS_LEVEL = 3
|
||||
mock_envs.FD_LOG_MAX_LEN = 5
|
||||
|
||||
log_request(RequestLogLevel.LIFECYCLE, message="content: {data}", data="very long data")
|
||||
mock_logger.info.assert_called_once()
|
||||
call_args = mock_logger.info.call_args[0][0]
|
||||
self.assertEqual(call_args, "content: very long data")
|
||||
|
||||
|
||||
class TestLogRequestError(unittest.TestCase):
|
||||
"""Test log_request_error function"""
|
||||
|
||||
@patch("fastdeploy.logger._request_logger")
|
||||
def test_error_with_fields(self, mock_logger):
|
||||
"""Error log with fields should format message"""
|
||||
log_request_error(message="request {request_id} failed: {error}", request_id="req-123", error="timeout")
|
||||
mock_logger.error.assert_called_once()
|
||||
call_args = mock_logger.error.call_args[0][0]
|
||||
self.assertEqual(call_args, "request req-123 failed: timeout")
|
||||
|
||||
@patch("fastdeploy.logger._request_logger")
|
||||
def test_error_without_fields(self, mock_logger):
|
||||
"""Error log without fields should not call format"""
|
||||
log_request_error(message="simple error message")
|
||||
mock_logger.error.assert_called_once()
|
||||
call_args = mock_logger.error.call_args[0][0]
|
||||
self.assertEqual(call_args, "simple error message")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -43,58 +43,18 @@ class TestSetupLogging(unittest.TestCase):
|
||||
delattr(setup_logging, "_configured")
|
||||
|
||||
def test_log_dir_created(self):
|
||||
"""Log directory should be created"""
|
||||
nested = os.path.join(self.temp_dir, "a", "b", "c")
|
||||
setup_logging(log_dir=nested)
|
||||
self.assertTrue(Path(nested).is_dir())
|
||||
|
||||
def test_default_config_fallback(self):
|
||||
"""Pass a non-existent config_file to trigger default_config"""
|
||||
fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json")
|
||||
setup_logging(config_file=fake_cfg)
|
||||
logger = logging.getLogger("fastdeploy")
|
||||
self.assertTrue(logger.handlers)
|
||||
handler_classes = [h.__class__.__name__ for h in logger.handlers]
|
||||
self.assertIn("TimedRotatingFileHandler", handler_classes)
|
||||
|
||||
def test_debug_level_affects_handlers(self):
|
||||
"""FD_DEBUG=1 should force DEBUG level"""
|
||||
with patch("fastdeploy.envs.FD_DEBUG", 1):
|
||||
with patch("logging.config.dictConfig") as mock_cfg:
|
||||
setup_logging()
|
||||
called_config = mock_cfg.call_args[0][0]
|
||||
for handler in called_config["handlers"].values():
|
||||
self.assertIn("formatter", handler)
|
||||
self.assertEqual(called_config["handlers"]["console_stdout"]["level"], "DEBUG")
|
||||
|
||||
@patch("logging.config.dictConfig")
|
||||
def test_custom_config_with_dailyrotating_and_debug(self, mock_dict):
|
||||
custom_cfg = {
|
||||
"version": 1,
|
||||
"handlers": {
|
||||
"daily": {
|
||||
"class": "logging.handlers.DailyRotatingFileHandler",
|
||||
"level": "INFO",
|
||||
"formatter": "plain",
|
||||
}
|
||||
},
|
||||
"loggers": {"fastdeploy": {"handlers": ["daily"], "level": "INFO"}},
|
||||
}
|
||||
cfg_path = Path(self.temp_dir) / "cfg.json"
|
||||
cfg_path.write_text(json.dumps(custom_cfg))
|
||||
|
||||
with patch("fastdeploy.envs.FD_DEBUG", 1):
|
||||
setup_logging(config_file=str(cfg_path))
|
||||
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertIn("daily", config_used["handlers"])
|
||||
self.assertEqual(config_used["handlers"]["daily"]["level"], "DEBUG")
|
||||
self.assertIn("backupCount", config_used["handlers"]["daily"])
|
||||
|
||||
def test_configure_once(self):
|
||||
"""Ensure idempotent setup"""
|
||||
l1 = setup_logging()
|
||||
l2 = setup_logging()
|
||||
self.assertIs(l1, l2)
|
||||
"""Ensure idempotent setup - only configures once"""
|
||||
setup_logging()
|
||||
self.assertTrue(setup_logging._configured)
|
||||
# Second call should not raise
|
||||
setup_logging()
|
||||
self.assertTrue(setup_logging._configured)
|
||||
|
||||
def test_envs_priority_used_for_log_dir(self):
|
||||
"""When log_dir=None, should use envs.FD_LOG_DIR"""
|
||||
@@ -102,15 +62,45 @@ class TestSetupLogging(unittest.TestCase):
|
||||
setup_logging()
|
||||
self.assertTrue(os.path.exists(self.temp_dir))
|
||||
|
||||
@patch("logging.StreamHandler.emit")
|
||||
def test_console_colored(self, mock_emit):
|
||||
setup_logging()
|
||||
logger = logging.getLogger("fastdeploy")
|
||||
logger.error("color test")
|
||||
self.assertTrue(mock_emit.called)
|
||||
def test_log_dir_stored(self):
|
||||
"""setup_logging should store log_dir for later use"""
|
||||
setup_logging(log_dir=self.temp_dir)
|
||||
self.assertEqual(setup_logging._log_dir, self.temp_dir)
|
||||
|
||||
@patch("logging.config.dictConfig")
|
||||
def test_backup_count_merging(self, mock_dict):
|
||||
def test_no_config_file_no_dictconfig(self):
|
||||
"""When config_file is not provided, dictConfig should not be called"""
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging()
|
||||
self.assertFalse(mock_dict.called)
|
||||
|
||||
def test_config_file_with_dictconfig(self):
|
||||
"""When config_file is provided, dictConfig should be called"""
|
||||
custom_cfg = {
|
||||
"version": 1,
|
||||
"handlers": {},
|
||||
"loggers": {},
|
||||
}
|
||||
cfg_path = Path(self.temp_dir) / "cfg.json"
|
||||
cfg_path.write_text(json.dumps(custom_cfg))
|
||||
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging(config_file=str(cfg_path))
|
||||
self.assertTrue(mock_dict.called)
|
||||
|
||||
def test_config_file_not_exists_uses_default(self):
|
||||
"""When config_file doesn't exist, use default config"""
|
||||
fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json")
|
||||
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging(config_file=fake_cfg)
|
||||
self.assertTrue(mock_dict.called)
|
||||
# Should use default config
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertIn("handlers", config_used)
|
||||
self.assertIn("loggers", config_used)
|
||||
|
||||
def test_backup_count_merging(self):
|
||||
"""backupCount should be merged into handler config"""
|
||||
custom_cfg = {
|
||||
"version": 1,
|
||||
"handlers": {"daily": {"class": "logging.handlers.DailyRotatingFileHandler", "formatter": "plain"}},
|
||||
@@ -119,33 +109,77 @@ class TestSetupLogging(unittest.TestCase):
|
||||
cfg_path = Path(self.temp_dir) / "cfg.json"
|
||||
cfg_path.write_text(json.dumps(custom_cfg))
|
||||
|
||||
setup_logging(config_file=str(cfg_path))
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging(config_file=str(cfg_path))
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertEqual(config_used["handlers"]["daily"]["backupCount"], 3)
|
||||
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertEqual(config_used["handlers"]["daily"]["backupCount"], 3)
|
||||
def test_debug_level_affects_handlers(self):
|
||||
"""FD_DEBUG=1 should force DEBUG level in handlers"""
|
||||
custom_cfg = {
|
||||
"version": 1,
|
||||
"handlers": {"test": {"class": "logging.StreamHandler", "level": "INFO"}},
|
||||
"loggers": {},
|
||||
}
|
||||
cfg_path = Path(self.temp_dir) / "cfg.json"
|
||||
cfg_path.write_text(json.dumps(custom_cfg))
|
||||
|
||||
@patch("logging.config.dictConfig")
|
||||
def test_error_logs_use_stderr_handler(self, mock_dict):
|
||||
"""ERROR级别日志应该使用stderr输出"""
|
||||
setup_logging()
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertIn("console_stderr", config_used["handlers"])
|
||||
self.assertEqual(config_used["handlers"]["console_stderr"]["stream"], "ext://sys.stderr")
|
||||
self.assertEqual(config_used["handlers"]["console_stderr"]["level"], "ERROR")
|
||||
with patch("fastdeploy.envs.FD_DEBUG", 1):
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging(config_file=str(cfg_path))
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertEqual(config_used["handlers"]["test"]["level"], "DEBUG")
|
||||
|
||||
@patch("logging.config.dictConfig")
|
||||
def test_console_stdout_filters_below_error(self, mock_dict):
|
||||
"""console_stdout应该只输出低于ERROR级别的日志"""
|
||||
setup_logging()
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertIn("console_stdout", config_used["handlers"])
|
||||
self.assertIn("below_error", config_used["handlers"]["console_stdout"]["filters"])
|
||||
self.assertEqual(config_used["handlers"]["console_stdout"]["stream"], "ext://sys.stdout")
|
||||
def test_default_config_has_channels(self):
|
||||
"""Default config should have channel loggers configured"""
|
||||
fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json")
|
||||
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging(config_file=fake_cfg)
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
# Check channel loggers exist
|
||||
self.assertIn("fastdeploy.main", config_used["loggers"])
|
||||
self.assertIn("fastdeploy.request", config_used["loggers"])
|
||||
self.assertIn("fastdeploy.console", config_used["loggers"])
|
||||
|
||||
def test_default_config_has_handlers(self):
|
||||
"""Default config should have file handlers configured"""
|
||||
fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json")
|
||||
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging(config_file=fake_cfg)
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
# Check handlers exist
|
||||
self.assertIn("main_file", config_used["handlers"])
|
||||
self.assertIn("request_file", config_used["handlers"])
|
||||
self.assertIn("error_file", config_used["handlers"])
|
||||
self.assertIn("console_stderr", config_used["handlers"])
|
||||
|
||||
def test_default_config_stderr_handler(self):
|
||||
"""Default config console_stderr should output to stderr"""
|
||||
fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json")
|
||||
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging(config_file=fake_cfg)
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertEqual(config_used["handlers"]["console_stderr"]["stream"], "ext://sys.stderr")
|
||||
self.assertEqual(config_used["handlers"]["console_stderr"]["level"], "ERROR")
|
||||
|
||||
def test_default_config_stdout_filters_below_error(self):
|
||||
"""Default config console_stdout should filter below ERROR level"""
|
||||
fake_cfg = os.path.join(self.temp_dir, "no_such_cfg.json")
|
||||
|
||||
with patch("logging.config.dictConfig") as mock_dict:
|
||||
setup_logging(config_file=fake_cfg)
|
||||
config_used = mock_dict.call_args[0][0]
|
||||
self.assertIn("console_stdout", config_used["handlers"])
|
||||
self.assertIn("below_error", config_used["handlers"]["console_stdout"]["filters"])
|
||||
self.assertEqual(config_used["handlers"]["console_stdout"]["stream"], "ext://sys.stdout")
|
||||
|
||||
|
||||
class TestMaxLevelFilter(unittest.TestCase):
|
||||
def test_filter_allows_below_level(self):
|
||||
"""MaxLevelFilter应该允许低于指定级别的日志通过"""
|
||||
"""MaxLevelFilter should allow logs below specified level"""
|
||||
filter = MaxLevelFilter("ERROR")
|
||||
record = logging.LogRecord(
|
||||
name="test", level=logging.INFO, pathname="", lineno=0, msg="test", args=(), exc_info=None
|
||||
@@ -153,7 +187,7 @@ class TestMaxLevelFilter(unittest.TestCase):
|
||||
self.assertTrue(filter.filter(record))
|
||||
|
||||
def test_filter_blocks_at_level(self):
|
||||
"""MaxLevelFilter应该阻止等于指定级别的日志"""
|
||||
"""MaxLevelFilter should block logs at specified level"""
|
||||
filter = MaxLevelFilter("ERROR")
|
||||
record = logging.LogRecord(
|
||||
name="test", level=logging.ERROR, pathname="", lineno=0, msg="test", args=(), exc_info=None
|
||||
@@ -161,7 +195,7 @@ class TestMaxLevelFilter(unittest.TestCase):
|
||||
self.assertFalse(filter.filter(record))
|
||||
|
||||
def test_filter_blocks_above_level(self):
|
||||
"""MaxLevelFilter应该阻止高于指定级别的日志"""
|
||||
"""MaxLevelFilter should block logs above specified level"""
|
||||
filter = MaxLevelFilter("ERROR")
|
||||
record = logging.LogRecord(
|
||||
name="test", level=logging.CRITICAL, pathname="", lineno=0, msg="test", args=(), exc_info=None
|
||||
@@ -169,7 +203,7 @@ class TestMaxLevelFilter(unittest.TestCase):
|
||||
self.assertFalse(filter.filter(record))
|
||||
|
||||
def test_filter_with_numeric_level(self):
|
||||
"""MaxLevelFilter应该支持数字级别"""
|
||||
"""MaxLevelFilter should support numeric level"""
|
||||
filter = MaxLevelFilter(logging.WARNING)
|
||||
info_record = logging.LogRecord(
|
||||
name="test", level=logging.INFO, pathname="", lineno=0, msg="test", args=(), exc_info=None
|
||||
@@ -181,5 +215,95 @@ class TestMaxLevelFilter(unittest.TestCase):
|
||||
self.assertFalse(filter.filter(warning_record))
|
||||
|
||||
|
||||
class TestChannelLoggers(unittest.TestCase):
|
||||
"""Test channel logger configuration via get_logger"""
|
||||
|
||||
def setUp(self):
|
||||
self.temp_dir = tempfile.mkdtemp(prefix="logger_channel_test_")
|
||||
if hasattr(setup_logging, "_configured"):
|
||||
delattr(setup_logging, "_configured")
|
||||
# Clear channel configuration cache
|
||||
from fastdeploy.logger.logger import FastDeployLogger
|
||||
|
||||
FastDeployLogger._configured_channels = set()
|
||||
|
||||
self.patches = [
|
||||
patch("fastdeploy.envs.FD_LOG_DIR", self.temp_dir),
|
||||
patch("fastdeploy.envs.FD_DEBUG", 0),
|
||||
patch("fastdeploy.envs.FD_LOG_BACKUP_COUNT", "3"),
|
||||
patch("fastdeploy.envs.FD_LOG_LEVEL", None),
|
||||
]
|
||||
[p.start() for p in self.patches]
|
||||
|
||||
def tearDown(self):
|
||||
[p.stop() for p in self.patches]
|
||||
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||
if hasattr(setup_logging, "_configured"):
|
||||
delattr(setup_logging, "_configured")
|
||||
# Clear channel configuration cache
|
||||
from fastdeploy.logger.logger import FastDeployLogger
|
||||
|
||||
FastDeployLogger._configured_channels = set()
|
||||
|
||||
def test_main_channel_has_handlers(self):
|
||||
"""main channel root logger should have handlers"""
|
||||
from fastdeploy.logger import get_logger
|
||||
|
||||
get_logger("test", channel="main")
|
||||
main_channel = logging.getLogger("fastdeploy.main")
|
||||
self.assertTrue(len(main_channel.handlers) > 0)
|
||||
|
||||
def test_request_channel_has_handlers(self):
|
||||
"""request channel root logger should have handlers"""
|
||||
from fastdeploy.logger import get_logger
|
||||
|
||||
get_logger("test", channel="request")
|
||||
request_channel = logging.getLogger("fastdeploy.request")
|
||||
self.assertTrue(len(request_channel.handlers) > 0)
|
||||
|
||||
def test_console_channel_has_stdout_handler(self):
|
||||
"""console channel should have stdout handler"""
|
||||
from fastdeploy.logger import get_logger
|
||||
|
||||
get_logger("test", channel="console")
|
||||
console_channel = logging.getLogger("fastdeploy.console")
|
||||
handler_types = [type(h).__name__ for h in console_channel.handlers]
|
||||
self.assertIn("StreamHandler", handler_types)
|
||||
|
||||
def test_child_logger_propagates_to_channel(self):
|
||||
"""Child loggers should propagate to channel root logger"""
|
||||
from fastdeploy.logger import get_logger
|
||||
|
||||
logger = get_logger("child_test", channel="main")
|
||||
# Child logger should have no direct handlers (propagates to parent)
|
||||
self.assertEqual(len(logger.handlers), 0)
|
||||
self.assertEqual(logger.name, "fastdeploy.main.child_test")
|
||||
|
||||
def test_channel_file_mapping(self):
|
||||
"""Each channel should write to correct log file"""
|
||||
from fastdeploy.logger.logger import FastDeployLogger
|
||||
|
||||
expected_files = {
|
||||
"main": "fastdeploy.log",
|
||||
"request": "request.log",
|
||||
"console": "console.log",
|
||||
}
|
||||
self.assertEqual(FastDeployLogger._channel_files, expected_files)
|
||||
|
||||
def test_multiple_loggers_same_channel(self):
|
||||
"""Multiple loggers on same channel should share channel root handlers"""
|
||||
from fastdeploy.logger import get_logger
|
||||
|
||||
logger1 = get_logger("test1", channel="main")
|
||||
logger2 = get_logger("test2", channel="main")
|
||||
|
||||
main_channel = logging.getLogger("fastdeploy.main")
|
||||
# Both child loggers should have no handlers
|
||||
self.assertEqual(len(logger1.handlers), 0)
|
||||
self.assertEqual(len(logger2.handlers), 0)
|
||||
# Channel root should have handlers
|
||||
self.assertTrue(len(main_channel.handlers) > 0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -22,6 +22,7 @@ from unittest.mock import MagicMock, Mock, patch
|
||||
import paddle
|
||||
|
||||
from fastdeploy.engine.request import RequestMetrics, RequestOutput
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel
|
||||
from fastdeploy.output.token_processor import TokenProcessor
|
||||
|
||||
paddle.set_device("cpu")
|
||||
@@ -295,9 +296,9 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
|
||||
processor.tokens_counter[task_id] = 0
|
||||
processor.tokens_counter[task2.request_id] = 0
|
||||
|
||||
# Mock llm_logger to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
# Mock log_request to capture the log message and envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
with (
|
||||
patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
|
||||
patch("fastdeploy.output.token_processor.log_request") as mock_log_request,
|
||||
patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 0),
|
||||
):
|
||||
# Call the method
|
||||
@@ -306,7 +307,11 @@ class TestTokenProcessorProcessBatchOutput(unittest.TestCase):
|
||||
# In speculative decoding mode, when accept_num[i] == PREEMPTED_TOKEN_ID,
|
||||
# the code logs "sync preemption" and continues without triggering abort recycling
|
||||
# This is the expected behavior for speculative decoding mode
|
||||
mock_logger.info.assert_any_call(f"sync preemption for request_id {task_id} done.")
|
||||
mock_log_request.assert_any_call(
|
||||
RequestLogLevel.STAGES,
|
||||
message="sync preemption for request_id {request_id} done.",
|
||||
request_id=task_id,
|
||||
)
|
||||
# Verify that _recycle_resources was NOT called for the aborted task
|
||||
# (it may be called for other tasks like test_request_2 if they receive EOS tokens)
|
||||
for call in processor._recycle_resources.call_args_list:
|
||||
|
||||
@@ -21,6 +21,7 @@ from unittest.mock import MagicMock, patch
|
||||
import numpy as np
|
||||
|
||||
from fastdeploy.engine.request import CompletionOutput, RequestMetrics, RequestOutput
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel
|
||||
from fastdeploy.output.token_processor import TokenProcessor
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
@@ -171,16 +172,20 @@ class TestTokenProcessorLogprobs(unittest.TestCase):
|
||||
# Mock _recycle_resources to track if it's called
|
||||
self.processor._recycle_resources = MagicMock()
|
||||
|
||||
# Mock the llm_logger module and envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
# Mock the log_request function and envs.ENABLE_V1_KVCACHE_SCHEDULER
|
||||
with (
|
||||
patch("fastdeploy.output.token_processor.llm_logger") as mock_logger,
|
||||
patch("fastdeploy.output.token_processor.log_request") as mock_log_request,
|
||||
patch("fastdeploy.output.token_processor.envs.ENABLE_V1_KVCACHE_SCHEDULER", 1),
|
||||
):
|
||||
# Call the method
|
||||
result = self.processor._process_batch_output_use_zmq([stream_data])
|
||||
|
||||
# Verify the recycling logic was triggered
|
||||
mock_logger.info.assert_any_call(f"start to recycle abort request_id {task_id}")
|
||||
# Verify the recycling logic was triggered via log_request
|
||||
mock_log_request.assert_any_call(
|
||||
RequestLogLevel.STAGES,
|
||||
message="start to recycle abort request_id {request_id}",
|
||||
request_id=task_id,
|
||||
)
|
||||
self.processor.resource_manager.recycle_abort_task.assert_called_once_with(task_id)
|
||||
self.assertNotIn(task_id, self.processor.resource_manager.to_be_aborted_req_id_set)
|
||||
self.assertEqual(len(result), 0) # Aborted task is skipped (continue)
|
||||
|
||||
@@ -47,9 +47,13 @@ sys.modules["fastdeploy.engine.request"] = Mock()
|
||||
sys.modules["fastdeploy.scheduler"] = Mock()
|
||||
sys.modules["fastdeploy.scheduler.local_scheduler"] = Mock()
|
||||
sys.modules["fastdeploy.scheduler.data"] = Mock()
|
||||
sys.modules["fastdeploy.logger"] = Mock()
|
||||
sys.modules["fastdeploy.logger.request_logger"] = Mock()
|
||||
|
||||
# Mock the get_logger function
|
||||
sys.modules["fastdeploy.utils"].get_logger = Mock(return_value=mock_logger)
|
||||
# Mock the log_request function
|
||||
sys.modules["fastdeploy.logger.request_logger"].log_request = Mock()
|
||||
|
||||
|
||||
# Mock the Request, RequestOutput, and ScheduledResponse classes
|
||||
@@ -240,8 +244,9 @@ class TestDPLocalScheduler(unittest.TestCase):
|
||||
|
||||
def test_put_results_with_finished_requests(self):
|
||||
"""Test putting results with finished requests."""
|
||||
# Reset mock logger
|
||||
mock_logger.reset_mock()
|
||||
# Get the mock log_request function
|
||||
mock_log_request = sys.modules["fastdeploy.logger.request_logger"].log_request
|
||||
mock_log_request.reset_mock()
|
||||
|
||||
# Create mock request outputs
|
||||
results = [
|
||||
@@ -254,13 +259,12 @@ class TestDPLocalScheduler(unittest.TestCase):
|
||||
with patch.object(self.scheduler, "responses_not_empty"):
|
||||
self.scheduler.put_results(results)
|
||||
|
||||
# Check that finished requests were logged - the logger should have been called
|
||||
self.assertTrue(mock_logger.info.called)
|
||||
# Check that finished requests were logged via log_request
|
||||
self.assertTrue(mock_log_request.called)
|
||||
# Get the actual call arguments to verify the message format
|
||||
call_args = mock_logger.info.call_args[0][0]
|
||||
self.assertIn("finished responses", call_args)
|
||||
self.assertIn("req1", call_args)
|
||||
self.assertIn("req3", call_args)
|
||||
call_kwargs = mock_log_request.call_args[1]
|
||||
self.assertIn("finished responses", call_kwargs.get("message", ""))
|
||||
self.assertIn("req1", str(call_kwargs.get("request_ids", [])))
|
||||
|
||||
def test_put_results_with_new_responses(self):
|
||||
"""Test putting results with new responses."""
|
||||
|
||||
@@ -17,10 +17,11 @@ import unittest
|
||||
from unittest.mock import Mock, patch # noqa: F401
|
||||
|
||||
from fastdeploy.engine.request import Request, RequestMetrics, RequestOutput
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel
|
||||
|
||||
# Real FastDeploy imports
|
||||
from fastdeploy.scheduler.local_scheduler import LocalScheduler
|
||||
from fastdeploy.utils import envs, scheduler_logger
|
||||
from fastdeploy.utils import envs
|
||||
|
||||
|
||||
class TestLocalScheduler(unittest.TestCase):
|
||||
@@ -221,9 +222,9 @@ class TestLocalScheduler(unittest.TestCase):
|
||||
|
||||
def test_reset_logs_message(self):
|
||||
"""Test that reset logs appropriate message."""
|
||||
with patch.object(scheduler_logger, "info") as mock_info:
|
||||
with patch("fastdeploy.scheduler.local_scheduler.log_request") as mock_log:
|
||||
self.scheduler.reset()
|
||||
mock_info.assert_called_once_with("Scheduler has been reset")
|
||||
mock_log.assert_called_once_with(RequestLogLevel.LIFECYCLE, message="Scheduler has been reset")
|
||||
|
||||
def test_put_requests_duplicate_handling(self):
|
||||
"""Test handling of duplicate request IDs."""
|
||||
@@ -449,12 +450,14 @@ class TestLocalScheduler(unittest.TestCase):
|
||||
# Add request first
|
||||
self.scheduler.put_requests([self.mock_request_1])
|
||||
|
||||
with patch.object(scheduler_logger, "info") as mock_info:
|
||||
with patch("fastdeploy.scheduler.local_scheduler.log_request") as mock_log_request:
|
||||
mock_output = self._create_test_request_output("req_1", finished=True)
|
||||
self.scheduler.put_results([mock_output])
|
||||
|
||||
# Should log finished response
|
||||
self._assert_log_contains(mock_info, "finished responses")
|
||||
# Should log finished response via log_request
|
||||
mock_log_request.assert_called_once()
|
||||
call_kwargs = mock_log_request.call_args[1]
|
||||
self.assertIn("finished responses", call_kwargs.get("message", ""))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user