mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] implement log channel separation and request log level system (#7190)
* feat: implement log channel separation and request log level system
* fix: log system improvements based on review
* add request_id to error logs, use RequestLogLevel enum, and unify logger implementation from utils to logger module
This commit is contained in:
@@ -73,6 +73,11 @@ from fastdeploy.entrypoints.openai.v1.serving_completion import (
|
||||
OpenAIServingCompletion as OpenAIServingCompletionV1,
|
||||
)
|
||||
from fastdeploy.envs import environment_variables
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import get_filtered_metrics
|
||||
from fastdeploy.utils import (
|
||||
ExceptionHandler,
|
||||
@@ -325,7 +330,11 @@ async def connection_manager():
|
||||
await asyncio.wait_for(connection_semaphore.acquire(), timeout=0.001)
|
||||
yield
|
||||
except asyncio.TimeoutError:
|
||||
api_server_logger.info(f"Reach max request concurrency, semaphore status: {connection_semaphore.status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Reach max request concurrency, semaphore status: {status}",
|
||||
status=connection_semaphore.status(),
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=429, detail=f"Too many requests,current max concurrency is {args.max_concurrency}"
|
||||
)
|
||||
@@ -545,7 +554,7 @@ async def create_chat_completion(request: ChatCompletionRequest, req: Request):
|
||||
"""
|
||||
Create a chat completion for the provided prompt and parameters.
|
||||
"""
|
||||
api_server_logger.debug(f"Chat Received request: {request.model_dump_json()}")
|
||||
log_request(RequestLogLevel.FULL, message="Chat Received request: {request}", request=request.model_dump_json())
|
||||
if envs.TRACES_ENABLE:
|
||||
if req.headers:
|
||||
headers = dict(req.headers)
|
||||
@@ -572,7 +581,11 @@ async def create_chat_completion(request: ChatCompletionRequest, req: Request):
|
||||
return StreamingResponse(content=wrapped_generator(), media_type="text/event-stream")
|
||||
|
||||
except HTTPException as e:
|
||||
api_server_logger.error(f"Error in chat completion: {str(e)}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in chat completion: {error}",
|
||||
request_id=getattr(request, "request_id", None),
|
||||
error=str(e),
|
||||
)
|
||||
return JSONResponse(status_code=e.status_code, content={"detail": e.detail})
|
||||
|
||||
|
||||
@@ -582,7 +595,9 @@ async def create_completion(request: CompletionRequest, req: Request):
|
||||
"""
|
||||
Create a completion for the provided prompt and parameters.
|
||||
"""
|
||||
api_server_logger.info(f"Completion Received request: {request.model_dump_json()}")
|
||||
log_request(
|
||||
RequestLogLevel.FULL, message="Completion Received request: {request}", request=request.model_dump_json()
|
||||
)
|
||||
if envs.TRACES_ENABLE:
|
||||
if req.headers:
|
||||
headers = dict(req.headers)
|
||||
|
||||
@@ -31,6 +31,7 @@ from pydantic import (
|
||||
)
|
||||
|
||||
from fastdeploy.engine.pooling_params import PoolingParams
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
from fastdeploy.worker.output import PromptLogprobs, SpeculateMetrics
|
||||
|
||||
|
||||
@@ -758,9 +759,7 @@ class ChatCompletionRequest(BaseModel):
|
||||
), "The parameter `raw_request` is not supported now, please use completion api instead."
|
||||
for key, value in self.metadata.items():
|
||||
req_dict[key] = value
|
||||
from fastdeploy.utils import api_server_logger
|
||||
|
||||
api_server_logger.warning("The parameter metadata is obsolete.")
|
||||
log_request(RequestLogLevel.STAGES, message="The parameter metadata is obsolete.")
|
||||
for key, value in self.dict().items():
|
||||
if value is not None:
|
||||
req_dict[key] = value
|
||||
|
||||
@@ -44,6 +44,11 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.trace.constants import LoggingEventName
|
||||
from fastdeploy.trace.trace_logger import print as trace_print
|
||||
@@ -112,14 +117,16 @@ class OpenAIServingChat:
|
||||
err_msg = (
|
||||
f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
if self.models:
|
||||
is_supported, request.model = self.models.is_supported_model(request.model)
|
||||
if not is_supported:
|
||||
err_msg = f"Unsupported model: [{request.model}], support [{', '.join([x.name for x in self.models.model_paths])}] or default"
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg
|
||||
)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR, code=ErrorCode.MODEL_NOT_SUPPORT)
|
||||
)
|
||||
@@ -129,7 +136,11 @@ class OpenAIServingChat:
|
||||
await self.engine_client.semaphore.acquire()
|
||||
else:
|
||||
await asyncio.wait_for(self.engine_client.semaphore.acquire(), timeout=self.max_waiting_time)
|
||||
api_server_logger.info(f"current {self.engine_client.semaphore.status()}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="semaphore status: {status}",
|
||||
status=self.engine_client.semaphore.status(),
|
||||
)
|
||||
|
||||
if request.request_id is not None:
|
||||
request_id = request.request_id
|
||||
@@ -141,7 +152,11 @@ class OpenAIServingChat:
|
||||
request_id = f"chatcmpl-{uuid.uuid4()}"
|
||||
tracing.trace_req_start(rid=request_id, trace_content=request.trace_context, role="FastDeploy")
|
||||
del request.trace_context
|
||||
api_server_logger.info(f"create chat completion request: {request_id}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="create chat completion request: {request_id}",
|
||||
request_id=request_id,
|
||||
)
|
||||
prompt_tokens = None
|
||||
max_tokens = None
|
||||
try:
|
||||
@@ -156,14 +171,19 @@ class OpenAIServingChat:
|
||||
if isinstance(prompt_token_ids, np.ndarray):
|
||||
prompt_token_ids = prompt_token_ids.tolist()
|
||||
except ParameterError as e:
|
||||
api_server_logger.error(f"request[{request_id}] generator error: {str(e)}, {e.message}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] generator error: {error}, {error_message}",
|
||||
request_id=request_id,
|
||||
error=str(e),
|
||||
error_message=e.message,
|
||||
)
|
||||
self.engine_client.semaphore.release()
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=str(e.message), type=ErrorType.INVALID_REQUEST_ERROR, param=e.param)
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"request[{request_id}] generator error: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
self.engine_client.semaphore.release()
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR))
|
||||
|
||||
@@ -178,12 +198,12 @@ class OpenAIServingChat:
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"request[{request_id}]full generator error: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", 1 if request.n is None else request.n)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR, code=ErrorCode.CLIENT_ABORTED)
|
||||
)
|
||||
@@ -192,13 +212,13 @@ class OpenAIServingChat:
|
||||
f"request[{request_id}] waiting error: {str(e)}, {str(traceback.format_exc())}, "
|
||||
f"max waiting time: {self.max_waiting_time}"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, type=ErrorType.TIMEOUT_ERROR, code=ErrorCode.TIMEOUT)
|
||||
)
|
||||
|
||||
def _create_streaming_error_response(self, message: str) -> str:
|
||||
api_server_logger.error(message)
|
||||
log_request_error(message=message)
|
||||
error_response = ErrorResponse(error=ErrorInfo(message=message, type=ErrorType.INTERNAL_ERROR))
|
||||
return error_response.model_dump_json()
|
||||
|
||||
@@ -249,7 +269,9 @@ class OpenAIServingChat:
|
||||
choices=[],
|
||||
model=model_name,
|
||||
)
|
||||
api_server_logger.info(f"create chat completion request: {request_id}")
|
||||
log_request(
|
||||
RequestLogLevel.LIFECYCLE, message="create chat completion request: {request_id}", request_id=request_id
|
||||
)
|
||||
|
||||
try:
|
||||
dealer, response_queue = await self.engine_client.connection_manager.get_connection(
|
||||
@@ -372,7 +394,12 @@ class OpenAIServingChat:
|
||||
completion_tokens_details=CompletionTokenUsageInfo(reasoning_tokens=0),
|
||||
)
|
||||
yield f"data: {chunk.model_dump_json(exclude_unset=True)} \n\n"
|
||||
api_server_logger.info(f"Chat Streaming response send_idx 0: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Chat Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}",
|
||||
request_id=request_id,
|
||||
completion_tokens=0,
|
||||
)
|
||||
first_iteration = False
|
||||
|
||||
output = res["outputs"]
|
||||
@@ -497,7 +524,14 @@ class OpenAIServingChat:
|
||||
chunk.choices = choices
|
||||
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
if res["finished"]:
|
||||
api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Chat Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}",
|
||||
request_id=request_id,
|
||||
finish_reason=choice.finish_reason,
|
||||
completion_tokens=previous_num_tokens[idx],
|
||||
logprobs=logprobs_res,
|
||||
)
|
||||
choices = []
|
||||
|
||||
if include_usage:
|
||||
@@ -525,7 +559,7 @@ class OpenAIServingChat:
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", 1 if request.n is None else request.n)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
except Exception as e:
|
||||
error_data = self._create_streaming_error_response(
|
||||
f"request[{request_id}] generate stream error: {str(e)}, {str(traceback.format_exc())}"
|
||||
@@ -536,7 +570,12 @@ class OpenAIServingChat:
|
||||
tracing.trace_req_finish(request_id)
|
||||
await self.engine_client.connection_manager.cleanup_request(request_id)
|
||||
self.engine_client.semaphore.release()
|
||||
api_server_logger.info(f"release {request_id} {self.engine_client.semaphore.status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="release {request_id} {status}",
|
||||
request_id=request_id,
|
||||
status=self.engine_client.semaphore.status(),
|
||||
)
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
async def chat_completion_full_generator(
|
||||
@@ -704,7 +743,9 @@ class OpenAIServingChat:
|
||||
tracing.trace_req_finish(request_id)
|
||||
await self.engine_client.connection_manager.cleanup_request(request_id)
|
||||
self.engine_client.semaphore.release()
|
||||
api_server_logger.info(f"release {self.engine_client.semaphore.status()}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES, message="release {status}", status=self.engine_client.semaphore.status()
|
||||
)
|
||||
|
||||
num_prompt_tokens = len(prompt_token_ids)
|
||||
num_generated_tokens = sum(previous_num_tokens)
|
||||
@@ -731,7 +772,7 @@ class OpenAIServingChat:
|
||||
choices=choices,
|
||||
usage=usage,
|
||||
)
|
||||
api_server_logger.info(f"Chat response: {res.model_dump_json()}")
|
||||
log_request(RequestLogLevel.CONTENT, message="Chat response: {response}", response=res.model_dump_json())
|
||||
return res
|
||||
|
||||
async def _create_chat_completion_choice(
|
||||
@@ -904,7 +945,7 @@ class OpenAIServingChat:
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return None
|
||||
|
||||
def _build_prompt_logprobs(
|
||||
|
||||
@@ -41,6 +41,11 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
PromptTokenUsageInfo,
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.trace.constants import LoggingEventName
|
||||
from fastdeploy.trace.trace_logger import print as trace_print
|
||||
from fastdeploy.utils import (
|
||||
@@ -91,13 +96,15 @@ class OpenAIServingCompletion:
|
||||
err_msg = (
|
||||
f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
if self.models:
|
||||
is_supported, request.model = self.models.is_supported_model(request.model)
|
||||
if not is_supported:
|
||||
err_msg = f"Unsupported model: [{request.model}], support [{', '.join([x.name for x in self.models.model_paths])}] or default"
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(
|
||||
message="request[{request_id}] {error}", request_id=request.request_id, error=err_msg
|
||||
)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR, code=ErrorCode.MODEL_NOT_SUPPORT)
|
||||
)
|
||||
@@ -110,7 +117,11 @@ class OpenAIServingCompletion:
|
||||
request_id = f"cmpl-{request.user}-{uuid.uuid4()}"
|
||||
else:
|
||||
request_id = f"cmpl-{uuid.uuid4()}"
|
||||
api_server_logger.info(f"Initialize request {request_id}: {request}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Initialize request {request_id}",
|
||||
request_id=request_id,
|
||||
)
|
||||
tracing.trace_req_start(rid=request_id, trace_content=request.trace_context, role="FastDeploy")
|
||||
del request.trace_context
|
||||
request_prompt_ids = None
|
||||
@@ -147,15 +158,20 @@ class OpenAIServingCompletion:
|
||||
else:
|
||||
raise ValueError("Prompt type must be one of: str, list[str], list[int], list[list[int]]")
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion create_completion: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
error_msg = f"request[{request_id}] create_completion: {e}, {str(traceback.format_exc())}"
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
if request_prompt_ids is not None:
|
||||
request_prompts = request_prompt_ids
|
||||
|
||||
num_choices = len(request_prompts) * (1 if request.n is None else request.n)
|
||||
api_server_logger.info(f"Start preprocessing request: req_id={request_id}), num_choices={num_choices}")
|
||||
log_request(
|
||||
RequestLogLevel.STAGES,
|
||||
message="Start preprocessing request: req_id={request_id}), num_choices={num_choices}",
|
||||
request_id=request_id,
|
||||
num_choices=num_choices,
|
||||
)
|
||||
prompt_batched_token_ids = []
|
||||
prompt_tokens_list = []
|
||||
max_tokens_list = []
|
||||
@@ -169,7 +185,7 @@ class OpenAIServingCompletion:
|
||||
f"OpenAIServingCompletion waiting error: {e}, {str(traceback.format_exc())}, "
|
||||
f"max waiting time: {self.max_waiting_time}"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message="request[{request_id}] {error}", request_id=request_id, error=error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, code=ErrorCode.TIMEOUT, type=ErrorType.TIMEOUT_ERROR)
|
||||
)
|
||||
@@ -188,14 +204,19 @@ class OpenAIServingCompletion:
|
||||
max_tokens_list.append(current_req_dict.get("max_tokens"))
|
||||
del current_req_dict
|
||||
except ParameterError as e:
|
||||
api_server_logger.error(f"OpenAIServingCompletion format error: {e}, {e.message}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] format error: {error}, {error_message}",
|
||||
request_id=request_id,
|
||||
error=e,
|
||||
error_message=e.message,
|
||||
)
|
||||
self.engine_client.semaphore.release()
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(code="400", message=str(e.message), type="invalid_request", param=e.param)
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion format error: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
error_msg = f"request[{request_id}] format error: {e}, {str(traceback.format_exc())}"
|
||||
log_request_error(message=error_msg)
|
||||
self.engine_client.semaphore.release()
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=str(e), code=ErrorCode.INVALID_VALUE, type=ErrorType.INVALID_REQUEST_ERROR)
|
||||
@@ -226,20 +247,20 @@ class OpenAIServingCompletion:
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = (
|
||||
f"OpenAIServingCompletion completion_full_generator error: {e}, {str(traceback.format_exc())}"
|
||||
f"request[{request_id}] completion_full_generator error: {e}, {str(traceback.format_exc())}"
|
||||
)
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", num_choices)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(
|
||||
error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR, code=ErrorCode.CLIENT_ABORTED)
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion create_completion error: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
error_msg = f"request[{request_id}] create_completion error: {e}, {str(traceback.format_exc())}"
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
async def completion_full_generator(
|
||||
@@ -368,10 +389,16 @@ class OpenAIServingCompletion:
|
||||
prompt_tokens_list=prompt_tokens_list,
|
||||
max_tokens_list=max_tokens_list,
|
||||
)
|
||||
api_server_logger.info(f"Completion response: {res.model_dump_json()}")
|
||||
log_request(
|
||||
RequestLogLevel.CONTENT, message="Completion response: {response}", response=res.model_dump_json()
|
||||
)
|
||||
return res
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
|
||||
log_request_error(
|
||||
message="request[{request_id}] error in completion_full_generator: {error}",
|
||||
request_id=request_id,
|
||||
error=e,
|
||||
)
|
||||
finally:
|
||||
trace_print(LoggingEventName.POSTPROCESSING_END, request_id, getattr(request, "user", ""))
|
||||
tracing.trace_req_finish(request_id)
|
||||
@@ -514,8 +541,11 @@ class OpenAIServingCompletion:
|
||||
],
|
||||
)
|
||||
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
api_server_logger.info(
|
||||
f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}"
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}",
|
||||
request_id=request_id,
|
||||
completion_tokens=0,
|
||||
)
|
||||
first_iteration[idx] = False
|
||||
|
||||
@@ -592,8 +622,11 @@ class OpenAIServingCompletion:
|
||||
if send_idx == 0 and not request.return_token_ids:
|
||||
chunk_temp = chunk
|
||||
chunk_temp.choices = choices
|
||||
api_server_logger.info(
|
||||
f"Completion Streaming response send_idx 0: {chunk_temp.model_dump_json()}"
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}",
|
||||
request_id=request_id,
|
||||
completion_tokens=output_tokens[idx],
|
||||
)
|
||||
del chunk_temp
|
||||
|
||||
@@ -646,14 +679,26 @@ class OpenAIServingCompletion:
|
||||
metrics=res["metrics"] if request.collect_metrics else None,
|
||||
)
|
||||
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}",
|
||||
request_id=request_id,
|
||||
finish_reason=chunk.choices[-1].finish_reason if chunk.choices else None,
|
||||
completion_tokens=output_tokens[idx],
|
||||
logprobs=logprobs_res,
|
||||
)
|
||||
|
||||
except asyncio.CancelledError as e:
|
||||
await self.engine_client.abort(f"{request_id}_0", num_choices)
|
||||
error_msg = f"request[{request_id}_0] client disconnected: {str(e)}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] error in completion_stream_generator: {error}, {traceback}",
|
||||
request_id=request_id,
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
yield f"data: {ErrorResponse(error=ErrorInfo(message=str(e), code='400', type=ErrorType.INTERNAL_ERROR)).model_dump_json(exclude_unset=True)}\n\n"
|
||||
finally:
|
||||
trace_print(LoggingEventName.POSTPROCESSING_END, request_id, getattr(request, "user", ""))
|
||||
@@ -887,7 +932,11 @@ class OpenAIServingCompletion:
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in _build_logprobs_response: {str(e)}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="Error in _build_logprobs_response: {error}, {traceback}",
|
||||
error=str(e),
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
return None
|
||||
|
||||
def _build_prompt_logprobs(
|
||||
|
||||
@@ -35,7 +35,7 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.entrypoints.openai.serving_engine import ServeContext, ZmqOpenAIServing
|
||||
from fastdeploy.utils import api_server_logger
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
|
||||
|
||||
def _get_embedding(
|
||||
@@ -140,7 +140,12 @@ class OpenAIServingEmbedding(ZmqOpenAIServing):
|
||||
@override
|
||||
def _build_response(self, ctx: ServeContext, request_output: dict):
|
||||
"""Generate final embedding response"""
|
||||
api_server_logger.info(f"[{ctx.request_id}] Embedding RequestOutput received:{request_output}")
|
||||
log_request(
|
||||
level=RequestLogLevel.CONTENT,
|
||||
message="[{request_id}] Embedding RequestOutput received:{request_output}",
|
||||
request_id=ctx.request_id,
|
||||
request_output=request_output,
|
||||
)
|
||||
|
||||
base = PoolingRequestOutput.from_dict(request_output)
|
||||
embedding_res = EmbeddingRequestOutput.from_base(base)
|
||||
|
||||
@@ -33,6 +33,11 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
InvalidParameterException,
|
||||
)
|
||||
from fastdeploy.envs import FD_SUPPORT_MAX_CONNECTIONS
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.utils import ErrorCode, ErrorType, StatefulSemaphore, api_server_logger
|
||||
|
||||
RequestT = TypeVar("RequestT")
|
||||
@@ -96,13 +101,18 @@ class OpenAIServing(ABC, Generic[RequestT]):
|
||||
is_supported, adjusted_name = self.models.is_supported_model(model_name)
|
||||
if not is_supported:
|
||||
err_msg = f"Unsupported model: [{model_name}]"
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(message=err_msg)
|
||||
return is_supported, adjusted_name
|
||||
|
||||
async def _acquire_semaphore(self, request_id: str) -> bool:
|
||||
"""Acquire engine client semaphore with timeout"""
|
||||
try:
|
||||
api_server_logger.info(f"Acquire request:{request_id} status:{self._get_semaphore().status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Acquire request:{request_id} status:{status}",
|
||||
request_id=request_id,
|
||||
status=self._get_semaphore().status(),
|
||||
)
|
||||
if self.max_waiting_time < 0:
|
||||
await self._get_semaphore().acquire()
|
||||
else:
|
||||
@@ -111,13 +121,18 @@ class OpenAIServing(ABC, Generic[RequestT]):
|
||||
except asyncio.TimeoutError:
|
||||
self._release_semaphore(request_id)
|
||||
error_msg = f"Request waiting timeout, request:{request_id} max waiting time:{self.max_waiting_time}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return False
|
||||
|
||||
def _release_semaphore(self, request_id: str) -> None:
|
||||
"""Release engine client semaphore"""
|
||||
self._get_semaphore().release()
|
||||
api_server_logger.info(f"Release request:{request_id} status:{self._get_semaphore().status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Release request:{request_id} status:{status}",
|
||||
request_id=request_id,
|
||||
status=self._get_semaphore().status(),
|
||||
)
|
||||
|
||||
def _create_error_response(
|
||||
self,
|
||||
@@ -128,7 +143,7 @@ class OpenAIServing(ABC, Generic[RequestT]):
|
||||
) -> ErrorResponse:
|
||||
"""Create standardized error response"""
|
||||
traceback.print_exc()
|
||||
api_server_logger.error(message)
|
||||
log_request_error(message=message)
|
||||
return ErrorResponse(error=ErrorInfo(message=message, type=error_type, code=code, param=param))
|
||||
|
||||
def _generate_request_id(self, request: RequestT) -> str:
|
||||
@@ -193,7 +208,12 @@ class OpenAIServing(ABC, Generic[RequestT]):
|
||||
|
||||
request_id = self._generate_request_id(request)
|
||||
ctx.request_id = request_id
|
||||
api_server_logger.info(f"Initialize request {request_id}: {request}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Initialize request {request_id}: {request}",
|
||||
request_id=request_id,
|
||||
request=request,
|
||||
)
|
||||
|
||||
# Step 2: Semaphore acquisition
|
||||
if not await self._acquire_semaphore(request_id):
|
||||
@@ -252,7 +272,12 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
request_dicts = self._request_to_batch_dicts(ctx)
|
||||
ctx.preprocess_requests = request_dicts
|
||||
for request_dict in request_dicts:
|
||||
api_server_logger.info(f"batch add request_id: {request_dict['request_id']}, request: {request_dict}")
|
||||
log_request(
|
||||
level=RequestLogLevel.CONTENT,
|
||||
message="batch add request_id: {request_id}, request: {request}",
|
||||
request_id=request_dict["request_id"],
|
||||
request=request_dict,
|
||||
)
|
||||
await self.engine_client.format_and_add_data(request_dict)
|
||||
|
||||
def _process_chat_template_kwargs(self, request_dict):
|
||||
@@ -283,7 +308,11 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
while num_choices > 0:
|
||||
request_output_dicts = await asyncio.wait_for(request_output_queue.get(), timeout=60)
|
||||
for request_output_dict in request_output_dicts:
|
||||
api_server_logger.debug(f"Received RequestOutput: {request_output_dict}")
|
||||
log_request(
|
||||
level=RequestLogLevel.FULL,
|
||||
message="Received RequestOutput: {request_output}",
|
||||
request_output=request_output_dict,
|
||||
)
|
||||
if request_output_dict["finished"] is True:
|
||||
num_choices -= 1
|
||||
yield request_output_dict
|
||||
@@ -301,7 +330,12 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
async def _acquire_semaphore(self, request_id: str) -> bool:
|
||||
"""Acquire engine client semaphore with timeout"""
|
||||
try:
|
||||
api_server_logger.info(f"Acquire request:{request_id} status:{self._get_semaphore().status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Acquire request:{request_id} status:{status}",
|
||||
request_id=request_id,
|
||||
status=self._get_semaphore().status(),
|
||||
)
|
||||
if self.max_waiting_time < 0:
|
||||
await self._get_semaphore().acquire()
|
||||
else:
|
||||
@@ -310,14 +344,19 @@ class ZmqOpenAIServing(OpenAIServing):
|
||||
except asyncio.TimeoutError:
|
||||
self._release_semaphore(request_id)
|
||||
error_msg = f"Request waiting timeout, request:{request_id} max waiting time:{self.max_waiting_time}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return False
|
||||
|
||||
@override
|
||||
def _release_semaphore(self, request_id: str) -> None:
|
||||
"""Release engine client semaphore"""
|
||||
self._get_semaphore().release()
|
||||
api_server_logger.info(f"Release request:{request_id} status:{self._get_semaphore().status()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.STAGES,
|
||||
message="Release request:{request_id} status:{status}",
|
||||
request_id=request_id,
|
||||
status=self._get_semaphore().status(),
|
||||
)
|
||||
|
||||
@override
|
||||
def _check_master(self) -> bool:
|
||||
|
||||
@@ -24,7 +24,8 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
ModelList,
|
||||
ModelPermission,
|
||||
)
|
||||
from fastdeploy.utils import ErrorType, api_server_logger, get_host_ip
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.utils import ErrorType, get_host_ip
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -86,7 +87,7 @@ class OpenAIServingModels:
|
||||
err_msg = (
|
||||
f"Only master node can accept models request, please send request to master node: {self.master_ip}"
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
log_request_error(message=err_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
model_infos = [
|
||||
ModelInfo(
|
||||
|
||||
@@ -28,7 +28,7 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.entrypoints.openai.serving_engine import ServeContext, ZmqOpenAIServing
|
||||
from fastdeploy.utils import api_server_logger
|
||||
from fastdeploy.logger.request_logger import RequestLogLevel, log_request
|
||||
|
||||
|
||||
class OpenAIServingReward(ZmqOpenAIServing):
|
||||
@@ -77,7 +77,7 @@ class OpenAIServingReward(ZmqOpenAIServing):
|
||||
response: ChatRewardResponse = None
|
||||
generators: AsyncGenerator[ChatRewardResponse, None] = self.handle(ctx)
|
||||
async for r in generators:
|
||||
api_server_logger.info(f"engine pooling result:{r}")
|
||||
log_request(RequestLogLevel.CONTENT, message="engine pooling result: {result}", result=r)
|
||||
r.data[0].index = idx
|
||||
idx += 1
|
||||
if response is None or isinstance(r, ErrorResponse):
|
||||
@@ -93,7 +93,12 @@ class OpenAIServingReward(ZmqOpenAIServing):
|
||||
@override
|
||||
def _build_response(self, ctx: ServeContext, request_output: dict):
|
||||
"""Generate final reward response"""
|
||||
api_server_logger.info(f"[{ctx.request_id}] Reward RequestOutput received:{request_output}")
|
||||
log_request(
|
||||
level=RequestLogLevel.CONTENT,
|
||||
message="Reward RequestOutput received: request_id={request_id}, output={request_output}",
|
||||
request_id=ctx.request_id,
|
||||
request_output=request_output,
|
||||
)
|
||||
|
||||
base = PoolingRequestOutput.from_dict(request_output)
|
||||
reward_res = RewardRequestOutput.from_base(base)
|
||||
|
||||
@@ -41,6 +41,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import (
|
||||
ToolParser,
|
||||
ToolParserManager,
|
||||
)
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.utils import data_processor_logger
|
||||
|
||||
|
||||
@@ -184,7 +185,9 @@ class Ernie45VLThinkingToolParser(ToolParser):
|
||||
continue
|
||||
|
||||
if not function_call_arr:
|
||||
data_processor_logger.error("No valid tool calls found")
|
||||
log_request_error(
|
||||
message="request[{request_id}] No valid tool calls found", request_id=request.request_id
|
||||
)
|
||||
return ExtractedToolCallInformation(tools_called=False, content=model_output)
|
||||
|
||||
tool_calls = []
|
||||
@@ -226,7 +229,11 @@ class Ernie45VLThinkingToolParser(ToolParser):
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
data_processor_logger.error(f"Error in extracting tool call from response: {str(e)}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in extracting tool call from response: {error}",
|
||||
request_id=request.request_id,
|
||||
error=str(e),
|
||||
)
|
||||
return ExtractedToolCallInformation(tools_called=False, tool_calls=None, content=model_output)
|
||||
|
||||
def extract_tool_calls_streaming(
|
||||
@@ -343,7 +350,11 @@ class Ernie45VLThinkingToolParser(ToolParser):
|
||||
)
|
||||
return delta
|
||||
except Exception as e:
|
||||
data_processor_logger.error(f"Error in streaming tool call extraction: {str(e)}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in streaming tool call extraction: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=str(e),
|
||||
)
|
||||
return None
|
||||
if "</tool_call>" in self.buffer:
|
||||
end_pos = self.buffer.find("</tool_call>")
|
||||
@@ -354,5 +365,9 @@ class Ernie45VLThinkingToolParser(ToolParser):
|
||||
return delta
|
||||
|
||||
except Exception as e:
|
||||
data_processor_logger.error(f"Error in streaming tool call extraction: {str(e)}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in streaming tool call extraction: {error}",
|
||||
request_id=request.get("request_id"),
|
||||
error=str(e),
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -41,6 +41,7 @@ from fastdeploy.entrypoints.openai.tool_parsers.abstract_tool_parser import (
|
||||
ToolParser,
|
||||
ToolParserManager,
|
||||
)
|
||||
from fastdeploy.logger.request_logger import log_request_error
|
||||
from fastdeploy.utils import data_processor_logger as logger
|
||||
|
||||
|
||||
@@ -254,8 +255,11 @@ class ErnieX1ToolParser(ToolParser):
|
||||
logger.debug("Skipping text %s - no arguments", delta_text)
|
||||
delta = None
|
||||
|
||||
elif cur_arguments is None and prev_arguments is not None:
|
||||
logger.error("should be impossible to have arguments reset " "mid-call. skipping streaming anything.")
|
||||
elif not cur_arguments and prev_arguments:
|
||||
log_request_error(
|
||||
message="request[{request_id}] should be impossible to have arguments reset mid-call. skipping streaming anything.",
|
||||
request_id=request.request_id,
|
||||
)
|
||||
delta = None
|
||||
|
||||
elif cur_arguments is not None and prev_arguments is None:
|
||||
|
||||
@@ -45,8 +45,12 @@ from fastdeploy.entrypoints.openai.v1.serving_base import (
|
||||
ServingResponseContext,
|
||||
)
|
||||
from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.metrics.metrics import main_process_metrics
|
||||
from fastdeploy.utils import api_server_logger
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
@@ -178,7 +182,7 @@ class OpenAIServingChat(OpenAiServingBase):
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return None
|
||||
|
||||
@override
|
||||
@@ -302,7 +306,14 @@ class OpenAIServingChat(OpenAiServingBase):
|
||||
max_tokens = request.max_completion_tokens or request.max_tokens
|
||||
choice_completion_tokens = response_ctx.choice_completion_tokens_dict[output.index]
|
||||
choice.finish_reason = self._calc_finish_reason(request_output, max_tokens, choice_completion_tokens)
|
||||
api_server_logger.info(f"Chat Streaming response last send: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Chat Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}",
|
||||
request_id=request_id,
|
||||
finish_reason=choice.finish_reason,
|
||||
completion_tokens=choice_completion_tokens,
|
||||
logprobs=choice.logprobs,
|
||||
)
|
||||
|
||||
yield f"data: {chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
if request_output.finished and response_ctx.remain_choices == 0:
|
||||
@@ -339,7 +350,11 @@ class OpenAIServingChat(OpenAiServingBase):
|
||||
res = ChatCompletionResponse(
|
||||
id=ctx.request_id, model=request.model, choices=choices, created=ctx.created_time, usage=response_ctx.usage
|
||||
)
|
||||
api_server_logger.info(f"Chat response: {res.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.CONTENT,
|
||||
message="Chat response: {response}",
|
||||
response=res.model_dump_json(),
|
||||
)
|
||||
return res
|
||||
|
||||
async def _create_chat_completion_choice(
|
||||
|
||||
@@ -38,7 +38,12 @@ from fastdeploy.entrypoints.openai.v1.serving_base import (
|
||||
ServeContext,
|
||||
ServingResponseContext,
|
||||
)
|
||||
from fastdeploy.utils import ErrorType, api_server_logger
|
||||
from fastdeploy.logger.request_logger import (
|
||||
RequestLogLevel,
|
||||
log_request,
|
||||
log_request_error,
|
||||
)
|
||||
from fastdeploy.utils import ErrorType
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
@@ -94,7 +99,7 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
raise ValueError("Prompt type must be one of: str, list[str], list[int], list[list[int]]")
|
||||
except Exception as e:
|
||||
error_msg = f"OpenAIServingCompletion create_completion: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
log_request_error(message=error_msg)
|
||||
return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INTERNAL_ERROR))
|
||||
|
||||
if request_prompt_ids is not None:
|
||||
@@ -199,7 +204,11 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in _build_logprobs_response: {str(e)}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="Error in _build_logprobs_response: {error}, {traceback}",
|
||||
error=str(e),
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
return None
|
||||
|
||||
async def _build_stream_response(
|
||||
@@ -271,9 +280,21 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
choice.finish_reason = self._calc_finish_reason(
|
||||
request_output, request.max_tokens, choice_completion_tokens
|
||||
)
|
||||
api_server_logger.info(f"Completion Streaming response last send: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response last send: request_id={request_id}, finish_reason={finish_reason}, completion_tokens={completion_tokens}, logprobs={logprobs}",
|
||||
request_id=request_id,
|
||||
finish_reason=choice.finish_reason,
|
||||
completion_tokens=choice_completion_tokens,
|
||||
logprobs=choice.logprobs,
|
||||
)
|
||||
if send_idx == 0 and not request.return_token_ids:
|
||||
api_server_logger.info(f"Completion Streaming response send_idx 0: {chunk.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.LIFECYCLE,
|
||||
message="Completion Streaming response send_idx 0: request_id={request_id}, completion_tokens={completion_tokens}",
|
||||
request_id=request_id,
|
||||
completion_tokens=response_ctx.choice_completion_tokens_dict[output.index],
|
||||
)
|
||||
yield f"data: {chunk.model_dump_json()}\n\n"
|
||||
if request_output.finished and response_ctx.remain_choices == 0:
|
||||
if include_usage:
|
||||
@@ -287,7 +308,12 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
yield f"data: {usage_chunk.model_dump_json(exclude_unset=True)}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_stream_generator: {e}, {str(traceback.format_exc())}")
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in completion_stream_generator: {error}, {traceback}",
|
||||
request_id=request_id,
|
||||
error=e,
|
||||
traceback=traceback.format_exc(),
|
||||
)
|
||||
yield f"data: {ErrorResponse(error=ErrorInfo(message=str(e), code='400', type=ErrorType.INTERNAL_ERROR)).model_dump_json(exclude_unset=True)}\n\n"
|
||||
|
||||
async def _build_full_response(
|
||||
@@ -321,10 +347,18 @@ class OpenAIServingCompletion(OpenAiServingBase):
|
||||
choices=choices,
|
||||
usage=response_ctx.usage,
|
||||
)
|
||||
api_server_logger.info(f"Completion response: {res.model_dump_json()}")
|
||||
log_request(
|
||||
level=RequestLogLevel.FULL,
|
||||
message="Completion response: {response}",
|
||||
response=res.model_dump_json(),
|
||||
)
|
||||
return res
|
||||
except Exception as e:
|
||||
api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
|
||||
log_request_error(
|
||||
message="request[{request_id}] Error in completion_full_generator: {error}",
|
||||
request_id=ctx.request_id,
|
||||
error=e,
|
||||
)
|
||||
return self._create_error_response(str(e))
|
||||
|
||||
def build_completion_choice(
|
||||
|
||||
Reference in New Issue
Block a user