mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 08:21:53 +08:00
[APIServer][Feature] Add configurable worker health check timeout via FD_WORKER_ALIVE_TIMEOUT (#5865)
* Initial plan
* Add configurable FD_WORKER_ALIVE_TIMEOUT environment variable
  Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
* Add test for FD_WORKER_ALIVE_TIMEOUT environment variable
  Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
* Update docs/zh/usage/environment_variables.md
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update docs/usage/environment_variables.md
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Improve test coverage to validate integration with check_health calls
  Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
* Remove test_worker_alive_timeout.py per reviewer feedback
  Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -24,6 +24,7 @@ from typing import List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
import fastdeploy.envs as envs
|
||||
import fastdeploy.metrics.trace as tracing
|
||||
from fastdeploy.entrypoints.openai.protocol import (
|
||||
ChatCompletionRequest,
|
||||
@@ -266,7 +267,7 @@ class OpenAIServingChat:
|
||||
except asyncio.TimeoutError:
|
||||
current_waiting_time += 10
|
||||
if current_waiting_time == 300:
|
||||
status, msg = self.engine_client.check_health()
|
||||
status, msg = self.engine_client.check_health(time_interval_threashold=envs.FD_WORKER_ALIVE_TIMEOUT)
|
||||
if not status:
|
||||
if choices:
|
||||
chunk.choices = choices
|
||||
@@ -576,7 +577,7 @@ class OpenAIServingChat:
|
||||
except asyncio.TimeoutError:
|
||||
current_waiting_time += 10
|
||||
if current_waiting_time == 300:
|
||||
status, msg = self.engine_client.check_health()
|
||||
status, msg = self.engine_client.check_health(time_interval_threashold=envs.FD_WORKER_ALIVE_TIMEOUT)
|
||||
if not status:
|
||||
raise ValueError(f"Engine is not healthy: {msg}")
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user