[Feature] [PD Disaggregation] simplify configuration for pd-disaggregated deployment, and refactor post-init and usage for all ports (#5415)

* [feat] simplify configuration for pd-disaggregated deployment, and refactor post-init and usage for all ports

* [fix] fix some bugs

* [fix] fix rdma port for cache manager/messager

* [fix] temporarily disable the port availability check to see whether the ci test passes

* [feat] simplify args for multi api server

* [fix] fix dp

* [fix] fix port for xpu

* [fix] add tests for ports post processing & fix ci

* [test] fix test_multi_api_server

* [fix] fix rdma_comm_ports args for multi_api_server

* [fix] fix test_common_engine

* [fix] fix test_cache_transfer_manager

* [chore] automatically set FD_ENABLE_MULTI_API_SERVER

* [fix] prevent api server from creating engine_args twice

* [fix] fix test_run_batch

* [fix] fix test_metrics

* [fix] fix splitwise connector init

* [test] add test_rdma_transfer and test_expert_service

* [fix] fix code syntax

* [fix] fix test_rdma_transfer and build wheel with rdma script
This commit is contained in:
Yonghua Li
2025-12-17 15:50:42 +08:00
committed by GitHub
parent cdc0004894
commit 0c8c6369ed
34 changed files with 1323 additions and 409 deletions
@@ -14,6 +14,8 @@
# limitations under the License.
"""
import traceback
from fastdeploy.utils import get_logger
logger = get_logger("cache_messager", "cache_messager.log")
@@ -37,13 +39,66 @@ class RDMACommManager:
prefill_tp_size,
prefill_tp_idx,
):
try:
import importlib
import os
import subprocess
from fastdeploy.platforms import current_platform
if os.getenv("KVCACHE_GDRCOPY_FLUSH_ENABLE", "") == "" and current_platform.is_cuda():
command = ["nvidia-smi", "-i", "0", "--query-gpu=compute_cap", "--format=csv,noheader"]
result = subprocess.run(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
logger.info(f"nvidia-smi command: {command}")
logger.info(f"nvidia-smi output: {result.stdout}")
if result.returncode != 0:
raise RuntimeError(f"Failed to get compute capability via nvidia-smi: {result.stderr.strip()}")
major, minor = result.stdout.strip().split(".")
if major == "8": # for ampere arch
os.environ["KVCACHE_GDRCOPY_FLUSH_ENABLE"] = "1"
logger.info("Setting environment variable: export KVCACHE_GDRCOPY_FLUSH_ENABLE=1")
if os.getenv("KVCACHE_RDMA_NICS", "") == "" and current_platform.is_cuda():
res = importlib.resources.files("fastdeploy.cache_manager.transfer_factory") / "get_rdma_nics.sh"
get_rdma_nics = None
with importlib.resources.as_file(res) as path:
get_rdma_nics = str(path)
nic_type = current_platform.device_name
command = ["bash", get_rdma_nics, nic_type]
result = subprocess.run(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
logger.info(f"get_rdma_nics command: {command}")
logger.info(f"get_rdma_nics output: {result.stdout}")
if result.returncode != 0:
raise RuntimeError(f"Failed to execute script `get_rdma_nics.sh`: {result.stderr.strip()}")
env_name, env_value = result.stdout.strip().split("=")
assert env_name == "KVCACHE_RDMA_NICS"
os.environ[env_name] = env_value
logger.info(f"Setting environment variable: export {env_name}={env_value}")
except Exception as e:
raise RuntimeError(f"Failed to initialize RDMA environment! {e} {traceback.format_exc()}")
try:
import rdma_comm
except:
except ImportError:
raise RuntimeError(
"The installation of the RDMA library failed."
"Confirm whether your network card supports RDMA transmission."
"The installation of the RDMA library failed. Confirm whether your network card supports RDMA transmission."
)
self.messager = rdma_comm.RDMACommunicator(
splitwise_role,
gpu_id,