[BugFix] fix multinode deployment (#2977)

This commit is contained in:
ltd0924
2025-07-24 15:04:04 +08:00
committed by GitHub
parent 3792345c3a
commit f935d6f862
9 changed files with 71 additions and 81 deletions
+9 -34
View File
@@ -138,20 +138,10 @@ class EngineArgs:
"""
Token slot threshold for preallocating decoder blocks.
"""
ips: Optional[List[str]] = None
"""
The IP addresses of all nodes in a multinode deployment
dist_init_ip: Optional[str] = None
"""
The IP address of the master node in a multinode deployment
"""
nnodes: int = 1
"""
The number of nodes in multinode deployment
"""
node_rank: int = 0
"""
The rank of the current node in multinode deployment
"""
swap_space: float = None
@@ -566,24 +556,11 @@ class EngineArgs:
# Cluster system parameters group
system_group = parser.add_argument_group("System Configuration")
system_group.add_argument(
"--dist-init-ip",
default=EngineArgs.dist_init_ip,
help="IP addresses of master node.",
)
system_group.add_argument(
"--nnodes",
type=int,
default=EngineArgs.nnodes,
help="The number of all nodes.",
)
system_group.add_argument(
"--node-rank",
type=int,
default=EngineArgs.node_rank,
help="node rank id (range [0, nnodes)).",
)
"--ips",
type=lambda s: s.split(",") if s else None,
default=EngineArgs.ips,
help=
"IP addresses of all nodes participating in distributed inference.")
# Performance tuning parameters group
perf_group = parser.add_argument_group("Performance Tuning")
@@ -899,9 +876,7 @@ class EngineArgs:
max_num_seqs=self.max_num_seqs,
speculative_config=speculative_cfg,
max_num_batched_tokens=self.max_num_batched_tokens,
dist_init_ip=self.dist_init_ip,
nnodes=self.nnodes,
node_rank=self.node_rank,
ips=self.ips,
use_warmup=self.use_warmup,
engine_worker_queue_port=self.engine_worker_queue_port,
limit_mm_per_prompt=self.limit_mm_per_prompt,