[Iluvatar] Support CudaGraph and optimize flash_attn_unpadded and fused_neox_rope_embedding (#6553)

This commit is contained in:
yzwu
2026-03-02 14:07:17 +08:00
committed by GitHub
parent ecfd088a03
commit 6674131b0b
25 changed files with 723 additions and 123 deletions
@@ -160,6 +160,8 @@ def main():
if os.getenv("XPU_VISIBLE_DEVICES") is None:
raise ValueError("Please manually set XPU_VISIBLE_DEVICES when launching multi-api-server.")
device_count = len(os.getenv("XPU_VISIBLE_DEVICES").split(","))
elif current_platform.is_iluvatar():
device_count = len(os.getenv("CUDA_VISIBLE_DEVICES", "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15").split(","))
processes = start_servers(
server_count=args.num_servers,