mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 08:21:53 +08:00
[Iluvatar] Support CudaGraph and optimize flash_attn_unpadded and fused_neox_rope_embedding (#6553)
This commit is contained in:
@@ -160,6 +160,8 @@ def main():
|
||||
if os.getenv("XPU_VISIBLE_DEVICES") is None:
|
||||
raise ValueError("Please manually set XPU_VISIBLE_DEVICES when launching multi-api-server.")
|
||||
device_count = len(os.getenv("XPU_VISIBLE_DEVICES").split(","))
|
||||
elif current_platform.is_iluvatar():
|
||||
device_count = len(os.getenv("CUDA_VISIBLE_DEVICES", "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15").split(","))
|
||||
|
||||
processes = start_servers(
|
||||
server_count=args.num_servers,
|
||||
|
||||
Reference in New Issue
Block a user