Files
FastDeploy/tests/layers/test_trtllm_allreduce_rms_fusion.py
T
Bingoo 6b891da02b [Optimization] enable trtllm_all_reduce fusion kernel in glm model (#6660)
* enable trtllm_all_reduce fusion kernel in glm model

* fix conflict

* format update

* fix a bug

* modify test

* modify test

* support empty tensor and modify test

* fix test_linear config issues

* modify test name

* add edge test case

* modify format

* fix conflict

* modify default max token num in trtllm_allreduce_fusion

* add max token num branch for trtllm_allreduce_fusion

* fix format

* fix rmsnorm config issue

* modify 2025 to 2026

* using compat guard

* Lazily import flashinfer.comm and fix test config issue

* fix test issues

* add flashinfer cache dir clean mechanism

* fix some issues
2026-04-16 14:10:19 +08:00

57 lines
1.9 KiB
Python

"""
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
import os
import shutil
import subprocess
import sys
def test_run_distributed():
    """Launch the multi-GPU distributed test via ``paddle.distributed.launch``.

    Runs ``trtllm_allreduce_rms_fusion.py`` (located next to this file) as a
    subprocess on GPUs 0 and 1, prints the captured stdout/stderr, and asserts
    the launcher exited with an accepted return code.

    Raises:
        AssertionError: if the subprocess exits with an unexpected code or
            times out (timeout is mapped to the sentinel code -1).
    """
    # Clear the flashinfer JIT cache so stale compiled kernels cannot mask
    # build problems in the fused all-reduce kernel.
    flashinfer_cache_dir = os.path.join(os.sep, "root", ".cache", "flashinfer")
    if os.path.exists(flashinfer_cache_dir):
        print(f"=== Clearing flashinfer cache directory: {flashinfer_cache_dir} ===")
        # shutil.rmtree is portable and avoids spawning an external `rm`;
        # like the previous `rm -rf ... check=True`, it raises on failure.
        shutil.rmtree(flashinfer_cache_dir)
    current_dir = os.path.dirname(os.path.abspath(__file__))
    run_script = os.path.join(current_dir, "trtllm_allreduce_rms_fusion.py")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    command = [
        sys.executable,
        "-m",
        "paddle.distributed.launch",
        "--gpus",
        "0,1",
        run_script,
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    try:
        stdout, stderr = process.communicate(timeout=400)
        return_code = process.returncode
    except subprocess.TimeoutExpired:
        # Kill the launcher and collect whatever output it produced so far.
        process.kill()
        stdout, stderr = process.communicate()
        return_code = -1  # sentinel: guarantees the assertion below fails
    print(f"=== Distributed test stdout ===\n{stdout}")
    print(f"=== Distributed test stderr ===\n{stderr}")
    # NOTE(review): 250 is accepted in addition to 0 — presumably the code
    # paddle.distributed.launch returns when the test self-skips; confirm.
    assert return_code in (0, 250), f"Process exited with code {return_code}"
# Guard the direct invocation so importing this module (e.g. pytest
# collection, which discovers test_run_distributed on its own) does not
# immediately launch the multi-GPU subprocess as an import side effect.
if __name__ == "__main__":
    test_run_distributed()