mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Add Deterministic Inference Support (#6476)
* add * [tests] Add Paddle attention determinism tests and refactor resource manager Add comprehensive determinism tests for Paddle attention layer and refactor resource manager for deterministic mode support. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * add * add * add * add * add more * add more * fixsome * fixsome * fix bugs * fix bugs * only in gpu * add docs * fix comments * fix some * fix some * fix comments * add more * fix potential problem * remove not need * remove not need * remove no need * fix bug * fix bugs * fix comments * fix comments * Update tests/ce/deterministic/test_determinism_verification.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/inter_communicator/test_ipc_signal.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/layers/test_paddle_attention_determinism.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/engine/test_sampling_params_determinism.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/layers/test_paddle_attention_determinism.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update tests/layers/test_paddle_attention_determinism_standalone.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * fix comments * fix import error * fix a bug * fix bugs * fix bugs * fix coverage * refine codes * refine code * fix comments * fix comments * fix comments * rm not need * fix allreduce large tensor bug * mv log files * mv log files * add files --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.gpu
|
||||
|
||||
|
||||
def test_rollout_model_with_distributed_launch():
|
||||
"""
|
||||
test_rollout_model
|
||||
"""
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
rollout_script = os.path.join(current_dir, "allreduce_deterministic.py")
|
||||
|
||||
command = [sys.executable, "-m", "paddle.distributed.launch", "--gpus", "0,1", rollout_script]
|
||||
|
||||
print(f"Executing command: {' '.join(command)}")
|
||||
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||
|
||||
try:
|
||||
stdout, stderr = process.communicate(timeout=300)
|
||||
return_code = process.returncode
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
stdout, stderr = process.communicate()
|
||||
return_code = -1
|
||||
|
||||
print("\n" + "=" * 50 + " STDOUT " + "=" * 50)
|
||||
print(stdout)
|
||||
print("\n" + "=" * 50 + " STDERR " + "=" * 50)
|
||||
print(stderr)
|
||||
|
||||
assert return_code == 0, f"Process exited with code {return_code}\nSTDERR: {stderr[-500:] if stderr else 'N/A'}"
|
||||
Reference in New Issue
Block a user