[BugFix][Optimization] Replace silent failures with catchable exceptions and informative error messages (#6533)

* init

* init

* fix format

* add

* add files

* add ut

* fix some

* add ut

* add more

* add

* fix pre-commit

* fix pre-commit

* fix cover

* skip long seq

* add

* add

* fix

* remove what is not needed

* fix set attr

* fix comments

* fix comments

* fix failed tests

---------

Co-authored-by: gongweibao <gognweibao@baidu.com>
This commit is contained in:
gongweibao
2026-03-16 21:32:43 +08:00
committed by GitHub
parent d113397b09
commit a6351dea0b
61 changed files with 1595 additions and 171 deletions
@@ -63,8 +63,8 @@ void decode_alltoall_transpose(paddle::Tensor& inp,
auto hidden_size = inp.shape()[1];
auto reg_buffer = reinterpret_cast<void*>(_reg_buffer);
if (reg_buffer) {
-cudaMemcpyAsync(
-reg_buffer, inp.data(), input_size, cudaMemcpyDeviceToDevice, stream);
+CUDACHECK(cudaMemcpyAsync(
+reg_buffer, inp.data(), input_size, cudaMemcpyDeviceToDevice, stream));
} else {
reg_buffer = inp.data();
}
@@ -124,8 +124,8 @@ void all_reduce(paddle::Tensor& inp,
auto input_size = inp.numel() * phi::SizeOf(inp.dtype());
auto reg_buffer = reinterpret_cast<void*>(_reg_buffer);
if (reg_buffer) {
-cudaMemcpyAsync(
-reg_buffer, inp.data(), input_size, cudaMemcpyDeviceToDevice, stream);
+CUDACHECK(cudaMemcpyAsync(
+reg_buffer, inp.data(), input_size, cudaMemcpyDeviceToDevice, stream));
} else {
reg_buffer = inp.data();
}