mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-25 18:13:21 +08:00
[BugFix][Optimization] Replace silent failures with catchable exceptions and informative error messages (#6533)
* init * init * fix format * add * add files * add ut * fix some * add ut * add more * add * fix pre-commit * fix pre-commit * fix cover * skip long seq * add * add * fix * remove not need * fix set attr * fix comments * fix comments * fix failed tests --------- Co-authored-by: gongweibao <gognweibao@baidu.com>
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cuda_multiprocess.h"
|
||||
#include "helper.h"
|
||||
#include "paddle/extension.h"
|
||||
|
||||
@@ -74,19 +75,19 @@ void SwapCacheImplAllLayers(
|
||||
auto* cache_cpu_ptr_now =
|
||||
cache_cpu_ptr + first_cpu_block_id * cache_stride;
|
||||
if (mode == 0) { // copy from device to host
|
||||
cudaMemcpyAsync(
|
||||
checkCudaErrors(cudaMemcpyAsync(
|
||||
cache_cpu_ptr_now,
|
||||
cache_gpu_ptr_now,
|
||||
cache_stride * sizeof(DataType_) * consecutive_block_count,
|
||||
cudaMemcpyDeviceToHost,
|
||||
stream);
|
||||
stream));
|
||||
} else { // copy from host to device
|
||||
cudaMemcpyAsync(
|
||||
checkCudaErrors(cudaMemcpyAsync(
|
||||
cache_gpu_ptr_now,
|
||||
cache_cpu_ptr_now,
|
||||
cache_stride * sizeof(DataType_) * consecutive_block_count,
|
||||
cudaMemcpyHostToDevice,
|
||||
stream);
|
||||
stream));
|
||||
}
|
||||
first_gpu_block_id = gpu_block_id;
|
||||
first_cpu_block_id = cpu_block_id;
|
||||
@@ -100,22 +101,22 @@ void SwapCacheImplAllLayers(
|
||||
auto* cache_gpu_ptr_now = cache_gpu_ptr + first_gpu_block_id * cache_stride;
|
||||
auto* cache_cpu_ptr_now = cache_cpu_ptr + first_cpu_block_id * cache_stride;
|
||||
if (mode == 0) { // copy from device to host
|
||||
cudaMemcpyAsync(
|
||||
checkCudaErrors(cudaMemcpyAsync(
|
||||
cache_cpu_ptr_now,
|
||||
cache_gpu_ptr_now,
|
||||
cache_stride * sizeof(DataType_) * consecutive_block_count,
|
||||
cudaMemcpyDeviceToHost,
|
||||
stream);
|
||||
stream));
|
||||
} else { // copy from host to device
|
||||
cudaMemcpyAsync(
|
||||
checkCudaErrors(cudaMemcpyAsync(
|
||||
cache_gpu_ptr_now,
|
||||
cache_cpu_ptr_now,
|
||||
cache_stride * sizeof(DataType_) * consecutive_block_count,
|
||||
cudaMemcpyHostToDevice,
|
||||
stream);
|
||||
stream));
|
||||
}
|
||||
}
|
||||
cudaStreamSynchronize(stream);
|
||||
checkCudaErrors(cudaStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
void SwapCacheAllLayers(
|
||||
@@ -126,7 +127,7 @@ void SwapCacheAllLayers(
|
||||
const std::vector<int64_t>& swap_block_ids_cpu,
|
||||
int rank,
|
||||
int mode) {
|
||||
cudaSetDevice(rank); // used for distributed launch
|
||||
checkCudaErrors(cudaSetDevice(rank)); // used for distributed launch
|
||||
assert(cache_gpu_tensors.size() > 0 &&
|
||||
cache_gpu_tensors.size() == cache_cpu_ptrs.size());
|
||||
switch (cache_gpu_tensors[0].dtype()) {
|
||||
|
||||
Reference in New Issue
Block a user