Co-authored-by: gongweibao <gognweibao@baidu.com>
This commit is contained in:
gongweibao
2026-03-04 21:55:31 +08:00
committed by GitHub
parent 5c8f5184d9
commit ddb06ff83f
306 changed files with 40627 additions and 34418 deletions
+26 -27
View File
@@ -30,44 +30,43 @@ namespace cub = hipcub;
// Debug kernel: writes input[i] = static_cast<T>(i) for every i in [0, n) and
// prints each stored value.
//
// Only the thread with threadIdx.x == 0 does any work; the loop is serial.
// blockIdx is not consulted, so if the kernel is launched with more than one
// block, thread 0 of every block repeats the same writes and prints.
//
// The value is printed with "%f", so T is expected to be a type that is passed
// through varargs as a double (e.g. float or double).
//
// input: device-accessible buffer with room for at least n elements of T.
// n:     number of elements to fill and print.
template <typename T>
__global__ void set_data(T *input, int n) {
  if (threadIdx.x == 0) {
    for (int i = 0; i < n; ++i) {
      *(input + i) = static_cast<T>(i);
      printf("set[%d]: %f\n", i, *(input + i));
    }
  }
}
// Debug kernel: prints input[0] .. input[n - 1], one value per line.
//
// Only the thread with threadIdx.x == 0 prints; the loop is serial. blockIdx
// is not consulted, so a launch with several blocks prints the data once per
// block.
//
// The value is printed with "%f", so T is expected to be a type that is passed
// through varargs as a double (e.g. float or double).
//
// input: device-accessible buffer holding at least n elements of T.
// n:     number of elements to print.
template <typename T>
__global__ void print_data(T *input, int n) {
  if (threadIdx.x == 0) {
    for (int i = 0; i < n; ++i) {
      printf("input[%d]: %f\n", i, input[i]);
    }
  }
}
// Opens the shared-memory segment named `shm_name`, reads the GPU IPC memory
// handle stored in its shmStruct, maps the corresponding device allocation
// into this process, prints its first 10 floats via print_data, then unmaps
// the allocation and closes the shared-memory segment.
//
// tmp_input: not used by this function body.
// data_ptr:  not used by this function body.
// shm_name:  name of the shared-memory segment to open.
//
// Terminates the process with EXIT_FAILURE if the segment cannot be opened.
// GPU API failures are routed through checkCudaErrors.
// NOTE(review): assumes the exported allocation holds at least 10 floats;
// nothing here verifies that - confirm against the exporting side.
void ReadDataIpc(const paddle::Tensor &tmp_input,
                 int64_t data_ptr,
                 const std::string &shm_name) {
  sharedMemoryInfo info;
  if (sharedMemoryOpen(shm_name.c_str(), sizeof(shmStruct), &info) != 0) {
    printf("Failed to create shared memory slab\n");
    printf("Func ReadDataIpc. Shm_name: %s\n", shm_name.c_str());
    exit(EXIT_FAILURE);
  }
  volatile shmStruct *shm = (volatile shmStruct *)info.addr;

  // Map the allocation exported by the other process into this address space.
  void *ptr = nullptr;
  checkCudaErrors(GPU(IpcOpenMemHandle)(&ptr,
                                        *(GPU(IpcMemHandle_t) *)&shm->memHandle,
                                        GPU(IpcMemLazyEnablePeerAccess)));
  printf("ptr: %p\n", ptr);

  print_data<float><<<1, 1>>>(reinterpret_cast<float *>(ptr), 10);
  // A kernel launch returns no status itself: query the launch error, then
  // synchronize so that faults raised while the kernel runs are reported here
  // rather than at some later, unrelated API call.
  checkCudaErrors(GPU(GetLastError)());
  checkCudaErrors(GPU(DeviceSynchronize)());

  checkCudaErrors(GPU(IpcCloseMemHandle)(ptr));
  sharedMemoryClose(&info);
}
PD_BUILD_STATIC_OP(read_data_ipc)