diff --git a/custom_ops/gpu_ops/set_data_ipc.cu b/custom_ops/gpu_ops/set_data_ipc.cu index b7336e5ae6..b8deb0e5d8 100644 --- a/custom_ops/gpu_ops/set_data_ipc.cu +++ b/custom_ops/gpu_ops/set_data_ipc.cu @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "helper.h" #include "cuda_multiprocess.h" +#include "helper.h" -int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info) { +int sharedMemoryCreate(const char* name, size_t sz, sharedMemoryInfo* info) { #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) info->size = sz; - info->shmHandle = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, - PAGE_READWRITE, 0, (DWORD)sz, name); + info->shmHandle = CreateFileMapping( + INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, (DWORD)sz, name); if (info->shmHandle == 0) { return GetLastError(); } @@ -42,20 +42,22 @@ int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info) { status = ftruncate(info->shmFd, sz); if (status != 0) { - return status; + return errno; } info->addr = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0); - if (info->addr == NULL) { + if (info->addr == MAP_FAILED) { return errno; } + close(info->shmFd); + info->shmFd = -1; return 0; #endif } template -__global__ void set_data(T *input, int n) { +__global__ void set_data(T* input, int n) { if (threadIdx.x == 0) { for (int i = 0; i < n; ++i) { *(input + i) = static_cast(i); @@ -65,7 +67,7 @@ __global__ void set_data(T *input, int n) { } template -__global__ void print_data(const T *input, int n) { +__global__ void print_data(const T* input, int n) { if (threadIdx.x == 0) { for (int i = 0; i < n; ++i) { printf("input[%d]: %f\n", i, input[i]); @@ -81,72 +83,57 @@ void set_data_ipc(const paddle::Tensor& tmp_input, typedef typename traits_::data_t data_t; sharedMemoryInfo info; - volatile shmStruct *shm = NULL; + volatile shmStruct* shm = NULL; if (sharedMemoryCreate(shm_name.c_str(), sizeof(*shm), &info) != 0) { - printf("Failed to create shared memory slab\n"); - printf("Func sharedMemoryCreate. Shm_name: %s\n", shm_name.c_str()); - exit(EXIT_FAILURE); + printf("Failed to create shared memory slab\n"); + printf("Func sharedMemoryCreate. Shm_name: %s\n", shm_name.c_str()); + exit(EXIT_FAILURE); } - shm = (volatile shmStruct *)info.addr; - memset((void *)shm, 0, sizeof(*shm)); + shm = (volatile shmStruct*)info.addr; + memset((void*)shm, 0, sizeof(*shm)); - void *data_ptr_now = reinterpret_cast(const_cast(tmp_input.data())); + void* data_ptr_now = + reinterpret_cast(const_cast(tmp_input.data())); #ifdef PADDLE_WITH_HIP - checkCudaErrors(hipIpcGetMemHandle((hipIpcMemHandle_t *)&shm->memHandle, data_ptr_now)); + checkCudaErrors( + hipIpcGetMemHandle((hipIpcMemHandle_t*)&shm->memHandle, data_ptr_now)); #else - checkCudaErrors(cudaIpcGetMemHandle((cudaIpcMemHandle_t *)&shm->memHandle, data_ptr_now)); + checkCudaErrors( + cudaIpcGetMemHandle((cudaIpcMemHandle_t*)&shm->memHandle, data_ptr_now)); #endif - - } -void SetDataIpc(const paddle::Tensor& tmp_input, - const std::string& shm_name) { - std::vector shape = tmp_input.shape(); +void SetDataIpc(const paddle::Tensor& tmp_input, const std::string& shm_name) { + std::vector shape = tmp_input.shape(); - switch (tmp_input.type()) { - case paddle::DataType::BFLOAT16: { - return set_data_ipc( - tmp_input, - shm_name - ); - } - case paddle::DataType::FLOAT16: { - return set_data_ipc( - tmp_input, - shm_name - ); - } - case paddle::DataType::FLOAT32: { - return set_data_ipc( - tmp_input, - shm_name - ); - } - case paddle::DataType::INT8: { - return set_data_ipc( - tmp_input, - shm_name - ); - } - case paddle::DataType::UINT8: { - return set_data_ipc( - tmp_input, - shm_name - ); - } - default: { - PD_THROW( - "NOT supported data type. " - "Only float16, bfloat16 and float32 are supported. "); - break; - } + switch (tmp_input.type()) { + case paddle::DataType::BFLOAT16: { + return set_data_ipc(tmp_input, shm_name); } + case paddle::DataType::FLOAT16: { + return set_data_ipc(tmp_input, shm_name); + } + case paddle::DataType::FLOAT32: { + return set_data_ipc(tmp_input, shm_name); + } + case paddle::DataType::INT8: { + return set_data_ipc(tmp_input, shm_name); + } + case paddle::DataType::UINT8: { + return set_data_ipc(tmp_input, shm_name); + } + default: { + PD_THROW( + "NOT supported data type. " + "Only float16, bfloat16 and float32 are supported. "); + break; + } + } } PD_BUILD_STATIC_OP(set_data_ipc) .Inputs({"tmp_input"}) - .Attrs({ "shm_name: std::string"}) + .Attrs({"shm_name: std::string"}) .Outputs({"tmp_input_out"}) .SetInplaceMap({{"tmp_input", "tmp_input_out"}}) .SetKernelFn(PD_KERNEL(SetDataIpc)); diff --git a/tests/ce/stable_cases/launch_model.sh b/tests/ce/stable_cases/launch_model.sh index 22c2a0052b..1021aa2b8f 100644 --- a/tests/ce/stable_cases/launch_model.sh +++ b/tests/ce/stable_cases/launch_model.sh @@ -37,6 +37,7 @@ python -m fastdeploy.entrypoints.openai.api_server \ --max-num-seqs 1 \ --gpu-memory-utilization 0.9 \ --model "$MODEL_PATH" \ + --no-shutdown-comm-group-if-worker-idle \ --load-strategy ipc_snapshot \ --dynamic-load-weight &