[Cherry-Pick][CI] Sync dev optimizations to 2.4(#7335) (#7346)

* [Cherry-Pick][CI] Sync dev optimizations to 2.4(#7335)
2026-04-23 00:17:25 +08:00 · 2026-04-12 20:21:17 +08:00
parent cdc5fce1b6
commit 19b0038234
17 changed files with 282 additions and 2126 deletions
@@ -69,12 +69,27 @@ jobs:
            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
              ls -ld "${REPO_NAME}"*
-              exit 1
+              echo "Attempting force cleanup with find..."
+              find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" \( ! -type d -o -exec chmod -R u+rwx {} \; \) -exec rm -rf {} + 2>/dev/null || true  # chmod dirs only; rm every match, files included
+              if ls "${REPO_NAME}"* >/dev/null 2>&1; then  # re-check: any remaining match (file or dir) is a failure
+                echo "ERROR: Force cleanup still failed"
+                exit 1
+              else
+                echo "Force cleanup succeeded"
+              fi
            fi
          '

-          wget -q --no-proxy ${fd_archive_url}
-          tar -xf FastDeploy.tar.gz
+          wget -q --no-proxy ${fd_archive_url} || {
+            echo "ERROR: Failed to download archive from ${fd_archive_url}"
+            exit 1
+          }
+
+          tar --no-same-owner -xf FastDeploy.tar.gz || {
+            echo "ERROR: Failed to extract archive"
+            exit 1
+          }
+
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
@@ -145,7 +160,10 @@ jobs:
            docker rm -f ${runner_name} || true
          fi

-          docker run --rm --ipc=host --pid=host --net=host \
+          docker run --rm --net=host \
+          --shm-size=64g \
+          --sysctl kernel.msgmax=1048576 \
+          --sysctl kernel.msgmnb=268435456 \
          --name ${runner_name} \
          -v $(pwd):/workspace \
          -w /workspace \
@@ -160,10 +178,11 @@ jobs:
          -v "${CACHE_DIR}/.cache:/root/.cache" \
          -v "${CACHE_DIR}/ConfigDir:/root/.config" \
          -e TZ="Asia/Shanghai" \
+          -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
          --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
          # Avoid using pip cache to ensure the wheel is updated to the latest version
          wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
+          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/

          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

@@ -206,3 +225,10 @@ jobs:
          fi
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
          exit ${TEST_EXIT_CODE}
+
+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
@@ -81,7 +81,14 @@ jobs:
              if ls "${REPO_NAME}"* >/dev/null 2>&1; then
                echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
                ls -ld "${REPO_NAME}"*
-                exit 1
+                echo "Attempting force cleanup with find..."
+                find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" \( ! -type d -o -exec chmod -R u+rwx {} \; \) -exec rm -rf {} + 2>/dev/null || true  # chmod dirs only; rm every match, files included
+                if ls "${REPO_NAME}"* >/dev/null 2>&1; then  # re-check: any remaining match (file or dir) is a failure
+                  echo "ERROR: Force cleanup still failed"
+                  exit 1
+                else
+                  echo "Force cleanup succeeded"
+                fi
              fi
            '

@@ -111,7 +118,11 @@ jobs:
            exit 1
          fi

-          tar -xf FastDeploy.tar.gz
+          tar --no-same-owner -xf FastDeploy.tar.gz || {
+            echo "ERROR: Failed to extract archive"
+            exit 1
+          }
+
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
@@ -182,7 +193,10 @@ jobs:
            docker rm -f ${runner_name} || true
          fi

-          docker run --rm --ipc=host --pid=host --net=host \
+          docker run --rm --net=host \
+          --shm-size=64g \
+          --sysctl kernel.msgmax=1048576 \
+          --sysctl kernel.msgmnb=268435456 \
          --name ${runner_name} \
          -v $(pwd):/workspace \
          -w /workspace \
@@ -197,17 +211,18 @@ jobs:
          -v "${CACHE_DIR}/.cache:/root/.cache" \
          -v "${CACHE_DIR}/ConfigDir:/root/.config" \
          -e TZ="Asia/Shanghai" \
+          -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
          --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
          # Avoid using pip cache to ensure the wheel is updated to the latest version
          wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
+          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/

          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

          python -m pip install ${fastdeploy_wheel_url}
          python -m pip install pytest

-          wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
+          wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
          chmod +x ./llm-deploy-linux-amd64
          ./llm-deploy-linux-amd64 -python python3.10 \
          -model_name ERNIE-4.5-0.3B-Paddle \
@@ -279,3 +294,10 @@ jobs:
          fi
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
          exit ${TEST_EXIT_CODE}
+
+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
@@ -120,6 +120,7 @@ jobs:
            git config --global user.name "FastDeployCI"
            git config --global user.email "fastdeploy_ci@example.com"
            git log -n 3 --oneline
+
      - name: FastDeploy Build
        shell: bash
        env:
@@ -150,7 +151,8 @@ jobs:
            PARENT_DIR=$(dirname "$WORKSPACE")
            echo "PARENT_DIR:$PARENT_DIR"
            docker run --rm --net=host \
-            --cap-add=SYS_PTRACE --privileged --shm-size=64G \
+            --cap-add=SYS_PTRACE --shm-size=64G \
+            --name ${runner_name} \
            -v $(pwd):/workspace -w /workspace \
            -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
            -v "${CACHE_DIR}/.cache:/root/.cache" \
@@ -164,6 +166,7 @@ jobs:
            -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
            -e "BRANCH_REF=${BRANCH_REF}" \
            -e "CCACHE_MAXSIZE=50G" \
+            -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
            --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
            if [[ -n "${FD_VERSION}" ]]; then
              export FASTDEPLOY_VERSION=${FD_VERSION}
@@ -188,7 +191,7 @@ jobs:
            else
              # Avoid using pip cache to ensure the wheel is updated to the latest version
              wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-              python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
+              python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
            fi

            pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
@@ -237,3 +240,10 @@ jobs:
            target_path_stripped="${target_path#paddle-github-action/}"
            WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
            echo "wheel_path=${WHEEL_PATH}" >> $GITHUB_OUTPUT
+
+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
@@ -8,7 +8,7 @@ on:
        description: "Build Images"
        required: true
        type: string
-        default: "iregistry.baidu-int.com/tiangexiao/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-rc2"
+        default: "iregistry.baidu-int.com/new_rl_infra/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-v2.4.0-rc1"
      FASTDEPLOY_ARCHIVE_URL:
        description: "URL of the compressed FastDeploy code archive."
        required: true
@@ -52,9 +52,10 @@ on:
      wheel_path_rl:
        description: "Output path of the generated wheel"
        value: ${{ jobs.fd-build-rl.outputs.wheel_path_rl }}
+
 jobs:
  fd-build-rl:
-    runs-on: [self-hosted, GPU-Build]
+    runs-on: [self-hosted, GPU-Build-RL]
    timeout-minutes: 360
    outputs:
      wheel_path_rl: ${{ steps.set_output.outputs.wheel_path_rl }}
@@ -107,6 +108,7 @@ jobs:
            git config --global user.name "FastDeployCI"
            git config --global user.email "fastdeploy_ci@example.com"
            git log -n 3 --oneline
+
      - name: FastDeploy Build
        shell: bash
        env:
@@ -137,7 +139,8 @@ jobs:
            PARENT_DIR=$(dirname "$WORKSPACE")
            echo "PARENT_DIR:$PARENT_DIR"
            docker run --rm --net=host \
-            --cap-add=SYS_PTRACE --privileged --shm-size=64G \
+            --cap-add=SYS_PTRACE --shm-size=64G \
+            --name ${runner_name} \
            -v $(pwd):/workspace -w /workspace \
            -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
            -v "${CACHE_DIR}/.cache_rl:/root/.cache" \
@@ -151,6 +154,7 @@ jobs:
            -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
            -e "BRANCH_REF=${BRANCH_REF}" \
            -e "CCACHE_MAXSIZE=50G" \
+            -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
            --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
            if [[ -n "${FD_VERSION}" ]]; then
              export FASTDEPLOY_VERSION=${FD_VERSION}
@@ -162,6 +166,7 @@ jobs:
            cd FastDeploy

            # Avoid using pip cache to ensure the wheel is updated to the latest version
+            python -m pip uninstall paddlepaddle-gpu -y || true
            wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Paddle-RL-Compile/release/3.3/latest/paddlepaddle_gpu-3.3.0.dev-cp310-cp310-linux_x86_64.whl
            python -m pip install paddlepaddle_gpu*

@@ -202,3 +207,10 @@ jobs:
            target_path_stripped="${target_path#paddle-github-action/}"
            WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
            echo "wheel_path_rl=${WHEEL_PATH}" >> $GITHUB_OUTPUT
+
+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
@@ -81,12 +81,27 @@ jobs:
            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
              ls -ld "${REPO_NAME}"*
-              exit 1
+              echo "Attempting force cleanup with find..."
+              find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" \( ! -type d -o -exec chmod -R u+rwx {} \; \) -exec rm -rf {} + 2>/dev/null || true  # chmod dirs only; rm every match, files included
+              if ls "${REPO_NAME}"* >/dev/null 2>&1; then  # re-check: any remaining match (file or dir) is a failure
+                echo "ERROR: Force cleanup still failed"
+                exit 1
+              else
+                echo "Force cleanup succeeded"
+              fi
            fi
          '

-          wget -q --no-proxy ${fd_archive_url}
-          tar -xf FastDeploy.tar.gz
+          wget -q --no-proxy ${fd_archive_url} || {
+            echo "ERROR: Failed to download archive from ${fd_archive_url}"
+            exit 1
+          }
+
+          tar --no-same-owner -xf FastDeploy.tar.gz || {
+            echo "ERROR: Failed to extract archive"
+            exit 1
+          }
+
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
@@ -166,7 +181,10 @@ jobs:
            docker rm -f ${runner_name} || true
          fi

-          docker run --rm --ipc=host --net=host \
+          docker run --rm --net=host \
+          --shm-size=64g \
+          --sysctl kernel.msgmax=1048576 \
+          --sysctl kernel.msgmnb=268435456 \
          --name ${runner_name} \
          -v $(pwd):/workspace -w /workspace \
          -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
@@ -183,6 +201,7 @@ jobs:
          -e "fd_wheel_url=${fd_wheel_url}" \
          -e "BASE_REF=${BASE_REF}" \
          -e "IS_PR=${IS_PR}" \
+          -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
          --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -c '

          git config --global --add safe.directory /workspace/FastDeploy
@@ -191,8 +210,7 @@ jobs:

          # Avoid using pip cache to ensure the wheel is updated to the latest version
          wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-
+          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
          pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

          python -m pip install -r scripts/unittest_requirement.txt
@@ -204,3 +222,10 @@ jobs:
          export CUDA_VISIBLE_DEVICES=0,1,2,3
          bash scripts/run_gpu_4cards.sh
          '
+
+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
@@ -78,11 +78,27 @@ jobs:
            if ls /workspace/* >/dev/null 2>&1; then
              echo "ERROR: Failed to clean /workspace/* after multiple attempts"
              ls -ld /workspace/*
-              exit 1
+              echo "Attempting force cleanup with find..."
+              find /workspace -mindepth 1 -maxdepth 1 \( ! -type d -o -exec chmod -R u+rwx {} \; \) -exec rm -rf {} + 2>/dev/null || true  # chmod dirs only; rm every entry, files included
+              if ls /workspace/* >/dev/null 2>&1; then  # re-check: any remaining entry (file or dir) is a failure
+                echo "ERROR: Force cleanup failed. Exiting..."
+                exit 1
+              else
+                echo "Force cleanup succeeded."
+              fi
            fi
          '
-          wget -q --no-proxy ${paddletest_archive_url}
-          tar -xf PaddleTest.tar.gz
+
+          wget -q --no-proxy ${paddletest_archive_url} || {
+            echo "ERROR: Failed to download archive from ${paddletest_archive_url}"
+            exit 1
+          }
+
+          tar --no-same-owner -xf PaddleTest.tar.gz || {
+            echo "ERROR: Failed to extract archive"
+            exit 1
+          }
+
          rm -rf PaddleTest.tar.gz
          cd PaddleTest
          git config --global user.name "FastDeployCI"
@@ -152,7 +168,11 @@ jobs:
            echo "Removing stale container: ${runner_name}"
            docker rm -f ${runner_name} || true
          fi
-          docker run --rm --ipc=host --pid=host --net=host \
+
+          docker run --rm --net=host \
+          --shm-size=64g \
+          --sysctl kernel.msgmax=1048576 \
+          --sysctl kernel.msgmnb=268435456 \
          --name ${runner_name} \
          -v $(pwd):/workspace \
          -w /workspace \
@@ -167,10 +187,11 @@ jobs:
          -v "${CACHE_DIR}/.cache:/root/.cache" \
          -v "${CACHE_DIR}/ConfigDir:/root/.config" \
          -e TZ="Asia/Shanghai" \
+          -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
          --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
          # Avoid using pip cache to ensure the wheel is updated to the latest version
          wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
+          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/

          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

@@ -221,3 +242,10 @@ jobs:
        run: |
          echo "logprob test failed with exit code ${{ env.LOGPROB_EXIT_CODE }}"
          exit 8
+
+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
@@ -83,12 +83,27 @@ jobs:
            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
              ls -ld "${REPO_NAME}"*
-              exit 1
+              echo "Attempting force cleanup with find..."
+              find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" \( ! -type d -o -exec chmod -R u+rwx {} \; \) -exec rm -rf {} + 2>/dev/null || true  # chmod dirs only; rm every match, files included
+              if ls "${REPO_NAME}"* >/dev/null 2>&1; then  # re-check: any remaining match (file or dir) is a failure
+                echo "ERROR: Force cleanup still failed"
+                exit 1
+              else
+                echo "Force cleanup succeeded"
+              fi
            fi
          '

-          wget -q --no-proxy ${fd_archive_url}
-          tar -xf FastDeploy.tar.gz
+          wget -q --no-proxy ${fd_archive_url} || {
+            echo "ERROR: Failed to download archive from ${fd_archive_url}"
+            exit 1
+          }
+
+          tar --no-same-owner -xf FastDeploy.tar.gz || {
+            echo "ERROR: Failed to extract archive"
+            exit 1
+          }
+
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
@@ -163,6 +178,7 @@ jobs:
          fi

          docker run --rm --net=host \
+          --shm-size=64G \
          --name ${runner_name} \
          -v $(pwd):/workspace \
          -w /workspace \
@@ -181,14 +197,20 @@ jobs:
          -e "FD_ZMQ_SEND_RESPONSE_SERVER_PORT=${FD_ZMQ_SEND_RESPONSE_SERVER_PORT}" \
          -e "FD_ZMQ_CONTROL_CMD_SERVER_PORTS=${FD_ZMQ_CONTROL_CMD_SERVER_PORTS}" \
          -e "fd_wheel_url=${fd_wheel_url}" \
+          -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
          --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
          git config --global --add safe.directory /workspace/FastDeploy
          cd FastDeploy
          # Avoid using pip cache to ensure the wheel is updated to the latest version
          wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-
-          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
          python -m pip install ${fd_wheel_url}
          bash scripts/run_pre_ce.sh
          '
+
+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
@@ -81,12 +81,27 @@ jobs:
            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
              ls -ld "${REPO_NAME}"*
-              exit 1
+              echo "Attempting force cleanup with find..."
+              find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" \( ! -type d -o -exec chmod -R u+rwx {} \; \) -exec rm -rf {} + 2>/dev/null || true  # chmod dirs only; rm every match, files included
+              if ls "${REPO_NAME}"* >/dev/null 2>&1; then  # re-check: any remaining match (file or dir) is a failure
+                echo "ERROR: Force cleanup still failed"
+                exit 1
+              else
+                echo "Force cleanup succeeded"
+              fi
            fi
          '

-          wget -q --no-proxy ${fd_archive_url}
-          tar -xf FastDeploy.tar.gz
+          wget -q --no-proxy ${fd_archive_url} || {
+            echo "ERROR: Failed to download archive from ${fd_archive_url}"
+            exit 1
+          }
+
+          tar --no-same-owner -xf FastDeploy.tar.gz || {
+            echo "ERROR: Failed to extract archive"
+            exit 1
+          }
+
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
@@ -160,6 +175,7 @@ jobs:
          fi

          docker run --rm --net=host \
+          --shm-size=64G \
          --name ${runner_name} \
          -v $(pwd):/workspace \
          -w /workspace \
@@ -175,10 +191,11 @@ jobs:
          -v "${CACHE_DIR}/.cache:/root/.cache" \
          -v "${CACHE_DIR}/ConfigDir:/root/.config" \
          -e TZ="Asia/Shanghai" \
+          -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
          --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
          # Avoid using pip cache to ensure the wheel is updated to the latest version
          wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
+          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/

          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

@@ -190,6 +207,7 @@ jobs:
          TEST_EXIT_CODE=0
          pushd tests/ce/stable_cases
          bash launch_model.sh /MODELDATA
+
          TEST_EXIT_CODE=0
          bash run.sh || {
            TEST_EXIT_CODE=1
@@ -211,6 +229,7 @@ jobs:

            echo "======================================================="
          }
+
          popd
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
          '
@@ -220,3 +239,10 @@ jobs:
          fi
          echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
          exit ${TEST_EXIT_CODE}
+
+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
@@ -85,12 +85,27 @@ jobs:
            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
              ls -ld "${REPO_NAME}"*
-              exit 1
+              echo "Attempting force cleanup with find..."
+              find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" \( ! -type d -o -exec chmod -R u+rwx {} \; \) -exec rm -rf {} + 2>/dev/null || true  # chmod dirs only; rm every match, files included
+              if ls "${REPO_NAME}"* >/dev/null 2>&1; then  # re-check: any remaining match (file or dir) is a failure
+                echo "ERROR: Force cleanup still failed"
+                exit 1
+              else
+                echo "Force cleanup succeeded"
+              fi
            fi
          '

-          wget -q --no-proxy ${fd_archive_url}
-          tar -xf FastDeploy.tar.gz
+          wget -q --no-proxy ${fd_archive_url} || {
+            echo "ERROR: Failed to download archive from ${fd_archive_url}"
+            exit 1
+          }
+
+          tar --no-same-owner -xf FastDeploy.tar.gz || {
+            echo "ERROR: Failed to extract archive"
+            exit 1
+          }
+
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
@@ -173,12 +188,16 @@ jobs:
          export RDMA_DEVICES=$(find /dev/infiniband/uverbs* -maxdepth 1 -not -type d | xargs -I{} echo '--device {}:{}')

          docker run --rm --net=host \
+          --sysctl kernel.msgmax=1048576 \
+          --sysctl kernel.msgmnb=268435456 \
          --name ${runner_name} \
          --cap-add=SYS_PTRACE --cap-add=IPC_LOCK \
-          --shm-size=64G \
+          --shm-size=128G \
          ${RDMA_DEVICES} \
          --device=/dev/infiniband/rdma_cm \
          --ulimit memlock=-1:-1 \
+          --ulimit nofile=65536:65536 \
+          --ulimit nproc=8192:8192 \
          -v $(pwd):/workspace -w /workspace \
          -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
          -v "${CACHE_DIR}/.cache:/root/.cache" \
@@ -198,6 +217,7 @@ jobs:
          -e "fd_wheel_url=${fd_wheel_url}" \
          -e "BASE_REF=${BASE_REF}" \
          -e "IS_PR=${IS_PR}" \
+          -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
          --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '

          git config --global --add safe.directory /workspace/FastDeploy
@@ -205,7 +225,7 @@ jobs:
          git diff origin/${BASE_REF}..HEAD --unified=0 > diff.txt
          # Avoid using pip cache to ensure the wheel is updated to the latest version
          wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
-          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
+          python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
          pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

          python -m pip install -r scripts/unittest_requirement.txt
@@ -380,6 +400,13 @@ jobs:
          echo "coverage passed"
          exit 0

+      - name: Terminate and delete the container
+        if: always()  # also runs after a failed or cancelled job
+        run: |
+          set +e  # best-effort: a missing container must not abort the script
+          docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete'  # empty the bind-mounted workspace from inside the container
+          docker rm -f ${{ runner.name }} || true  # last command sets the step status; never fail the job on cleanup
+
  diff_coverage_report:
    needs: run_tests_with_coverage
    if: always()
@@ -0,0 +1,19 @@
+name: PR Build and Test  # NOTE(review): presumably identical to the main PR workflow name so both land in one concurrency group - confirm
+on:
+  pull_request:
+    types: [closed]  # runs only when a pull request is closed
+    branches: [develop, release/**]  # filters on the PR base branch
+permissions: read-all
+
+concurrency:
+  group: ${{ github.event.pull_request.number }}-${{ github.workflow }}  # keyed by PR number plus workflow name
+  cancel-in-progress: true  # a new run in this group cancels the run already in progress
+
+jobs:
+  cancel:
+    name: Cancel PR Build and Test for ${{ github.event.pull_request.number }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Cancel PR Build and Test
+        run: |
+          exit 0  # no-op; the concurrency block above is what does the cancelling
@@ -4,7 +4,6 @@ on:
  pull_request:
    branches:
      - develop
-      - 'release/*'
  workflow_dispatch:

 concurrency: