[Cherry-Pick][CI] Sync dev optimizations to 2.4(#7335) (#7346)

* [Cherry-Pick][CI] Sync dev optimizations to 2.4(#7335)
This commit is contained in:
YuBaoku
2026-04-12 20:21:17 +08:00
committed by GitHub
parent cdc5fce1b6
commit 19b0038234
17 changed files with 282 additions and 2126 deletions
+31 -5
View File
@@ -69,12 +69,27 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
wget -q --no-proxy ${fd_archive_url} || {
echo "ERROR: Failed to download archive from ${fd_archive_url}"
exit 1
}
tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
@@ -145,7 +160,10 @@ jobs:
docker rm -f ${runner_name} || true
fi
docker run --rm --ipc=host --pid=host --net=host \
docker run --rm --net=host \
--shm-size=64g \
--sysctl kernel.msgmax=1048576 \
--sysctl kernel.msgmnb=268435456 \
--name ${runner_name} \
-v $(pwd):/workspace \
-w /workspace \
@@ -160,10 +178,11 @@ jobs:
-v "${CACHE_DIR}/.cache:/root/.cache" \
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
-e TZ="Asia/Shanghai" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
# Avoid using pip cache to ensure the wheel is updated to the latest version
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
@@ -206,3 +225,10 @@ jobs:
fi
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
exit ${TEST_EXIT_CODE}
# Best-effort teardown of the job's container. `if: always()` makes this step
# run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove, matching the `docker rm -f ... || true` used
    # before `docker run`.
    docker rm -f "${{ runner.name }}" || true
+27 -5
View File
@@ -81,7 +81,14 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'
@@ -111,7 +118,11 @@ jobs:
exit 1
fi
tar -xf FastDeploy.tar.gz
tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
@@ -182,7 +193,10 @@ jobs:
docker rm -f ${runner_name} || true
fi
docker run --rm --ipc=host --pid=host --net=host \
docker run --rm --net=host \
--shm-size=64g \
--sysctl kernel.msgmax=1048576 \
--sysctl kernel.msgmnb=268435456 \
--name ${runner_name} \
-v $(pwd):/workspace \
-w /workspace \
@@ -197,17 +211,18 @@ jobs:
-v "${CACHE_DIR}/.cache:/root/.cache" \
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
-e TZ="Asia/Shanghai" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
# Avoid using pip cache to ensure the wheel is updated to the latest version
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
python -m pip install ${fastdeploy_wheel_url}
python -m pip install pytest
wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
chmod +x ./llm-deploy-linux-amd64
./llm-deploy-linux-amd64 -python python3.10 \
-model_name ERNIE-4.5-0.3B-Paddle \
@@ -279,3 +294,10 @@ jobs:
fi
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
exit ${TEST_EXIT_CODE}
# Best-effort teardown of the job's container. `if: always()` makes this step
# run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove, matching the `docker rm -f ... || true` used
    # before `docker run`.
    docker rm -f "${{ runner.name }}" || true
+12 -2
View File
@@ -120,6 +120,7 @@ jobs:
git config --global user.name "FastDeployCI"
git config --global user.email "fastdeploy_ci@example.com"
git log -n 3 --oneline
- name: FastDeploy Build
shell: bash
env:
@@ -150,7 +151,8 @@ jobs:
PARENT_DIR=$(dirname "$WORKSPACE")
echo "PARENT_DIR:$PARENT_DIR"
docker run --rm --net=host \
--cap-add=SYS_PTRACE --privileged --shm-size=64G \
--cap-add=SYS_PTRACE --shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache:/root/.cache" \
@@ -164,6 +166,7 @@ jobs:
-e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
-e "BRANCH_REF=${BRANCH_REF}" \
-e "CCACHE_MAXSIZE=50G" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
if [[ -n "${FD_VERSION}" ]]; then
export FASTDEPLOY_VERSION=${FD_VERSION}
@@ -188,7 +191,7 @@ jobs:
else
# Avoid using pip cache to ensure the wheel is updated to the latest version
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
fi
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
@@ -237,3 +240,10 @@ jobs:
target_path_stripped="${target_path#paddle-github-action/}"
WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
echo "wheel_path=${WHEEL_PATH}" >> $GITHUB_OUTPUT
# Best-effort teardown of the build container. `if: always()` makes this step
# run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove.
    docker rm -f "${{ runner.name }}" || true
+15 -3
View File
@@ -8,7 +8,7 @@ on:
description: "Build Images"
required: true
type: string
default: "iregistry.baidu-int.com/tiangexiao/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-rc2"
default: "iregistry.baidu-int.com/new_rl_infra/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-v2.4.0-rc1"
FASTDEPLOY_ARCHIVE_URL:
description: "URL of the compressed FastDeploy code archive."
required: true
@@ -52,9 +52,10 @@ on:
wheel_path_rl:
description: "Output path of the generated wheel"
value: ${{ jobs.fd-build-rl.outputs.wheel_path_rl }}
jobs:
fd-build-rl:
runs-on: [self-hosted, GPU-Build]
runs-on: [self-hosted, GPU-Build-RL]
timeout-minutes: 360
outputs:
wheel_path_rl: ${{ steps.set_output.outputs.wheel_path_rl }}
@@ -107,6 +108,7 @@ jobs:
git config --global user.name "FastDeployCI"
git config --global user.email "fastdeploy_ci@example.com"
git log -n 3 --oneline
- name: FastDeploy Build
shell: bash
env:
@@ -137,7 +139,8 @@ jobs:
PARENT_DIR=$(dirname "$WORKSPACE")
echo "PARENT_DIR:$PARENT_DIR"
docker run --rm --net=host \
--cap-add=SYS_PTRACE --privileged --shm-size=64G \
--cap-add=SYS_PTRACE --shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache_rl:/root/.cache" \
@@ -151,6 +154,7 @@ jobs:
-e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \
-e "BRANCH_REF=${BRANCH_REF}" \
-e "CCACHE_MAXSIZE=50G" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c '
if [[ -n "${FD_VERSION}" ]]; then
export FASTDEPLOY_VERSION=${FD_VERSION}
@@ -162,6 +166,7 @@ jobs:
cd FastDeploy
# Avoid using pip cache to ensure the wheel is updated to the latest version
python -m pip uninstall paddlepaddle-gpu -y || true
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Paddle-RL-Compile/release/3.3/latest/paddlepaddle_gpu-3.3.0.dev-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu*
@@ -202,3 +207,10 @@ jobs:
target_path_stripped="${target_path#paddle-github-action/}"
WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name}
echo "wheel_path_rl=${WHEEL_PATH}" >> $GITHUB_OUTPUT
# Best-effort teardown of the RL build container. `if: always()` makes this
# step run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove.
    docker rm -f "${{ runner.name }}" || true
+31 -6
View File
@@ -81,12 +81,27 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
wget -q --no-proxy ${fd_archive_url} || {
echo "ERROR: Failed to download archive from ${fd_archive_url}"
exit 1
}
tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
@@ -166,7 +181,10 @@ jobs:
docker rm -f ${runner_name} || true
fi
docker run --rm --ipc=host --net=host \
docker run --rm --net=host \
--shm-size=64g \
--sysctl kernel.msgmax=1048576 \
--sysctl kernel.msgmnb=268435456 \
--name ${runner_name} \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
@@ -183,6 +201,7 @@ jobs:
-e "fd_wheel_url=${fd_wheel_url}" \
-e "BASE_REF=${BASE_REF}" \
-e "IS_PR=${IS_PR}" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -c '
git config --global --add safe.directory /workspace/FastDeploy
@@ -191,8 +210,7 @@ jobs:
# Avoid using pip cache to ensure the wheel is updated to the latest version
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
python -m pip install -r scripts/unittest_requirement.txt
@@ -204,3 +222,10 @@ jobs:
export CUDA_VISIBLE_DEVICES=0,1,2,3
bash scripts/run_gpu_4cards.sh
'
# Best-effort teardown of the job's container. `if: always()` makes this step
# run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove, matching the `docker rm -f ... || true` used
    # before `docker run`.
    docker rm -f "${{ runner.name }}" || true
+33 -5
View File
@@ -78,11 +78,27 @@ jobs:
if ls /workspace/* >/dev/null 2>&1; then
echo "ERROR: Failed to clean /workspace/* after multiple attempts"
ls -ld /workspace/*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls /workspace/* >/dev/null 2>&1; then
echo "ERROR: Force cleanup failed. Exiting..."
exit 1
else
echo "Force cleanup succeeded."
fi
fi
'
wget -q --no-proxy ${paddletest_archive_url}
tar -xf PaddleTest.tar.gz
wget -q --no-proxy ${paddletest_archive_url} || {
echo "ERROR: Failed to download archive from ${paddletest_archive_url}"
exit 1
}
tar --no-same-owner -xf PaddleTest.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf PaddleTest.tar.gz
cd PaddleTest
git config --global user.name "FastDeployCI"
@@ -152,7 +168,11 @@ jobs:
echo "Removing stale container: ${runner_name}"
docker rm -f ${runner_name} || true
fi
docker run --rm --ipc=host --pid=host --net=host \
docker run --rm --net=host \
--shm-size=64g \
--sysctl kernel.msgmax=1048576 \
--sysctl kernel.msgmnb=268435456 \
--name ${runner_name} \
-v $(pwd):/workspace \
-w /workspace \
@@ -167,10 +187,11 @@ jobs:
-v "${CACHE_DIR}/.cache:/root/.cache" \
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
-e TZ="Asia/Shanghai" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
# Avoid using pip cache to ensure the wheel is updated to the latest version
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
@@ -221,3 +242,10 @@ jobs:
run: |
echo "logprob test failed with exit code ${{ env.LOGPROB_EXIT_CODE }}"
exit 8
# Best-effort teardown of the job's container. `if: always()` makes this step
# run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove, matching the `docker rm -f ... || true` used
    # before `docker run`.
    docker rm -f "${{ runner.name }}" || true
+28 -6
View File
@@ -83,12 +83,27 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
wget -q --no-proxy ${fd_archive_url} || {
echo "ERROR: Failed to download archive from ${fd_archive_url}"
exit 1
}
tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
@@ -163,6 +178,7 @@ jobs:
fi
docker run --rm --net=host \
--shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace \
-w /workspace \
@@ -181,14 +197,20 @@ jobs:
-e "FD_ZMQ_SEND_RESPONSE_SERVER_PORT=${FD_ZMQ_SEND_RESPONSE_SERVER_PORT}" \
-e "FD_ZMQ_CONTROL_CMD_SERVER_PORTS=${FD_ZMQ_CONTROL_CMD_SERVER_PORTS}" \
-e "fd_wheel_url=${fd_wheel_url}" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
git config --global --add safe.directory /workspace/FastDeploy
cd FastDeploy
# Avoid using pip cache to ensure the wheel is updated to the latest version
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
python -m pip install ${fd_wheel_url}
bash scripts/run_pre_ce.sh
'
# Best-effort teardown of the job's container. `if: always()` makes this step
# run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove.
    docker rm -f "${{ runner.name }}" || true
+30 -4
View File
@@ -81,12 +81,27 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
wget -q --no-proxy ${fd_archive_url} || {
echo "ERROR: Failed to download archive from ${fd_archive_url}"
exit 1
}
tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
@@ -160,6 +175,7 @@ jobs:
fi
docker run --rm --net=host \
--shm-size=64G \
--name ${runner_name} \
-v $(pwd):/workspace \
-w /workspace \
@@ -175,10 +191,11 @@ jobs:
-v "${CACHE_DIR}/.cache:/root/.cache" \
-v "${CACHE_DIR}/ConfigDir:/root/.config" \
-e TZ="Asia/Shanghai" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc '
# Avoid using pip cache to ensure the wheel is updated to the latest version
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
@@ -190,6 +207,7 @@ jobs:
TEST_EXIT_CODE=0
pushd tests/ce/stable_cases
bash launch_model.sh /MODELDATA
TEST_EXIT_CODE=0
bash run.sh || {
TEST_EXIT_CODE=1
@@ -211,6 +229,7 @@ jobs:
echo "======================================================="
}
popd
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
'
@@ -220,3 +239,10 @@ jobs:
fi
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}"
exit ${TEST_EXIT_CODE}
# Best-effort teardown of the job's container. `if: always()` makes this step
# run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove.
    docker rm -f "${{ runner.name }}" || true
+32 -5
View File
@@ -85,12 +85,27 @@ jobs:
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
echo "Attempting force cleanup with find..."
find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Force cleanup still failed"
exit 1
else
echo "Force cleanup succeeded"
fi
fi
'
wget -q --no-proxy ${fd_archive_url}
tar -xf FastDeploy.tar.gz
wget -q --no-proxy ${fd_archive_url} || {
echo "ERROR: Failed to download archive from ${fd_archive_url}"
exit 1
}
tar --no-same-owner -xf FastDeploy.tar.gz || {
echo "ERROR: Failed to extract archive"
exit 1
}
rm -rf FastDeploy.tar.gz
cd FastDeploy
git config --global user.name "FastDeployCI"
@@ -173,12 +188,16 @@ jobs:
export RDMA_DEVICES=$(find /dev/infiniband/uverbs* -maxdepth 1 -not -type d | xargs -I{} echo '--device {}:{}')
docker run --rm --net=host \
--sysctl kernel.msgmax=1048576 \
--sysctl kernel.msgmnb=268435456 \
--name ${runner_name} \
--cap-add=SYS_PTRACE --cap-add=IPC_LOCK \
--shm-size=64G \
--shm-size=128G \
${RDMA_DEVICES} \
--device=/dev/infiniband/rdma_cm \
--ulimit memlock=-1:-1 \
--ulimit nofile=65536:65536 \
--ulimit nproc=8192:8192 \
-v $(pwd):/workspace -w /workspace \
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
-v "${CACHE_DIR}/.cache:/root/.cache" \
@@ -198,6 +217,7 @@ jobs:
-e "fd_wheel_url=${fd_wheel_url}" \
-e "BASE_REF=${BASE_REF}" \
-e "IS_PR=${IS_PR}" \
-e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \
--gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c '
git config --global --add safe.directory /workspace/FastDeploy
@@ -205,7 +225,7 @@ jobs:
git diff origin/${BASE_REF}..HEAD --unified=0 > diff.txt
# Avoid using pip cache to ensure the wheel is updated to the latest version
wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/
pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
python -m pip install -r scripts/unittest_requirement.txt
@@ -380,6 +400,13 @@ jobs:
echo "coverage passed"
exit 0
# Best-effort teardown of the job's container. `if: always()` makes this step
# run even when an earlier step failed or the job was cancelled.
# NOTE(review): assumes the container started earlier in this job
# (`--name ${runner_name}`) is named after the runner — confirm.
- name: Terminate and delete the container
  if: always()
  run: |
    # Cleanup must never abort half-way, so keep going on errors.
    set +e
    # Empty /workspace from inside the container; the error is ignored if the
    # container no longer exists (it is started with `docker run --rm`).
    docker exec -t "${{ runner.name }}" /bin/bash -c 'find /workspace -mindepth 1 -delete'
    # `set +e` does not change the script's final exit status, which is that of
    # the last command. `|| true` keeps the step from failing when there is
    # nothing left to remove.
    docker rm -f "${{ runner.name }}" || true
diff_coverage_report:
needs: run_tests_with_coverage
if: always()
@@ -0,0 +1,19 @@
# Runs when a pull request targeting `develop` or a `release/**` branch is
# closed. The only job is a no-op; the effect comes from the concurrency
# settings below.
# NOTE(review): presumably this deliberately reuses the name (and therefore the
# concurrency group) of the main PR workflow so that closing a PR cancels its
# in-progress run — confirm against that workflow.
name: PR Build and Test

on:
  pull_request:
    types:
      - closed
    branches:
      - develop
      - 'release/**'

permissions: read-all

# One group per (PR number, workflow name); starting a run in the group cancels
# any run of the same group that is still in progress.
concurrency:
  group: ${{ github.event.pull_request.number }}-${{ github.workflow }}
  cancel-in-progress: true

jobs:
  cancel:
    name: Cancel PR Build and Test for ${{ github.event.pull_request.number }}
    runs-on: ubuntu-latest
    steps:
      # Nothing to do: the job only has to start and succeed.
      - name: Cancel PR Build and Test
        run: |
          exit 0
-1
View File
@@ -4,7 +4,6 @@ on:
pull_request:
branches:
- develop
- 'release/*'
workflow_dispatch:
concurrency: