mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-05-10 01:21:55 +08:00
[CI] Add retry and robust cleanup for removal (#5725)
* [CI] Add retry and robust cleanup for removal * [CI] Ensure clean GPU memory by killing leftover processes
This commit is contained in:
@@ -39,29 +39,47 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
|
||||
run: |
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
- name: Run FastDeploy Base Tests
|
||||
shell: bash
|
||||
@@ -150,7 +168,7 @@ jobs:
|
||||
python -m pip install ${fastdeploy_wheel_url}
|
||||
python -m pip install pytest
|
||||
|
||||
wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
chmod +x ./llm-deploy-linux-amd64
|
||||
./llm-deploy-linux-amd64 -python python3.10 \
|
||||
-model_name ERNIE-4.5-0.3B-Paddle \
|
||||
|
||||
@@ -49,11 +49,29 @@ jobs:
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
# Download with retry and validation
|
||||
MAX_RETRIES=3
|
||||
@@ -175,7 +193,7 @@ jobs:
|
||||
python -m pip install ${fastdeploy_wheel_url}
|
||||
python -m pip install pytest
|
||||
|
||||
wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
chmod +x ./llm-deploy-linux-amd64
|
||||
./llm-deploy-linux-amd64 -python python3.10 \
|
||||
-model_name ERNIE-4.5-0.3B-Paddle \
|
||||
@@ -263,7 +281,7 @@ jobs:
|
||||
curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"--model\": \"/MODELDATA/ERNIE-4.5-VL-28B-A3B-Thinking\", \"--reasoning-parser\": \"ernie-45-vl-thinking\", \"--tool-call-parser\": \"ernie-45-vl-thinking\", \"--tensor-parallel-size\": 1, \"--quantization\": \"wint4\", \"--max-model-len\": 131072, \"--max-num-seqs\": 32, \"--no-enable-prefix-caching\": true}"
|
||||
check_service 90
|
||||
check_service 180
|
||||
python -m pytest -sv test_prompt_ids.py || TEST_EXIT_CODE=1
|
||||
|
||||
popd
|
||||
|
||||
@@ -76,9 +76,27 @@ jobs:
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
|
||||
@@ -40,21 +40,43 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
paddletest_archive_url: ${{ inputs.PADDLETEST_ARCHIVE_URL }}
|
||||
run: |
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
-e "BASE_BRANCH=${BASE_BRANCH}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
rm -rf /workspace/*
|
||||
'
|
||||
wget -q --no-proxy ${paddletest_archive_url}
|
||||
tar -xf PaddleTest.tar.gz
|
||||
rm -rf PaddleTest.tar.gz
|
||||
cd PaddleTest
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
-e "BASE_BRANCH=${BASE_BRANCH}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove /workspace/* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching /workspace/* still exists
|
||||
if ! ls /workspace/* >/dev/null 2>&1; then
|
||||
echo "All /workspace/* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls /workspace/* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean /workspace/* after multiple attempts"
|
||||
ls -ld /workspace/*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
wget -q --no-proxy ${paddletest_archive_url}
|
||||
tar -xf PaddleTest.tar.gz
|
||||
rm -rf PaddleTest.tar.gz
|
||||
cd PaddleTest
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
- name: logprob test
|
||||
shell: bash
|
||||
env:
|
||||
@@ -140,7 +162,7 @@ jobs:
|
||||
|
||||
python -m pip install ${fastdeploy_wheel_url}
|
||||
|
||||
wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
chmod +x ./llm-deploy-linux-amd64
|
||||
./llm-deploy-linux-amd64 -python python3.10 \
|
||||
-model_name ERNIE-4.5-0.3B-Paddle \
|
||||
|
||||
@@ -41,29 +41,47 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
|
||||
run: |
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
- name: Run CI unittest
|
||||
env:
|
||||
|
||||
@@ -39,29 +39,47 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
|
||||
run: |
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
- name: Run FastDeploy Stable Tests
|
||||
shell: bash
|
||||
|
||||
@@ -55,29 +55,48 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
|
||||
run: |
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
- name: Run FastDeploy Unit Tests and Coverage
|
||||
shell: bash
|
||||
env:
|
||||
|
||||
Reference in New Issue
Block a user