[CI] Add retry and robust cleanup for removal (#5725)

* [CI] Add retry and robust cleanup for removal

* [CI] Ensure clean GPU memory by killing leftover processes
This commit is contained in:
YuBaoku
2025-12-25 17:08:27 +08:00
committed by GitHub
parent 412867fd99
commit 7247dc5f3a
8 changed files with 259 additions and 125 deletions
+21 -3
View File
@@ -76,9 +76,27 @@ jobs:
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
-e "REPO_NAME=${REPO_NAME}" \
${docker_image} /bin/bash -c '
if [ -d ${REPO_NAME} ]; then
echo "Directory ${REPO_NAME} exists, removing it..."
rm -rf ${REPO_NAME}*
CLEAN_RETRIES=3
CLEAN_COUNT=0
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
rm -rf "${REPO_NAME}"* || true
sleep 2
# Check if anything matching ${REPO_NAME}* still exists
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "All ${REPO_NAME}* removed successfully"
break
fi
CLEAN_COUNT=$((CLEAN_COUNT + 1))
done
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
ls -ld "${REPO_NAME}"*
exit 1
fi
'