mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
1cfb042045
* [CI] Add ep4_mtp e2e test
195 lines
7.4 KiB
YAML
195 lines
7.4 KiB
YAML
# Reusable workflow that runs the FastDeploy end-to-end suite on a 4-GPU
# self-hosted runner. It is only triggered through `workflow_call`.
name: 4-GPU E2E Tests
description: 'Run FastDeploy e2e tests on 4 GPUs'

on:
  workflow_call:
    inputs:
      # Container image used for the workspace cleanup and for the test run.
      # NOTE(review): `required: true` means callers must always pass a value,
      # so the default below looks unused - confirm which one is intended.
      DOCKER_IMAGE:
        description: 'Build Images'
        required: true
        type: string
        default: 'ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev'

      # Source archive downloaded and unpacked by the "Code Prepare" step.
      FASTDEPLOY_ARCHIVE_URL:
        description: 'URL of the compressed FastDeploy code archive.'
        required: true
        type: string

      # Wheel installed with pip inside the test container.
      FASTDEPLOY_WHEEL_URL:
        description: 'URL of the FastDeploy Wheel.'
        required: true
        type: string

      # Host directory holding gitconfig, .cache and ConfigDir that get
      # mounted into the container. When empty, the test step falls back to
      # the directory two levels above the GitHub workspace.
      CACHE_DIR:
        description: 'Cache Dir Use'
        required: false
        type: string
        default: ''

      # Host directory mounted read-only at /ModelData in the test container.
      MODEL_CACHE_DIR:
        description: 'Cache Dir Use'
        required: false
        type: string
        default: ''

    secrets:
      # NOTE(review): declared as required but not referenced by any step
      # visible in this file - confirm it is still needed.
      github-token:
        required: true
|
|
|
|
jobs:
  # Single job: prepare a clean source tree from the code archive, then run
  # scripts/run_gpu_4cards.sh inside the CI container with 4 GPUs attached.
  run_4_cards_tests:
    runs-on: [self-hosted, GPU-h20-4Cards]
    timeout-minutes: 30
    steps:
      # Step 1: remove leftovers of earlier runs and unpack the source archive.
      - name: Code Prepare
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
        run: |
          set -x
          FULL_REPO="${{ github.repository }}"
          # Keep only the part after the last slash (owner/name -> name).
          REPO_NAME="${FULL_REPO##*/}"
          docker pull "${docker_image}"

          # Clean the repository directory before starting. The removal runs
          # inside the container (presumably because earlier containerised
          # runs leave files the runner user cannot delete - TODO confirm).
          docker run --rm --net=host -v "$(pwd):/workspace" -w /workspace \
            -e "REPO_NAME=${REPO_NAME}" \
            "${docker_image}" /bin/bash -c '
            CLEAN_RETRIES=3
            CLEAN_COUNT=0

            while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
              echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
              rm -rf "${REPO_NAME}"* || true
              sleep 2

              # Check if anything matching ${REPO_NAME}* still exists
              if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
                echo "All ${REPO_NAME}* removed successfully"
                break
              fi

              CLEAN_COUNT=$((CLEAN_COUNT + 1))
            done

            # Fail the step when the leftovers survived every attempt.
            if ls "${REPO_NAME}"* >/dev/null 2>&1; then
              echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
              ls -ld "${REPO_NAME}"*
              exit 1
            fi
          '

          # Save under a fixed name so the following tar call does not depend
          # on the basename (or query string) of the archive URL.
          wget -q --no-proxy -O FastDeploy.tar.gz "${fd_archive_url}"
          tar -xf FastDeploy.tar.gz
          rm -rf FastDeploy.tar.gz
          cd FastDeploy
          git config --global user.name "FastDeployCI"
          git config --global user.email "fastdeploy_ci@example.com"
          git log -n 3 --oneline

      # Step 2: free the per-runner ports, then run the 4-card test script in
      # the container.
      - name: Run Four Cards Tests
        shell: bash
        env:
          docker_image: ${{ inputs.DOCKER_IMAGE }}
          fd_wheel_url: ${{ inputs.FASTDEPLOY_WHEEL_URL }}
          CACHE_DIR: ${{ inputs.CACHE_DIR }}
          BASE_REF: ${{ github.event.pull_request.base.ref }}
          MODEL_CACHE_DIR: ${{ inputs.MODEL_CACHE_DIR }}
          IS_PR: ${{ github.event_name == 'pull_request' }}
        run: |
          if [[ "$IS_PR" == "true" ]]; then
            echo "Running on PR"
          else
            echo "Not a PR"
          fi

          runner_name="${{ runner.name }}"
          # The last dash-separated field of the runner name is split into
          # single characters and used as the GPU device list.
          # Assumes runner names end in "-<digits>", one digit per GPU index
          # - TODO confirm against the runner naming scheme.
          CARD_ID=$(echo "${runner_name}" | awk -F'-' '{print $NF}')
          DEVICES=$(echo "$CARD_ID" | fold -w1 | paste -sd,)
          # The first device index selects this runner's port block.
          DEVICE_PORT=$(echo "$DEVICES" | cut -d',' -f1)

          # Every service port is a fixed base plus 100 * first device index.
          FLASK_PORT=$((8068 + DEVICE_PORT * 100))
          FD_API_PORT=$((8088 + DEVICE_PORT * 100))
          FD_ENGINE_QUEUE_PORT=$((8058 + DEVICE_PORT * 100))
          FD_METRICS_PORT=$((8078 + DEVICE_PORT * 100))
          FD_CACHE_QUEUE_PORT=$((8098 + DEVICE_PORT * 100))
          FD_ROUTER_PORT=$((8048 + DEVICE_PORT * 100))
          FD_CONNECTOR_PORT=$((8038 + DEVICE_PORT * 100))
          FD_RDMA_PORT=$((8028 + DEVICE_PORT * 100))
          echo "Test ENV Parameter:"
          echo "========================================================="
          echo "FLASK_PORT=${FLASK_PORT}"
          echo "FD_API_PORT=${FD_API_PORT}"
          echo "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}"
          echo "FD_METRICS_PORT=${FD_METRICS_PORT}"
          echo "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}"
          echo "FD_ROUTER_PORT=${FD_ROUTER_PORT}"
          echo "FD_CONNECTOR_PORT=${FD_CONNECTOR_PORT}"
          echo "FD_RDMA_PORT=${FD_RDMA_PORT}"
          echo "DEVICES=${DEVICES}"
          echo "========================================================="

          # Default the cache root to two directory levels above the workspace.
          CACHE_DIR="${CACHE_DIR:-$(dirname "$(dirname "${{ github.workspace }}")")}"
          echo "CACHE_DIR is set to ${CACHE_DIR}"
          # The gitconfig file must exist on the host before it is bind-mounted.
          if [ ! -f "${CACHE_DIR}/gitconfig" ]; then
            touch "${CACHE_DIR}/gitconfig"
          fi

          # An empty model directory would yield the invalid volume spec
          # ":/ModelData:ro"; stop early with a readable message instead.
          if [ -z "${MODEL_CACHE_DIR}" ]; then
            echo "ERROR: MODEL_CACHE_DIR is empty; cannot mount /ModelData"
            exit 1
          fi

          # All ports of this runner's block, including router/connector/RDMA,
          # are cleared before the run.
          PORTS=(
            $FLASK_PORT $FD_API_PORT $FD_ENGINE_QUEUE_PORT $FD_METRICS_PORT
            $FD_CACHE_QUEUE_PORT $FD_ROUTER_PORT $FD_CONNECTOR_PORT $FD_RDMA_PORT
          )
          LOG_FILE="./port_cleanup_$(date +%Y%m%d_%H%M%S).log"
          echo "==== LOG_FILE is ${LOG_FILE} ===="

          echo "==== PORT CLEAN BEFORE TASK RUN ====" | tee -a "$LOG_FILE"

          for port in "${PORTS[@]}"; do
            # lsof exits non-zero when nothing listens; "|| true" keeps the
            # step alive in that case.
            PIDS=$(lsof -t -i :"$port" || true)
            if [ -n "$PIDS" ]; then
              echo "Port $port is occupied by PID(s): $PIDS" | tee -a "$LOG_FILE"
              echo "$PIDS" | xargs -r kill -9
              echo "Port $port cleared" | tee -a "$LOG_FILE"
            else
              echo "Port $port is free" | tee -a "$LOG_FILE"
            fi
          done

          echo "==== PORT CLEAN COMPLETE ====" | tee -a "$LOG_FILE"

          echo "========================================================="
          echo "Ensuring no stale container named ${runner_name} ..."
          if [ "$(docker ps -a -q -f "name=${runner_name}")" ]; then
            echo "Removing stale container: ${runner_name}"
            docker rm -f "${runner_name}" || true
          fi

          # Host networking is used, so the ports computed above are passed in
          # as environment variables rather than published.
          docker run --rm --ipc=host --net=host \
            --name "${runner_name}" \
            -v "$(pwd):/workspace" -w /workspace \
            -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
            -v "${CACHE_DIR}/.cache:/root/.cache" \
            -v "${CACHE_DIR}/ConfigDir:/root/.config" \
            -v "${MODEL_CACHE_DIR}:/ModelData:ro" \
            -e "MODEL_PATH=/ModelData" \
            -e "FD_API_PORT=${FD_API_PORT}" \
            -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
            -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
            -e "FLASK_PORT=${FLASK_PORT}" \
            -e "FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT}" \
            -e "FD_ROUTER_PORT=${FD_ROUTER_PORT}" \
            -e "FD_CONNECTOR_PORT=${FD_CONNECTOR_PORT}" \
            -e "FD_RDMA_PORT=${FD_RDMA_PORT}" \
            -e TZ="Asia/Shanghai" \
            -e "fd_wheel_url=${fd_wheel_url}" \
            -e "BASE_REF=${BASE_REF}" \
            -e "IS_PR=${IS_PR}" \
            --gpus '"device='"${DEVICES}"'"' "${docker_image}" /bin/bash -c '

            git config --global --add safe.directory /workspace/FastDeploy
            cd FastDeploy
            # Only diff against the base branch when one is known (it is empty
            # outside pull requests); diff.txt is created in both cases.
            if [ -n "${BASE_REF}" ]; then
              git diff "origin/${BASE_REF}..HEAD" --unified=0 > diff.txt
            else
              : > diff.txt
            fi

            python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
            pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

            python -m pip install -r scripts/unittest_requirement.txt
            python -m pip install "${fd_wheel_url}"
            # Replace the source-tree package with the wheel contents and put
            # the repo root on PYTHONPATH (presumably so tests import the
            # built wheel rather than the raw sources - TODO confirm).
            rm -rf fastdeploy
            python -m pip install "${fd_wheel_url}" --no-deps --target=/workspace/FastDeploy
            export PYTHONPATH=/workspace/FastDeploy/

            export CUDA_VISIBLE_DEVICES=0,1,2,3
            bash scripts/run_gpu_4cards.sh
          '
|