mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[CI] Add retry and robust cleanup for removal (#5725)
* [CI] Add retry and robust cleanup for removal * [CI] Ensure clean GPU memory by killing leftover processes
This commit is contained in:
@@ -39,29 +39,47 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
|
||||
run: |
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
- name: Run FastDeploy Base Tests
|
||||
shell: bash
|
||||
@@ -150,7 +168,7 @@ jobs:
|
||||
python -m pip install ${fastdeploy_wheel_url}
|
||||
python -m pip install pytest
|
||||
|
||||
wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
chmod +x ./llm-deploy-linux-amd64
|
||||
./llm-deploy-linux-amd64 -python python3.10 \
|
||||
-model_name ERNIE-4.5-0.3B-Paddle \
|
||||
|
||||
@@ -49,11 +49,29 @@ jobs:
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
# Download with retry and validation
|
||||
MAX_RETRIES=3
|
||||
@@ -175,7 +193,7 @@ jobs:
|
||||
python -m pip install ${fastdeploy_wheel_url}
|
||||
python -m pip install pytest
|
||||
|
||||
wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
chmod +x ./llm-deploy-linux-amd64
|
||||
./llm-deploy-linux-amd64 -python python3.10 \
|
||||
-model_name ERNIE-4.5-0.3B-Paddle \
|
||||
@@ -263,7 +281,7 @@ jobs:
|
||||
curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"--model\": \"/MODELDATA/ERNIE-4.5-VL-28B-A3B-Thinking\", \"--reasoning-parser\": \"ernie-45-vl-thinking\", \"--tool-call-parser\": \"ernie-45-vl-thinking\", \"--tensor-parallel-size\": 1, \"--quantization\": \"wint4\", \"--max-model-len\": 131072, \"--max-num-seqs\": 32, \"--no-enable-prefix-caching\": true}"
|
||||
check_service 90
|
||||
check_service 180
|
||||
python -m pytest -sv test_prompt_ids.py || TEST_EXIT_CODE=1
|
||||
|
||||
popd
|
||||
|
||||
@@ -76,9 +76,27 @@ jobs:
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
|
||||
@@ -40,21 +40,43 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
paddletest_archive_url: ${{ inputs.PADDLETEST_ARCHIVE_URL }}
|
||||
run: |
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
-e "BASE_BRANCH=${BASE_BRANCH}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
rm -rf /workspace/*
|
||||
'
|
||||
wget -q --no-proxy ${paddletest_archive_url}
|
||||
tar -xf PaddleTest.tar.gz
|
||||
rm -rf PaddleTest.tar.gz
|
||||
cd PaddleTest
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
-e "BASE_BRANCH=${BASE_BRANCH}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove /workspace/* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching /workspace/* still exists
|
||||
if ! ls /workspace/* >/dev/null 2>&1; then
|
||||
echo "All /workspace/* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls /workspace/* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean /workspace/* after multiple attempts"
|
||||
ls -ld /workspace/*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
wget -q --no-proxy ${paddletest_archive_url}
|
||||
tar -xf PaddleTest.tar.gz
|
||||
rm -rf PaddleTest.tar.gz
|
||||
cd PaddleTest
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
- name: logprob test
|
||||
shell: bash
|
||||
env:
|
||||
@@ -140,7 +162,7 @@ jobs:
|
||||
|
||||
python -m pip install ${fastdeploy_wheel_url}
|
||||
|
||||
wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64
|
||||
chmod +x ./llm-deploy-linux-amd64
|
||||
./llm-deploy-linux-amd64 -python python3.10 \
|
||||
-model_name ERNIE-4.5-0.3B-Paddle \
|
||||
|
||||
@@ -41,29 +41,47 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
|
||||
run: |
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
- name: Run CI unittest
|
||||
env:
|
||||
|
||||
@@ -39,29 +39,47 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
|
||||
run: |
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
- name: Run FastDeploy Stable Tests
|
||||
shell: bash
|
||||
|
||||
@@ -55,29 +55,48 @@ jobs:
|
||||
docker_image: ${{ inputs.DOCKER_IMAGE }}
|
||||
fd_archive_url: ${{ inputs.FASTDEPLOY_ARCHIVE_URL }}
|
||||
run: |
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
if [ -d ${REPO_NAME} ]; then
|
||||
echo "Directory ${REPO_NAME} exists, removing it..."
|
||||
rm -rf ${REPO_NAME}*
|
||||
fi
|
||||
'
|
||||
set -x
|
||||
REPO="https://github.com/${{ github.repository }}.git"
|
||||
FULL_REPO="${{ github.repository }}"
|
||||
REPO_NAME="${FULL_REPO##*/}"
|
||||
BASE_BRANCH="${{ github.base_ref }}"
|
||||
docker pull ${docker_image}
|
||||
# Clean the repository directory before starting
|
||||
docker run --rm --net=host -v $(pwd):/workspace -w /workspace \
|
||||
-e "REPO_NAME=${REPO_NAME}" \
|
||||
${docker_image} /bin/bash -c '
|
||||
CLEAN_RETRIES=3
|
||||
CLEAN_COUNT=0
|
||||
|
||||
while [ $CLEAN_COUNT -lt $CLEAN_RETRIES ]; do
|
||||
echo "Attempt $((CLEAN_COUNT+1)) to remove ${REPO_NAME}* ..."
|
||||
rm -rf "${REPO_NAME}"* || true
|
||||
sleep 2
|
||||
|
||||
# Check if anything matching ${REPO_NAME}* still exists
|
||||
if ! ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "All ${REPO_NAME}* removed successfully"
|
||||
break
|
||||
fi
|
||||
|
||||
CLEAN_COUNT=$((CLEAN_COUNT + 1))
|
||||
done
|
||||
|
||||
if ls "${REPO_NAME}"* >/dev/null 2>&1; then
|
||||
echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts"
|
||||
ls -ld "${REPO_NAME}"*
|
||||
exit 1
|
||||
fi
|
||||
'
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
|
||||
wget -q --no-proxy ${fd_archive_url}
|
||||
tar -xf FastDeploy.tar.gz
|
||||
rm -rf FastDeploy.tar.gz
|
||||
cd FastDeploy
|
||||
git config --global user.name "FastDeployCI"
|
||||
git config --global user.email "fastdeploy_ci@example.com"
|
||||
git log -n 3 --oneline
|
||||
- name: Run FastDeploy Unit Tests and Coverage
|
||||
shell: bash
|
||||
env:
|
||||
|
||||
@@ -5,31 +5,33 @@ FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8181}
|
||||
FD_METRICS_PORT=${FD_METRICS_PORT:-8182}
|
||||
FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8183}
|
||||
|
||||
|
||||
|
||||
if [ -z "$MODEL_PATH" ]; then
|
||||
echo "❌ 用法: $0 <模型路径>"
|
||||
echo "❌ Usage: $0 <model_path>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "$MODEL_PATH" ]; then
|
||||
echo "❌ 错误:模型目录不存在: $MODEL_PATH"
|
||||
echo "❌ Error: Model directory does not exist: $MODEL_PATH"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "使用模型: $MODEL_PATH"
|
||||
echo "Using model: $MODEL_PATH"
|
||||
|
||||
|
||||
# 清理日志
|
||||
# Clean logs
|
||||
rm -rf log/*
|
||||
mkdir -p log
|
||||
|
||||
# 环境变量
|
||||
# Environment variables
|
||||
export CUDA_VISIBLE_DEVICES=0,1
|
||||
export INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID:-7679}
|
||||
export ENABLE_V1_KVCACHE_SCHEDULER=1
|
||||
|
||||
echo "Cleaning GPU memory (CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES})"
|
||||
|
||||
fuser -k /dev/nvidia* 2>/dev/null || true
|
||||
sleep 2
|
||||
|
||||
echo "Starting API server"
|
||||
python -m fastdeploy.entrypoints.openai.api_server \
|
||||
--tensor-parallel-size 2 \
|
||||
--port ${FD_API_PORT} \
|
||||
@@ -47,12 +49,13 @@ success=0
|
||||
|
||||
for i in $(seq 1 300); do
|
||||
if (echo > /dev/tcp/127.0.0.1/$FD_API_PORT) >/dev/null 2>&1; then
|
||||
echo "API server is up on port $FD_API_PORT on iteration $i"
|
||||
echo "API server is up on port $FD_API_PORT at iteration $i"
|
||||
success=1
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
|
||||
if [ $success -eq 0 ]; then
|
||||
echo "超时: API 服务在 300 秒内未启动 (端口 $FD_API_PORT)"
|
||||
echo "Timeout: API server did not start within 300 seconds (port $FD_API_PORT)"
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user