mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
* [Cherry-Pick][CI] Sync parallelism optimization from dev to 2.5 (#7085)
This commit is contained in:
@@ -182,7 +182,10 @@ jobs:
|
||||
docker rm -f ${runner_name} || true
|
||||
fi
|
||||
|
||||
docker run --rm --ipc=host --pid=host --net=host \
|
||||
docker run --rm --net=host \
|
||||
--shm-size=64g \
|
||||
--sysctl kernel.msgmax=1048576 \
|
||||
--sysctl kernel.msgmnb=268435456 \
|
||||
--name ${runner_name} \
|
||||
-v $(pwd):/workspace \
|
||||
-w /workspace \
|
||||
|
||||
@@ -166,7 +166,10 @@ jobs:
|
||||
docker rm -f ${runner_name} || true
|
||||
fi
|
||||
|
||||
docker run --rm --ipc=host --net=host \
|
||||
docker run --rm --net=host \
|
||||
--shm-size=64g \
|
||||
--sysctl kernel.msgmax=1048576 \
|
||||
--sysctl kernel.msgmnb=268435456 \
|
||||
--name ${runner_name} \
|
||||
-v $(pwd):/workspace -w /workspace \
|
||||
-v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \
|
||||
|
||||
@@ -152,7 +152,11 @@ jobs:
|
||||
echo "Removing stale container: ${runner_name}"
|
||||
docker rm -f ${runner_name} || true
|
||||
fi
|
||||
docker run --rm --ipc=host --pid=host --net=host \
|
||||
|
||||
docker run --rm --net=host \
|
||||
--shm-size=64g \
|
||||
--sysctl kernel.msgmax=1048576 \
|
||||
--sysctl kernel.msgmnb=268435456 \
|
||||
--name ${runner_name} \
|
||||
-v $(pwd):/workspace \
|
||||
-w /workspace \
|
||||
|
||||
@@ -163,6 +163,7 @@ jobs:
|
||||
fi
|
||||
|
||||
docker run --rm --net=host \
|
||||
--shm-size=64G \
|
||||
--name ${runner_name} \
|
||||
-v $(pwd):/workspace \
|
||||
-w /workspace \
|
||||
|
||||
@@ -160,6 +160,7 @@ jobs:
|
||||
fi
|
||||
|
||||
docker run --rm --net=host \
|
||||
--shm-size=64G \
|
||||
--name ${runner_name} \
|
||||
-v $(pwd):/workspace \
|
||||
-w /workspace \
|
||||
|
||||
@@ -47,6 +47,7 @@ jobs:
|
||||
outputs:
|
||||
all_cov_file_url: ${{ steps.cov_upload.outputs.all_cov_file_url }}
|
||||
unittest_failed_url: ${{ steps.cov_upload.outputs.unittest_failed_url }}
|
||||
unittest_logs_url: ${{ steps.cov_upload.outputs.unittest_logs_url }}
|
||||
diff_cov_result_json_url: ${{ steps.cov_upload.outputs.diff_cov_result_json_url }}
|
||||
steps:
|
||||
- name: Code Prepare
|
||||
@@ -173,6 +174,8 @@ jobs:
|
||||
export RDMA_DEVICES=$(find /dev/infiniband/uverbs* -maxdepth 1 -not -type d | xargs -I{} echo '--device {}:{}')
|
||||
|
||||
docker run --rm --net=host \
|
||||
--sysctl kernel.msgmax=1048576 \
|
||||
--sysctl kernel.msgmnb=268435456 \
|
||||
--name ${runner_name} \
|
||||
--cap-add=SYS_PTRACE --cap-add=IPC_LOCK \
|
||||
--shm-size=64G \
|
||||
@@ -309,6 +312,15 @@ jobs:
|
||||
echo "unittest_failed_url=${UNIT_TEST_RESULT_URL}" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
# Only upload logs when tests failed
|
||||
unittest_logs_archive="unittest_logs.tar.gz"
|
||||
if [ "$HAS_FAILED_TESTS" = true ]; then
|
||||
python ${push_file} ${unittest_logs_archive} ${target_path}/UnitTestResult
|
||||
UNIT_TEST_LOGS_URL=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/UnitTestResult/${unittest_logs_archive}
|
||||
echo "unittest_logs_url=${UNIT_TEST_LOGS_URL}" >> $GITHUB_OUTPUT
|
||||
echo "unittest_logs_url=${UNIT_TEST_LOGS_URL}" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
if [[ "$IS_PR" != "true" ]]; then
|
||||
full_cov_file="full_coverage_report.txt"
|
||||
full_cov_csv="full_coverage_report.csv"
|
||||
@@ -345,6 +357,7 @@ jobs:
|
||||
if [ -f "${filename}" ];then
|
||||
echo "Failed test cases:"
|
||||
cat "${filename}"
|
||||
echo "unittest_logs_url=${unittest_logs_url}"
|
||||
fi
|
||||
exit "$TEST_EXIT_CODE"
|
||||
fi
|
||||
|
||||
+311
-66
@@ -7,73 +7,95 @@ run_path=$( realpath "$DIR/../")
|
||||
export COVERAGE_FILE=${COVERAGE_FILE:-$DIR/../coveragedata/.coverage}
|
||||
export COVERAGE_RCFILE=${COVERAGE_RCFILE:-$DIR/../scripts/.coveragerc}
|
||||
|
||||
# ============================================================
|
||||
# Classify tests into one of the following categories
|
||||
# - multi_gpu: requires multiple GPUs / ports (run sequentially)
|
||||
# - single_gpu: independent tests (can run in parallel)
|
||||
# ============================================================
|
||||
classify_tests() {
|
||||
local test_file=$1
|
||||
# Rule 1: distributed tests (explicit multi-GPU launch)
|
||||
if [[ "$test_file" =~ tests/distributed/.*test_.*\.py ]]; then
|
||||
echo "multi_gpu"
|
||||
return
|
||||
fi
|
||||
|
||||
failed_tests_file="failed_tests.log"
|
||||
> "$failed_tests_file"
|
||||
# Rule 2: e2e tests (usually involve service / ports)
|
||||
if [[ "$test_file" =~ tests/e2e/.*test_.*\.py ]]; then
|
||||
echo "multi_gpu"
|
||||
return
|
||||
fi
|
||||
|
||||
# Rule 3: model loader tests (allocate multiple GPUs)
|
||||
if [[ "$test_file" =~ tests/model_loader/.*test_.*\.py ]]; then
|
||||
echo "multi_gpu"
|
||||
return
|
||||
fi
|
||||
|
||||
##################################
|
||||
# Run pytest, one file at a time
|
||||
# Use pytest's --collect-only output to extract the actual test file paths (e.g., tests/.../test_*.py).
|
||||
# Note: pytest may output lines like "ERROR tests/xxx/test_xxx.py::test_xxx ..." on collection failure,
|
||||
# to avoid treating prefixes like "ERROR"/"FAILED"/"collecting" as filenames,
|
||||
# we only keep the "tests/.../test_*.py" portion and discard everything else.
|
||||
TEST_FILES=$(
|
||||
python -m pytest --collect-only -q -c "${PYTEST_INI}" "${tests_path}" --rootdir="${run_path}" --disable-warnings 2>&1 \
|
||||
| grep -E 'tests/.+\/test_.*\.py' \
|
||||
| sed -E 's@.*(tests/[^: ]*test_[^: ]*\.py).*@\1@' \
|
||||
| sort -u
|
||||
)
|
||||
# Rule 4: check file content for tensor_parallel_size=[234] or --tensor-parallel-size [234]
|
||||
# or CUDA_VISIBLE_DEVICES="0,1"
|
||||
# or PORT environment variables
|
||||
if [ -f "$test_file" ]; then
|
||||
if grep -q '"tensor_parallel_size".*[1234]\|--tensor-parallel-size.*[1234]\|tensor_parallel_size.*=[1234]\|CUDA_VISIBLE_DEVICES.*0.*1\|paddle\.distributed\.launch.*--gpus.*0.*1\|FD_API_PORT\|FLASK_PORT\|FD_ENGINE_QUEUE_PORT\|FD_METRICS_PORT\|FD_CACHE_QUEUE_PORT\|FD_ROUTER_PORT\|FD_CONNECTOR_PORT\|FD_RDMA_PORT' "$test_file" 2>/dev/null; then
|
||||
echo "multi_gpu"
|
||||
return
|
||||
fi
|
||||
fi
|
||||
|
||||
# ========== Single-GPU tests (no port required, can run in parallel) ==========
|
||||
echo "single_gpu"
|
||||
}
|
||||
|
||||
failed_pytest=0
|
||||
success_pytest=0
|
||||
# ============================================================
|
||||
# Run Test With Logging
|
||||
# ============================================================
|
||||
run_test_with_logging() {
|
||||
local test_file=$1
|
||||
local log_prefix=$2
|
||||
local status
|
||||
|
||||
# nullglob: if no match, the pattern expands to nothing
|
||||
shopt -s nullglob
|
||||
echo "Running pytest file: $test_file"
|
||||
|
||||
for file in $TEST_FILES; do
|
||||
echo "Running pytest file: $file"
|
||||
# Clean up previous logs
|
||||
rm -rf "${run_path}"/log* || true
|
||||
for f in "${run_path}"/*.log; do
|
||||
[[ "$(basename "$f")" != "${failed_tests_file}" ]] && rm -f "$f"
|
||||
done
|
||||
# Create isolated log directory for this test to avoid race conditions
|
||||
# Format: unittest_logs/<test_dir>/<test_file_base>/log
|
||||
local test_rel_path="${test_file#tests/}"
|
||||
local test_dir=$(dirname "$test_rel_path")
|
||||
local test_name=$(basename "$test_file" .py)
|
||||
local isolated_log_dir="${run_path}/unittest_logs/${test_dir}/${test_name}/log"
|
||||
mkdir -p "$isolated_log_dir"
|
||||
|
||||
# Run pytest with coverage for the current file
|
||||
# Set timeout to 600 seconds to avoid infinite loop
|
||||
timeout 600 python -m coverage run -m pytest -c ${PYTEST_INI} "$file" -vv -s
|
||||
# Set FD_LOG_DIR to isolate logs for each test
|
||||
export FD_LOG_DIR="$isolated_log_dir"
|
||||
|
||||
# Run test
|
||||
timeout 600 python -m coverage run -m pytest -c ${PYTEST_INI} "$test_file" -vv -s
|
||||
status=$?
|
||||
|
||||
if [ "$status" -ne 0 ]; then
|
||||
echo "$file" >> "$failed_tests_file"
|
||||
failed_pytest=$((failed_pytest+1))
|
||||
|
||||
echo "$test_file" >> "$log_prefix"
|
||||
echo ""
|
||||
echo "==================== Dumping Logs ===================="
|
||||
echo "==================== Test Failed: $test_file ===================="
|
||||
|
||||
for log_dir in "${run_path}"/log*; do
|
||||
if [ -d "${log_dir}" ]; then
|
||||
echo
|
||||
echo ">>>> Processing log directory: ${log_dir}"
|
||||
# Use isolated log directory for this test
|
||||
if [ -d "$isolated_log_dir" ]; then
|
||||
echo
|
||||
echo ">>>> Processing log directory: ${isolated_log_dir}"
|
||||
|
||||
# print all workerlog.0
|
||||
worker_logs=("${log_dir}"/workerlog.0)
|
||||
if [ "${#worker_logs[@]}" -gt 0 ]; then
|
||||
for worker_log in "${worker_logs[@]}"; do
|
||||
if [ -f "${worker_log}" ]; then
|
||||
echo "---------------- ${worker_log} (last 100 lines) ----------------"
|
||||
tail -n 100 "${worker_log}" || true
|
||||
echo "---------------------------------------------------------------"
|
||||
fi
|
||||
done
|
||||
else
|
||||
echo "No workerlog.0 found in ${log_dir}"
|
||||
fi
|
||||
# workerlog
|
||||
worker_logs=("${isolated_log_dir}"/workerlog.0)
|
||||
|
||||
echo ">>> grep error in ${log_dir}"
|
||||
grep -Rni --color=auto "error" "${log_dir}" || true
|
||||
if [ -f "${worker_logs[0]}" ]; then
|
||||
for worker_log in "${worker_logs[@]}"; do
|
||||
[ -f "${worker_log}" ] || continue
|
||||
echo "---------------- ${worker_log} (last 100 lines) ----------------"
|
||||
tail -n 100 "${worker_log}" || true
|
||||
echo "---------------------------------------------------------------"
|
||||
done
|
||||
fi
|
||||
done
|
||||
|
||||
echo ">>> grep error in ${isolated_log_dir}"
|
||||
grep -Rni --color=auto "error" "${isolated_log_dir}" || true
|
||||
fi
|
||||
|
||||
# print all server logs
|
||||
server_logs=("${run_path}"/*.log)
|
||||
@@ -92,28 +114,251 @@ for file in $TEST_FILES; do
|
||||
echo "No *.log files found"
|
||||
fi
|
||||
|
||||
echo "======================================================"
|
||||
else
|
||||
success_pytest=$((success_pytest+1))
|
||||
echo "======================================================="
|
||||
fi
|
||||
ps -ef | grep "${FD_CACHE_QUEUE_PORT}" | grep -v grep | awk '{print $2}' | xargs -r kill -9
|
||||
ps -ef | grep "${FD_ENGINE_QUEUE_PORT}" | grep -v grep | awk '{print $2}' | xargs -r kill -9
|
||||
done
|
||||
shopt -u nullglob
|
||||
|
||||
##################################
|
||||
# Summary
|
||||
##################################
|
||||
# Clean up port-related processes
|
||||
if [ -n "$FD_CACHE_QUEUE_PORT" ]; then
|
||||
ps -ef | grep "${FD_CACHE_QUEUE_PORT}" | grep -v grep | awk '{print $2}' | xargs -r kill -9 || true
|
||||
fi
|
||||
if [ -n "$FD_ENGINE_QUEUE_PORT" ]; then
|
||||
ps -ef | grep "${FD_ENGINE_QUEUE_PORT}" | grep -v grep | awk '{print $2}' | xargs -r kill -9 || true
|
||||
fi
|
||||
|
||||
# if passed, remove the isolated log directory and server logs
|
||||
if [ "$status" -eq 0 ]; then
|
||||
rm -rf "${isolated_log_dir}" || true
|
||||
# Clean up server logs in run_path on pass
|
||||
for f in "${run_path}"/*.log; do
|
||||
[[ "$(basename "$f")" != "${failed_tests_file}" ]] && rm -f "$f" || true
|
||||
done
|
||||
fi
|
||||
|
||||
# Unset FD_LOG_DIR to avoid affecting next test
|
||||
unset FD_LOG_DIR
|
||||
return $status
|
||||
}
|
||||
|
||||
# ============================================================
|
||||
# Run a shard of tests on a dedicated GPU
|
||||
# - one shard = one process = one GPU
|
||||
# ============================================================
|
||||
run_shard() {
|
||||
local shard_name=$1
|
||||
local gpu_id=$2
|
||||
shift 2
|
||||
local tests=("$@")
|
||||
|
||||
echo "===================================="
|
||||
echo "Starting shard '${shard_name}' on GPU ${gpu_id}"
|
||||
echo "Tests count: ${#tests[@]}"
|
||||
echo "===================================="
|
||||
|
||||
# Set GPU
|
||||
export CUDA_VISIBLE_DEVICES="$gpu_id"
|
||||
export COVERAGE_FILE="${DIR}/../coveragedata/.coverage.${shard_name}"
|
||||
|
||||
# Failed log filename (no path, directly in project root)
|
||||
local failed_log="${shard_name}_failed.txt"
|
||||
rm -f "$failed_log"
|
||||
> "$failed_log"
|
||||
|
||||
local success_count=0
|
||||
local failed_count=0
|
||||
|
||||
for file in "${tests[@]}"; do
|
||||
echo "[${shard_name}] Running: $file"
|
||||
|
||||
run_test_with_logging "$file" "$failed_log"
|
||||
local status=$?
|
||||
|
||||
if [ "$status" -eq 0 ]; then
|
||||
success_count=$((success_count + 1))
|
||||
else
|
||||
failed_count=$((failed_count + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
unset COVERAGE_FILE
|
||||
|
||||
echo "===================================="
|
||||
echo "Shard '${shard_name}' completed"
|
||||
echo "Successful: $success_count"
|
||||
echo "Failed: $failed_count"
|
||||
echo "===================================="
|
||||
|
||||
unset CUDA_VISIBLE_DEVICES
|
||||
|
||||
return $failed_count
|
||||
}
|
||||
|
||||
# ============================================================
|
||||
# Main Flow
|
||||
# ============================================================
|
||||
|
||||
failed_tests_file="failed_tests.log"
|
||||
> "$failed_tests_file"
|
||||
|
||||
echo "===================================="
|
||||
echo "Coverage Test Execution with Parallel Single-GPU Tests"
|
||||
echo "===================================="
|
||||
echo "Pytest total: $((failed_pytest + success_pytest))"
|
||||
echo "Pytest successful: $success_pytest"
|
||||
echo "Pytest failed: $failed_pytest"
|
||||
|
||||
# ============================================================
|
||||
# Step 1: Collect & classify tests
|
||||
# ============================================================
|
||||
echo "Step 1: Collecting and classifying tests"
|
||||
|
||||
if [ "$failed_pytest" -ne 0 ]; then
|
||||
ALL_TEST_FILES=$(
|
||||
python -m pytest --collect-only -q -c "${PYTEST_INI}" "${tests_path}" --rootdir="${run_path}" --disable-warnings 2>&1 \
|
||||
| grep -E 'tests/.+\/test_.*\.py' \
|
||||
| sed -E 's@.*(tests/[^: ]*test_[^: ]*\.py).*@\1@' \
|
||||
| sort -u
|
||||
)
|
||||
|
||||
if [ -z "$ALL_TEST_FILES" ]; then
|
||||
echo "ERROR: No test files found!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
MULTI_GPU_TESTS=()
|
||||
SINGLE_GPU_TESTS=()
|
||||
|
||||
TOTAL_TESTS=0
|
||||
for file in $ALL_TEST_FILES; do
|
||||
TOTAL_TESTS=$((TOTAL_TESTS + 1))
|
||||
test_type=$(classify_tests "$file")
|
||||
|
||||
case "$test_type" in
|
||||
"multi_gpu")
|
||||
MULTI_GPU_TESTS+=("$file")
|
||||
;;
|
||||
"single_gpu")
|
||||
SINGLE_GPU_TESTS+=("$file")
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "Multi-GPU tests: ${#MULTI_GPU_TESTS[@]}"
|
||||
echo "Single-GPU tests: ${#SINGLE_GPU_TESTS[@]}"
|
||||
echo "Total tests: $TOTAL_TESTS"
|
||||
|
||||
# ============================================================
|
||||
# Step 2: Run multi-GPU tests (sequential)
|
||||
# ============================================================
|
||||
echo "Step 2: Running multi-GPU tests"
|
||||
|
||||
if [ ${#MULTI_GPU_TESTS[@]} -gt 0 ]; then
|
||||
for file in "${MULTI_GPU_TESTS[@]}"; do
|
||||
run_test_with_logging "$file" "$failed_tests_file"
|
||||
done
|
||||
else
|
||||
echo "No multi-GPU tests to run."
|
||||
fi
|
||||
|
||||
# ============================================================
|
||||
# Step 3: Run single-GPU tests (parallel shards)
|
||||
# ============================================================
|
||||
echo "Step 3: Running single-GPU tests in parallel"
|
||||
|
||||
if [ ${#SINGLE_GPU_TESTS[@]} -gt 0 ]; then
|
||||
# Split single-GPU tests into 2 shards (1 per GPU)
|
||||
TOTAL=${#SINGLE_GPU_TESTS[@]}
|
||||
HALF=$(( TOTAL / 2 ))
|
||||
|
||||
SHARD_1=("${SINGLE_GPU_TESTS[@]:0:$HALF}")
|
||||
SHARD_2=("${SINGLE_GPU_TESTS[@]:$HALF}")
|
||||
|
||||
echo "Shard 1: ${#SHARD_1[@]} tests on GPU 0"
|
||||
echo "Shard 2: ${#SHARD_2[@]} tests on GPU 1"
|
||||
|
||||
# Run in parallel (1 process per GPU)
|
||||
run_shard "shard1" 0 "${SHARD_1[@]}" &
|
||||
PID1=$!
|
||||
run_shard "shard2" 1 "${SHARD_2[@]}" &
|
||||
PID2=$!
|
||||
|
||||
# Wait for all shards to complete
|
||||
wait $PID1
|
||||
EXIT_CODE1=$?
|
||||
wait $PID2
|
||||
EXIT_CODE2=$?
|
||||
|
||||
# Merge shard failed logs to main failed log
|
||||
for shard in shard1 shard2; do
|
||||
if [ -f "${shard}_failed.txt" ]; then
|
||||
cat "${shard}_failed.txt" >> "$failed_tests_file"
|
||||
rm -f "${shard}_failed.txt"
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "===================================="
|
||||
echo "Parallel execution completed"
|
||||
echo "Shard 1 exit code: $EXIT_CODE1"
|
||||
echo "Shard 2 exit code: $EXIT_CODE2"
|
||||
echo "===================================="
|
||||
else
|
||||
echo "No single-GPU tests to run."
|
||||
fi
|
||||
|
||||
# ============================================================
|
||||
# Step 4: Summary
|
||||
# ============================================================
|
||||
echo "Step 4: Summary"
|
||||
|
||||
# Count failed tests
|
||||
if [ -f "$failed_tests_file" ]; then
|
||||
failed_count=$(wc -l < "$failed_tests_file" | tr -d ' ')
|
||||
else
|
||||
failed_count=0
|
||||
fi
|
||||
|
||||
success_count=$((TOTAL_TESTS - failed_count))
|
||||
|
||||
echo "Pytest total: $TOTAL_TESTS"
|
||||
echo "Pytest successful: $success_count"
|
||||
echo "Pytest failed: $failed_count"
|
||||
|
||||
echo "===================================="
|
||||
|
||||
# Exit with error and package logs if there were failures
|
||||
if [ "$failed_count" -ne 0 ]; then
|
||||
echo "Failed test cases are listed in $failed_tests_file"
|
||||
cat "$failed_tests_file"
|
||||
|
||||
# clean the empty directories
|
||||
if [ -d "${run_path}/unittest_logs" ]; then
|
||||
echo "Cleaning empty directories..."
|
||||
|
||||
# remove console_error.log files (cleanup logs from stopped processes)
|
||||
find "${run_path}/unittest_logs" -name "console_error.log*" -delete || true
|
||||
|
||||
# perform multi-round clean until no more empty directories are found
|
||||
while true; do
|
||||
before=$(find "${run_path}/unittest_logs" -type d | wc -l)
|
||||
find "${run_path}/unittest_logs" -mindepth 1 -type d -empty -delete || true
|
||||
after=$(find "${run_path}/unittest_logs" -type d | wc -l)
|
||||
[ "$before" -eq "$after" ] && break
|
||||
done
|
||||
fi
|
||||
|
||||
# Only package logs when there are failures
|
||||
echo "===================================="
|
||||
echo "Step 5: Packaging logs (only on failure)"
|
||||
echo "===================================="
|
||||
|
||||
if [ -d "${run_path}/unittest_logs" ]; then
|
||||
tar -czf "${run_path}/unittest_logs.tar.gz" -C "${run_path}" unittest_logs
|
||||
echo "Logs packaged to: ${run_path}/unittest_logs.tar.gz"
|
||||
ls -lh "${run_path}/unittest_logs.tar.gz"
|
||||
else
|
||||
echo "No unittest_logs directory found."
|
||||
fi
|
||||
|
||||
echo "===================================="
|
||||
|
||||
exit 8
|
||||
fi
|
||||
|
||||
echo "All tests passed!"
|
||||
exit 0
|
||||
|
||||
@@ -1389,7 +1389,8 @@ def test_streaming_chat_finish_reason(openai_client):
|
||||
|
||||
def test_profile_reset_block_num():
|
||||
"""测试profile reset_block_num功能,与baseline diff不能超过5%"""
|
||||
log_file = "./log/config.log"
|
||||
log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
log_file = os.path.join(log_dir, "config.log")
|
||||
baseline = 31446
|
||||
|
||||
if not os.path.exists(log_file):
|
||||
|
||||
@@ -734,7 +734,8 @@ def test_chat_with_response_max_tokens(openai_client):
|
||||
|
||||
def test_profile_reset_block_num():
|
||||
"""测试profile reset_block_num功能,与baseline diff不能超过5%"""
|
||||
log_file = "./log/config.log"
|
||||
log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
log_file = os.path.join(log_dir, "config.log")
|
||||
baseline = 40000
|
||||
|
||||
if not os.path.exists(log_file):
|
||||
|
||||
@@ -612,7 +612,8 @@ def test_streaming(openai_client, capsys):
|
||||
|
||||
def test_profile_reset_block_num():
|
||||
"""测试profile reset_block_num功能,与baseline diff不能超过5%"""
|
||||
log_file = "./log/config.log"
|
||||
log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
log_file = os.path.join(log_dir, "config.log")
|
||||
baseline = 32562
|
||||
|
||||
if not os.path.exists(log_file):
|
||||
|
||||
@@ -430,7 +430,8 @@ def test_streaming_chat_with_return_token_ids(openai_client, capsys):
|
||||
|
||||
def test_profile_reset_block_num():
|
||||
"""测试profile reset_block_num功能,与baseline diff不能超过15%"""
|
||||
log_file = "./log/config.log"
|
||||
log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
log_file = os.path.join(log_dir, "config.log")
|
||||
baseline = 30000
|
||||
|
||||
if not os.path.exists(log_file):
|
||||
|
||||
@@ -81,10 +81,12 @@ def setup_and_run_server():
|
||||
model_path = "baidu/ERNIE-4.5-0.3B-Paddle"
|
||||
print(f"model_path: {model_path}")
|
||||
|
||||
base_log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
|
||||
# router
|
||||
print("start router...")
|
||||
env_router = os.environ.copy()
|
||||
env_router["FD_LOG_DIR"] = "log_router"
|
||||
env_router["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_router")
|
||||
router_log_path = "router.log"
|
||||
|
||||
router_cmd = [
|
||||
@@ -110,7 +112,7 @@ def setup_and_run_server():
|
||||
env_prefill = os.environ.copy()
|
||||
env_prefill["CUDA_VISIBLE_DEVICES"] = "0"
|
||||
env_prefill["ENABLE_V1_KVCACHE_SCHEDULER"] = "0"
|
||||
env_prefill["FD_LOG_DIR"] = "log_prefill"
|
||||
env_prefill["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_prefill")
|
||||
prefill_log_path = "server_prefill.log"
|
||||
prefill_cmd = [
|
||||
sys.executable,
|
||||
@@ -160,7 +162,7 @@ def setup_and_run_server():
|
||||
env_decode = os.environ.copy()
|
||||
env_decode["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
env_decode["ENABLE_V1_KVCACHE_SCHEDULER"] = "0"
|
||||
env_decode["FD_LOG_DIR"] = "log_decode"
|
||||
env_decode["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_decode")
|
||||
decode_log_path = "server_decode.log"
|
||||
decode_cmd = [
|
||||
sys.executable,
|
||||
|
||||
@@ -81,10 +81,12 @@ def setup_and_run_server():
|
||||
model_path = "baidu/ERNIE-4.5-0.3B-Paddle"
|
||||
print(f"model_path: {model_path}")
|
||||
|
||||
base_log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
|
||||
# router
|
||||
print("start router...")
|
||||
env_router = os.environ.copy()
|
||||
env_router["FD_LOG_DIR"] = "log_router"
|
||||
env_router["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_router")
|
||||
router_log_path = "router.log"
|
||||
|
||||
router_cmd = [
|
||||
@@ -110,7 +112,7 @@ def setup_and_run_server():
|
||||
env_prefill = os.environ.copy()
|
||||
env_prefill["CUDA_VISIBLE_DEVICES"] = "0"
|
||||
env_prefill["ENABLE_V1_KVCACHE_SCHEDULER"] = "1"
|
||||
env_prefill["FD_LOG_DIR"] = "log_prefill"
|
||||
env_prefill["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_prefill")
|
||||
prefill_log_path = "prefill.log"
|
||||
prefill_cmd = [
|
||||
sys.executable,
|
||||
@@ -160,7 +162,7 @@ def setup_and_run_server():
|
||||
env_decode = os.environ.copy()
|
||||
env_decode["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
env_decode["ENABLE_V1_KVCACHE_SCHEDULER"] = "1"
|
||||
env_decode["FD_LOG_DIR"] = "log_decode"
|
||||
env_decode["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_decode")
|
||||
decode_log_path = "decode.log"
|
||||
decode_cmd = [
|
||||
sys.executable,
|
||||
|
||||
@@ -84,6 +84,8 @@ def setup_and_run_server():
|
||||
model_path = "baidu/ERNIE-4.5-0.3B-Paddle"
|
||||
print(f"model_path: {model_path}")
|
||||
|
||||
base_log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
|
||||
# get rdma nics
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
shell_path = os.path.join(current_dir, "utils/get_rdma_nics.sh")
|
||||
@@ -94,7 +96,7 @@ def setup_and_run_server():
|
||||
# router
|
||||
print("start router...")
|
||||
env_router = os.environ.copy()
|
||||
env_router["FD_LOG_DIR"] = "log_router"
|
||||
env_router["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_router")
|
||||
router_log_path = "router.log"
|
||||
|
||||
router_cmd = [
|
||||
@@ -119,7 +121,7 @@ def setup_and_run_server():
|
||||
print("start prefill...")
|
||||
env_prefill = os.environ.copy()
|
||||
env_prefill["CUDA_VISIBLE_DEVICES"] = "0"
|
||||
env_prefill["FD_LOG_DIR"] = "log_prefill"
|
||||
env_prefill["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_prefill")
|
||||
env_prefill["KVCACHE_RDMA_NICS"] = rdma_nics
|
||||
|
||||
prefill_log_path = "prefill.log"
|
||||
@@ -166,7 +168,7 @@ def setup_and_run_server():
|
||||
print("start decode...")
|
||||
env_decode = os.environ.copy()
|
||||
env_decode["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
env_decode["FD_LOG_DIR"] = "log_decode"
|
||||
env_decode["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_decode")
|
||||
env_decode["KVCACHE_RDMA_NICS"] = rdma_nics
|
||||
|
||||
decode_log_path = "decode.log"
|
||||
|
||||
@@ -86,6 +86,8 @@ def setup_and_run_server():
|
||||
model_path = "baidu/ERNIE-4.5-0.3B-Paddle"
|
||||
print(f"model_path: {model_path}")
|
||||
|
||||
base_log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
|
||||
# get rdma nics
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
shell_path = os.path.join(current_dir, "utils/get_rdma_nics.sh")
|
||||
@@ -96,7 +98,7 @@ def setup_and_run_server():
|
||||
# router
|
||||
print("start router...")
|
||||
env_router = os.environ.copy()
|
||||
env_router["FD_LOG_DIR"] = "log_router"
|
||||
env_router["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_router")
|
||||
router_log_path = "router.log"
|
||||
|
||||
router_cmd = [
|
||||
@@ -121,7 +123,7 @@ def setup_and_run_server():
|
||||
print("start prefill...")
|
||||
env_prefill = os.environ.copy()
|
||||
env_prefill["CUDA_VISIBLE_DEVICES"] = "0,1"
|
||||
env_prefill["FD_LOG_DIR"] = "log_prefill"
|
||||
env_prefill["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_prefill")
|
||||
env_prefill["KVCACHE_RDMA_NICS"] = rdma_nics
|
||||
|
||||
prefill_log_path = "prefill.log"
|
||||
@@ -170,7 +172,7 @@ def setup_and_run_server():
|
||||
print("start decode...")
|
||||
env_decode = os.environ.copy()
|
||||
env_decode["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
env_decode["FD_LOG_DIR"] = "log_decode"
|
||||
env_decode["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_decode")
|
||||
env_decode["KVCACHE_RDMA_NICS"] = rdma_nics
|
||||
|
||||
decode_log_path = "decode.log"
|
||||
|
||||
@@ -97,10 +97,12 @@ def setup_and_run_server():
|
||||
model_path = "baidu/ERNIE-4.5-0.3B-Paddle"
|
||||
print(f"model_path: {model_path}")
|
||||
|
||||
base_log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
|
||||
# router
|
||||
print("start router...")
|
||||
env_router = os.environ.copy()
|
||||
env_router["FD_LOG_DIR"] = "log_router"
|
||||
env_router["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_router")
|
||||
router_log_path = "router.log"
|
||||
|
||||
router_cmd = [
|
||||
@@ -121,11 +123,11 @@ def setup_and_run_server():
|
||||
)
|
||||
|
||||
# server0
|
||||
print("start server0...")
|
||||
print("start server 0...")
|
||||
env_server_0 = os.environ.copy()
|
||||
env_server_0["CUDA_VISIBLE_DEVICES"] = "0"
|
||||
env_server_0["ENABLE_V1_KVCACHE_SCHEDULER"] = "0"
|
||||
env_server_0["FD_LOG_DIR"] = "log_server_0"
|
||||
env_server_0["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_server_0")
|
||||
env_server_0["INFERENCE_MSG_QUEUE_ID"] = str(FD_API_PORT)
|
||||
log_path = "server_0.log"
|
||||
cmd = [
|
||||
@@ -171,7 +173,7 @@ def setup_and_run_server():
|
||||
env_server_1["CUDA_VISIBLE_DEVICES"] = "1"
|
||||
env_server_1["ENABLE_V1_KVCACHE_SCHEDULER"] = "0"
|
||||
env_server_1["INFERENCE_MSG_QUEUE_ID"] = str(FD_API_PORT + 1)
|
||||
env_server_1["FD_LOG_DIR"] = "log_server_1"
|
||||
env_server_1["FD_LOG_DIR"] = os.path.join(base_log_dir, "log_server_1")
|
||||
log_path = "server_1.log"
|
||||
cmd = [
|
||||
sys.executable,
|
||||
|
||||
@@ -99,7 +99,8 @@ class TestMultiApiServer(unittest.TestCase):
|
||||
# Verify environment variables are set correctly
|
||||
first_call_kwargs = mock_popen.call_args_list[0][1]
|
||||
self.assertIn("env", first_call_kwargs)
|
||||
self.assertEqual(first_call_kwargs["env"]["FD_LOG_DIR"], "log/log_0")
|
||||
log_dir = os.getenv("FD_LOG_DIR", "log")
|
||||
self.assertEqual(first_call_kwargs["env"]["FD_LOG_DIR"], os.path.join(log_dir, "log_0"))
|
||||
|
||||
@patch("fastdeploy.entrypoints.openai.multi_api_server.is_port_available")
|
||||
def test_check_param_success(self, mock_is_port_available):
|
||||
|
||||
Reference in New Issue
Block a user