FastDeploy/tests/pooling/test_Ernie4_5_reward_serving.py
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
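"""End-to-end serving test for the Ernie4.5 reward (pooling) model.

Launches the FastDeploy OpenAI-compatible API server with the model run as a
pooling runner converted to embedding output, posts a chat-style payload to
the /v1/reward endpoint, and compares the returned score against a stored
baseline, both with and without prefix caching.

Typical invocation (MODEL_PATH points at a directory containing RM_v1008_5):

    MODEL_PATH=/path/to/models pytest -s test_Ernie4_5_reward_serving.py
"""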
import json
import os
import signal
import subprocess
import sys
import time

import pytest
import requests

from e2e.utils.serving_utils import (
    FD_API_PORT,
    FD_CACHE_QUEUE_PORT,
    FD_ENGINE_QUEUE_PORT,
    FD_METRICS_PORT,
    clean_ports,
    is_port_open,
)

# ==========================
# Shared Helper Functions
# ==========================
def _start_server_process(enable_caching: bool, log_filename: str):
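    """Launch the API server as a subprocess and wait until its port is open.

    The server is started in its own session (process group), so a failed or
    finished run can be torn down with os.killpg. Returns the Popen handle.
    """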
print(f"\n[Server Setup] Cleaning ports before starting (Caching={'ON' if enable_caching else 'OFF'})...")
clean_ports()
base_path = os.getenv("MODEL_PATH")
if base_path:
model_path = os.path.join(base_path, "RM_v1008_5")
else:
model_path = "./RM_v1008_5"
if not os.path.exists(model_path):
raise FileNotFoundError(f"Model path not found: {model_path}")
    cmd = [
        sys.executable,
        "-m",
        "fastdeploy.entrypoints.openai.api_server",
        "--model",
        model_path,
        "--port",
        str(FD_API_PORT),
        "--tensor-parallel-size",
        "2",
        "--engine-worker-queue-port",
        str(FD_ENGINE_QUEUE_PORT),
        "--metrics-port",
        str(FD_METRICS_PORT),
        "--cache-queue-port",
        str(FD_CACHE_QUEUE_PORT),
        "--max-model-len",
        "8192",
        "--max-num-seqs",
        "256",
        "--runner",
        "pooling",
        "--convert",
        "embed",
    ]
    if enable_caching:
        cmd.append("--enable-prefix-caching")
    else:
        cmd.append("--no-enable-prefix-caching")
print(f"[Server Setup] Command: {' '.join(cmd)}")
with open(log_filename, "w") as logfile:
process = subprocess.Popen(
cmd,
stdout=logfile,
stderr=subprocess.STDOUT,
start_new_session=True,
)
    # Wait up to 300 seconds for the server to accept connections.
    for _ in range(300):
        if is_port_open("127.0.0.1", FD_API_PORT):
            print(f"[Server Setup] Server is up on port {FD_API_PORT}")
            break
        time.sleep(1)
    else:
        print("[Server Setup] Server failed to start. Cleaning up...")
        try:
            os.killpg(process.pid, signal.SIGTERM)
        except Exception:
            pass
        if os.path.exists(log_filename):
            with open(log_filename, "r") as f:
                print(f"Server Log Tail ({log_filename}):\n{f.read()[-500:]}")
        raise RuntimeError(f"Server did not start on port {FD_API_PORT}")
    return process


@pytest.fixture(scope="function")
def reward_api_url():
    """Returns the API endpoint URL for reward scoring."""
    return f"http://0.0.0.0:{FD_API_PORT}/v1/reward"


@pytest.fixture(scope="function")
def headers():
    """Returns common HTTP request headers."""
    return {"Content-Type": "application/json"}


@pytest.fixture(scope="function")
def server_default_caching():
    process = _start_server_process(enable_caching=True, log_filename="reward_server_caching_on.log")
    yield process
    os.killpg(process.pid, signal.SIGTERM)  # tear the server down after the test


@pytest.fixture(scope="function")
def server_no_caching():
    process = _start_server_process(enable_caching=False, log_filename="reward_server_caching_off.log")
    yield process
    os.killpg(process.pid, signal.SIGTERM)


def save_score_baseline(score: float, baseline_file: str):
    """Save reward score to baseline file."""
    baseline_data = {"score": score}
    with open(baseline_file, "w", encoding="utf-8") as f:
        json.dump(baseline_data, f, indent=2)
    print(f"Baseline saved to: {baseline_file}")


def check_score_against_baseline(current_score: float, baseline_file: str, threshold: float = 0.01):
    """Compare the reward score against the baseline file; save a new baseline if none exists."""
    try:
        with open(baseline_file, "r", encoding="utf-8") as f:
            baseline_data = json.load(f)
        baseline_score = baseline_data["score"]
    except FileNotFoundError:
        print(f"Baseline file not found: {baseline_file}. Saving current as baseline.")
        save_score_baseline(current_score, baseline_file)
        return

    diff = abs(current_score - baseline_score)
    print(f"Score Difference: {diff:.6f} (Current: {current_score}, Baseline: {baseline_score})")
    if diff >= threshold:
        temp_file = f"{baseline_file}.current"
        save_score_baseline(current_score, temp_file)
        raise AssertionError(
            f"Score differs from baseline by too much (diff={diff:.6f} >= {threshold}):\n"
            f"Current score saved to: {temp_file}"
        )


def _run_test_logic(reward_api_url, headers, baseline_filename):
    payload = {
        "model": "default",
        "messages": [
            # User: "Where is Tiananmen in Beijing?"
            {"role": "user", "content": [{"type": "text", "text": "北京天安门在哪里?"}]},
            # Assistant: "Tiananmen is in front of the Forbidden City in Beijing."
            {"role": "assistant", "content": [{"type": "text", "text": "北京天安门在中国北京故宫的前面。"}]},
        ],
        "user": "user-123",
        "enable_thinking": False,
    }
print(f"\n=== Sending request to {reward_api_url} ===")
response = requests.post(reward_api_url, headers=headers, json=payload, timeout=30)
assert response.status_code == 200, f"API request failed with status {response.status_code}: {response.text}"
result = response.json()
print(f"Response: {json.dumps(result, indent=2, ensure_ascii=False)}")
assert "data" in result and len(result["data"]) > 0
score = float(result["data"][0]["score"][0])
print(f"✓ Reward Score: {score}")

    base_path = os.getenv("MODEL_PATH", "")
    if base_path:
        baseline_file = os.path.join(base_path, baseline_filename)
    else:
        baseline_file = baseline_filename
    check_score_against_baseline(score, baseline_file, threshold=0.01)


def test_reward_model_with_caching(server_default_caching, reward_api_url, headers):
    print("\n>>> Running Test: WITH Prefix Caching")
    _run_test_logic(reward_api_url, headers, baseline_filename="RM_v1008_5.json")


def test_reward_model_without_caching(server_no_caching, reward_api_url, headers):
    print("\n>>> Running Test: WITHOUT Prefix Caching")
    _run_test_logic(reward_api_url, headers, baseline_filename="RM_v1008_5_no_caching.json")
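

# Convenience entry point (an assumption, not part of the original harness):
# allows running this file directly instead of through the pytest CLI.
if __name__ == "__main__":
    sys.exit(pytest.main([__file__, "-s"]))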