# FastDeploy/tests/e2e/4cards_cases/test_determinism_offline.py
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Determinism offline inference tests using LLM.generate
Test scenarios:
1. Same-prompt repeatability (FD_DETERMINISTIC_MODE=1)
2. Different batch sizes consistency
3. Sampling-parameter combinations (temperature x top_p, parametrized)
4. Minimal output (max_tokens=1, early stop)
5. Special characters & multi-language prompts
6. Multi-turn conversation
7. State isolation (interleaved / interference prompts)
8. Non-deterministic validation (proves tests are effective)
Long sequence / long prompt / batch invariance tests are in test_determinism_long.py.
Usage:
CUDA_VISIBLE_DEVICES=0,1,2,3 pytest tests/e2e/4cards_cases/test_determinism_offline.py -v
"""
import os
from contextlib import contextmanager
import pytest
pytestmark = pytest.mark.gpu
DEFAULT_MODEL_DIR = "./models"
MODEL_NAME = "Qwen2-7B-Instruct"
@contextmanager
def env_override(mapping):
    """Temporarily set environment variables, restoring originals on exit.

    Args:
        mapping: dict of {name: value} pairs to place into ``os.environ``.

    On exit, each variable is restored to its previous value, or removed
    entirely if it was not set before entry.
    """
    old = {k: os.environ.get(k) for k in mapping}
    try:
        # Apply inside the try so that even a partial/failed update is
        # rolled back by the finally block.
        os.environ.update(mapping)
        yield
    finally:
        for k, v in old.items():
            if v is None:
                os.environ.pop(k, None)
            else:
                os.environ[k] = v
@pytest.fixture(scope="module")
def model_path():
    """Resolve the model directory: $MODEL_PATH (or the default) + model name."""
    base_dir = os.getenv("MODEL_PATH", DEFAULT_MODEL_DIR)
    return os.path.join(base_dir, MODEL_NAME)
@pytest.fixture(autouse=True)
def _reset_deterministic_mode():
    """Pin FD_DETERMINISTIC_MODE to "1" both before and after every test."""
    flag = "FD_DETERMINISTIC_MODE"
    os.environ[flag] = "1"
    yield
    os.environ[flag] = "1"
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module", autouse=True)
def _module_env():
    """Set env vars before importing fastdeploy (the import reads them)."""
    overrides = {
        "CUDA_VISIBLE_DEVICES": os.environ.get("CUDA_VISIBLE_DEVICES", "0"),
        "FD_DETERMINISTIC_MODE": "1",
        "FD_CUSTOM_AR_MAX_SIZE_MB": "64",
    }
    with env_override(overrides):
        # Lazy import: fastdeploy must only be imported after the env vars
        # above are in place, hence the module-level names are bound here.
        global LLM, SamplingParams  # noqa: PLW0603
        from fastdeploy import LLM, SamplingParams

        yield
@pytest.fixture(scope="module")
def llm(model_path, _module_env):
    """Module-scoped 4-way tensor-parallel LLM instance used by every test."""
    use_cudagraph = os.getenv("USE_CUDAGRAPH", "0") == "1"
    return LLM(
        model=model_path,
        tensor_parallel_size=4,
        max_model_len=8192,
        enable_prefix_caching=False,
        graph_optimization_config={"use_cudagraph": use_cudagraph},
    )
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _generate_text(llm, prompt, sp):
"""Generate once, return (text, token_ids)."""
out = llm.generate([prompt], sp)[0]
return out.outputs.text, list(out.outputs.token_ids)
def _assert_deterministic(llm, prompt, sp, runs=2):
    """Repeat one generation *runs* times; fail unless every run matches.

    Returns the (text, token_ids) pair shared by all runs.
    """
    outputs = [_generate_text(llm, prompt, sp) for _ in range(runs)]
    ref_text, ref_ids = outputs[0]
    assert all(text == ref_text for text, _ in outputs), "Text outputs differ across runs"
    assert all(ids == ref_ids for _, ids in outputs), "Token IDs differ across runs"
    return ref_text, ref_ids
# ===================== Core determinism tests =====================
def test_deterministic_same_prompt(llm):
    """Same prompt + same seed produces identical output across 5 runs."""
    params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=50, seed=123)
    prompt = "Please introduce artificial intelligence in one sentence."
    _assert_deterministic(llm, prompt, params, runs=5)
def test_deterministic_different_batch_sizes(llm):
    """Same prompt is consistent across batch sizes 1 / 2 / 4 / 8.

    The single-request result is the baseline; since every entry of each
    larger batch carries the identical prompt and seed, every position
    (not just the first) must reproduce the baseline exactly.
    """
    prompt = "What is machine learning?"
    sp = SamplingParams(temperature=0.5, max_tokens=30, seed=789)
    baseline, _ = _generate_text(llm, prompt, sp)
    for bs in [2, 4, 8]:
        outputs = llm.generate([prompt] * bs, sp)
        # Check all positions: the original only compared outputs[0], which
        # would miss divergence at later batch positions.
        for pos, out in enumerate(outputs):
            assert out.outputs.text == baseline, f"Batch size {bs}, position {pos} differs from bs=1"
def test_deterministic_batch_invariance(llm):
    """Target prompt produces identical output regardless of batch position."""
    target = "What kind of programming language is Python?"
    sp = SamplingParams(temperature=0.5, max_tokens=40, seed=456)
    expected, _ = _generate_text(llm, target, sp)
    # Target placed at the front, middle, and end of differently sized batches.
    configs = [
        [target, "Filler question 1"],
        ["Filler question 2", target, "Filler question 3"],
        ["Filler question 4", "Filler question 5", target],
        ["Filler 6", "Filler 7", "Filler 8", target],
    ]
    for cfg_id, batch in enumerate(configs):
        pos = batch.index(target)
        got = llm.generate(batch, sp)[pos].outputs.text
        assert got == expected, f"Batch config {cfg_id} (pos {pos}): result differs from single-request baseline"
# ===================== Sampling-parameter combinations =====================
@pytest.mark.parametrize(
    "temp,top_p,seed",
    [
        (0.0, 1.0, 300),  # greedy, no top_p filter
        (0.0, 0.0, 301),  # double-greedy
        (0.3, 0.9, 302),  # low temp, moderate top_p
        (0.8, 0.0, 303),  # medium temp, greedy top_p
        (0.8, 1.0, 304),  # medium temp, no top_p filter
        (0.8, 0.5, 305),  # medium temp, strict top_p
        (1.0, 0.95, 306),  # high temp
        (1.5, 0.9, 307),  # very high temp
    ],
)
def test_deterministic_param_combos(llm, temp, top_p, seed):
    """Determinism holds across various (temperature, top_p) combinations."""
    _assert_deterministic(
        llm,
        "What is a neural network?",
        SamplingParams(temperature=temp, top_p=top_p, max_tokens=30, seed=seed),
    )
# ===================== Minimal / boundary output tests =====================
def test_deterministic_max_tokens_one(llm):
    """Single-token output is deterministic."""
    params = SamplingParams(temperature=0.1, max_tokens=1, seed=700)
    _, ids = _assert_deterministic(llm, "What color is the sky?", params)
    assert len(ids) == 1, f"Expected 1 token, got {len(ids)}"
def test_deterministic_early_stop(llm):
    """Early stopping via stop sequences is deterministic."""
    params = SamplingParams(temperature=0.7, max_tokens=100, stop=["\u3002", "."], seed=800)
    _, ids = _assert_deterministic(llm, "Please list three colors:", params)
    assert len(ids) < 100, f"Expected early stop, got {len(ids)} tokens"
# ===================== Special input tests =====================
@pytest.mark.parametrize(
    "prompt,seed",
    [
        ("What is AI? \U0001f52c\U0001f9e0", 900),  # emoji
        ("Math: E = mc\u00b2", 901),  # superscript
        ("Code: def hello(): return 'world'", 902),  # code
        ("Symbols: @#$%^&*()", 903),  # special symbols
    ],
)
def test_deterministic_special_chars(llm, prompt, seed):
    """Prompts containing emoji / unicode / code / symbols stay deterministic."""
    _assert_deterministic(llm, prompt, SamplingParams(temperature=0.5, max_tokens=30, seed=seed))
@pytest.mark.parametrize(
    "lang,prompt,seed",
    [
        ("Chinese", "Please introduce artificial intelligence in one sentence.", 1000),
        ("English", "What is artificial intelligence in one sentence?", 1001),
        (
            "Japanese",
            "\u4eba\u5de5\u77e5\u80fd\u306b\u3064\u3044\u3066\u4e00\u8a00\u3067\u8aac\u660e\u3057\u3066\u304f\u3060\u3055\u3044\u3002",
            1002,
        ),
        ("Spanish", "\u00bfQu\u00e9 es la inteligencia artificial en una frase?", 1003),
    ],
)
def test_deterministic_multi_language(llm, lang, prompt, seed):
    """Prompts in several languages stay deterministic."""
    _assert_deterministic(llm, prompt, SamplingParams(temperature=0.5, max_tokens=30, seed=seed))
# ===================== Multi-turn conversation test =====================
def test_deterministic_multi_turn(llm):
    """Multi-turn chat maintains determinism."""
    sp = SamplingParams(temperature=0.5, max_tokens=50, seed=1100)
    base_messages = [
        {"role": "user", "content": "Hello!"},
        {"role": "assistant", "content": "Hi! How can I help you?"},
        {"role": "user", "content": "Please introduce yourself."},
    ]

    def run_conversation():
        # Turn 1: reply to the base conversation.
        turn1 = llm.chat(base_messages, sp)[0].outputs.text
        # Turn 2: extend the history with turn-1's reply plus a follow-up.
        extended = base_messages + [
            {"role": "assistant", "content": turn1},
            {"role": "user", "content": "What can you do?"},
        ]
        turn2 = llm.chat(extended, sp)[0].outputs.text
        return turn1, turn2

    first = run_conversation()
    second = run_conversation()
    assert first[0] == second[0], "Multi-turn: turn-1 outputs differ"
    assert first[1] == second[1], "Multi-turn: turn-2 outputs differ"
# ===================== State isolation test =====================
def test_deterministic_state_isolation(llm):
    """Interference prompts and interleaving do not break determinism."""
    targets = [
        ("What is Python?", SamplingParams(temperature=0.5, max_tokens=30, seed=1200)),
        ("What is JavaScript?", SamplingParams(temperature=0.5, max_tokens=30, seed=1201)),
    ]
    # Round 1: record the baseline text for each target prompt.
    round1 = [_generate_text(llm, prompt, params)[0] for prompt, params in targets]
    # Run unrelated interference traffic between the rounds.
    for noise in ["Explain reinforcement learning.", "What is NLP?", "List 3 fruits."]:
        llm.generate([noise], SamplingParams(temperature=0.7, max_tokens=20, seed=999))
    # Round 2: the targets must reproduce round-1 exactly.
    round2 = [_generate_text(llm, prompt, params)[0] for prompt, params in targets]
    assert round1[0] == round2[0], "Prompt A: output changed after interference"
    assert round1[1] == round2[1], "Prompt B: output changed after interference"
# ===================== Non-deterministic validation =====================
def test_non_deterministic_validation(llm):
    """
    Prove that tests are effective:
    - Without seed + without mode: outputs vary
    - With explicit seed: outputs are consistent
    """
    prompt = "Please explain deep learning in one sentence."
    # Part 1: deterministic mode off, no seed -> outputs should differ.
    # (The autouse fixture restores FD_DETERMINISTIC_MODE after the test.)
    os.environ.pop("FD_DETERMINISTIC_MODE", None)
    unseeded = []
    for _ in range(5):
        params = SamplingParams(temperature=0.7, max_tokens=30)
        unseeded.append(llm.generate([prompt], params)[0].outputs.text)
    # Probabilistic, skip if all outputs are the same
    if len(set(unseeded)) == 1:
        pytest.skip("Sampling produced identical outputs (probabilistic case)")
    # Part 2: explicit seed -> outputs must be consistent.
    seeded_params = SamplingParams(temperature=0.7, max_tokens=30, seed=999)
    seeded = [llm.generate([prompt], seeded_params)[0].outputs.text for _ in range(5)]
    assert len(set(seeded)) == 1, "With explicit seed: expected consistent outputs"
if __name__ == "__main__":
    # Allow running this test module directly (outside a pytest invocation).
    pytest.main(["-sv", __file__])