# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
|
|
Unit test: isolate sampling determinism from model computation.
|
|
|
|
This test fixes the logits (model output) and runs only the sampling
|
|
pipeline multiple times. If the results differ, the bug is in sampling;
|
|
if they are always identical, the non-determinism comes from model
|
|
computation (logits differ between runs).
|
|
|
|
Usage:
|
|
CUDA_VISIBLE_DEVICES=0 pytest tests/deterministic/test_sampling_determinism.py -v -s
|
|
"""
|
|
|
|
import paddle
import paddle.nn.functional as F
import pytest

# Every test in this module needs a CUDA device (sampling runs on GPU).
pytestmark = pytest.mark.gpu

VOCAB_SIZE = 151936  # Qwen2 vocab size
BATCH_SIZE = 1  # logits are built as [BATCH_SIZE, VOCAB_SIZE]; one request per call


def _make_logits(seed: int = 42):
    """Build a reproducible random logits tensor resembling real model output.

    A handful of token positions receive a large additive boost so the
    distribution is peaked (non-uniform), like a trained LM head's output.
    """
    paddle.seed(seed)
    logits = paddle.randn([BATCH_SIZE, VOCAB_SIZE], dtype="float32")
    # Boost a few token ids so they dominate the distribution.
    for token_id, boost in ((100, 5.0), (200, 4.5), (300, 4.0)):
        logits[0, token_id] += boost
    return logits


def _sample_with_top_p(logits, top_p_val, seed_val):
    """Run the same sampling pipeline as sampler.forward_cuda (non-greedy path).

    Returns the sampled token id as a Python int.
    """
    probabilities = F.softmax(logits, axis=-1)
    p_threshold = paddle.to_tensor([top_p_val], dtype="float32")
    per_request_seed = paddle.to_tensor([[seed_val]], dtype="int64")
    # seed=-1 defers to the per-request topp_seed tensor.
    _, token_ids = paddle.tensor.top_p_sampling(
        probabilities, p_threshold, topp_seed=per_request_seed, seed=-1, mode="truncated"
    )
    return token_ids.item()


# ---- Test 1: basic repeated sampling on identical logits ----


def test_sampling_determinism_basic():
    """Same logits + same seed -> must produce same token every time."""
    logits = _make_logits(seed=42)
    sampled = []
    for _ in range(20):
        sampled.append(_sample_with_top_p(logits, top_p_val=0.95, seed_val=200))
    distinct = set(sampled)
    assert len(distinct) == 1, f"Sampling non-deterministic! Got {len(distinct)} distinct values: {sampled}"


# ---- Test 2: simulate multi-step decode (seed increments like real runner) ----


def test_sampling_determinism_multistep():
    """Simulate 100 decode steps with seed incrementing by 4 each step."""
    logits = _make_logits(seed=42)

    def run_steps():
        # real runner increments seed by 4 per decode step
        return [
            _sample_with_top_p(logits, top_p_val=0.95, seed_val=200 + step * 4)
            for step in range(100)
        ]

    run1 = run_steps()
    run2 = run_steps()
    assert run1 == run2, _diff_msg(run1, run2)


# ---- Test 3: interleave GPU work between sampling calls ----


def test_sampling_determinism_with_gpu_noise():
    """
    Insert GPU matmul work between sampling calls to check if
    GPU state residuals affect sampling determinism.
    """
    logits = _make_logits(seed=42)

    def run_steps_with_noise():
        tokens = []
        for step in range(50):
            # Simulate a model forward pass on the GPU between sampling steps.
            _ = paddle.matmul(paddle.randn([256, 256]), paddle.randn([256, 256]))
            tokens.append(_sample_with_top_p(logits, top_p_val=0.95, seed_val=200 + step * 4))
        return tokens

    run1 = run_steps_with_noise()
    run2 = run_steps_with_noise()
    assert run1 == run2, _diff_msg(run1, run2)


# ---- Test 4: flat distribution (temp=1.0 scenario, hardest case) ----


def test_sampling_determinism_flat_distribution():
    """
    Flat probability distribution (simulating temp=1.0 with no dominant token).
    This is the hardest case for determinism.
    """
    paddle.seed(99)
    # Scaling logits toward zero makes softmax nearly uniform over the vocab.
    logits = paddle.randn([BATCH_SIZE, VOCAB_SIZE], dtype="float32") * 0.1

    results_per_seed = {}
    for seed_val in (100, 200, 300, 400, 500):
        draws = [_sample_with_top_p(logits, top_p_val=0.95, seed_val=seed_val) for _ in range(10)]
        results_per_seed[seed_val] = draws
        unique = set(draws)
        assert len(unique) == 1, (
            f"seed={seed_val}: sampling non-deterministic on flat dist! "
            f"Got {len(unique)} distinct values: {draws}"
        )


# ---- Test 5: different top_p values ----


@pytest.mark.parametrize("top_p_val", [0.5, 0.8, 0.95, 1.0])
def test_sampling_determinism_various_top_p(top_p_val):
    """Determinism across different top_p values."""
    logits = _make_logits(seed=42)
    sampled = [_sample_with_top_p(logits, top_p_val=top_p_val, seed_val=200) for _ in range(10)]
    unique = set(sampled)
    assert len(unique) == 1, (
        f"top_p={top_p_val}: non-deterministic! " f"Got {len(unique)} distinct values: {sampled}"
    )


# ---- Helpers ----
|
|
|
|
|
|
def _diff_msg(run1, run2):
|
|
for i, (a, b) in enumerate(zip(run1, run2)):
|
|
if a != b:
|
|
return f"First diff at step {i}: run1={a}, run2={b}. Total diffs: {sum(1 for x, y in zip(run1, run2) if x != y)}/{len(run1)}"
|
|
return "Lengths differ"
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow invoking this file directly instead of through the pytest CLI.
    pytest.main(["-sv", __file__])