"""
|
|
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""

import hashlib
import logging
import time

import numpy as np

det_logger = logging.getLogger("fastdeploy.deterministic")
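# No handlers are configured here; output appears once the application wires up
# the "fastdeploy.deterministic" logger (e.g., via logging.basicConfig).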


class DeterministicLogger:
    """Helper for logging tensor MD5 hashes and input details to assist determinism debugging."""

    def __init__(self, share_inputs):
        self.share_inputs = share_inputs
        self._current_run_id = None
        self._batch_counter = 0

    def log_batch_start(self, model_forward_batch):
        """Log batch start with run_id tracking and batch counting."""
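        # Convention assumed by these tests: a request_id of the form
        # "<prefix>_<run_id>" carries the run identifier in its last
        # "_"-separated field; only the first non-None request is inspected.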
        current_run_id = None
        for req in model_forward_batch or []:
            if req is not None:
                parts = req.request_id.split("_")
                if len(parts) > 1:
                    current_run_id = parts[-1]
                break
        if current_run_id is not None and current_run_id != self._current_run_id:
            self._current_run_id = current_run_id
            self._batch_counter = 0

        self._batch_counter += 1

        det_logger.info(f"\n{'='*80}")
        det_logger.info(f"[BATCH-START] Run_{self._current_run_id} Batch_{self._batch_counter}")
        det_logger.info(f"{'='*80}\n")

    @staticmethod
    def _compute_tensor_md5(tensor, name="tensor", prefix=""):
        """Compute the MD5 hash of a tensor for cross-run comparison."""
        if tensor is None:
            return f"{name}_md5=None"

        # Copy the tensor to CPU and serialize it through numpy
        try:
            tensor_cpu = tensor.cpu().numpy().tobytes()
        except Exception:
            # Dtypes that cannot be converted to numpy directly (e.g., bfloat16)
            # are cast to float32 on the tensor before conversion.
            tensor_cpu = tensor.astype("float32").cpu().numpy().tobytes()

        md5_hash = hashlib.md5(tensor_cpu).hexdigest()
        return f"{prefix}{name}_md5={md5_hash[:16]}"  # Keep only the first 16 chars to reduce log length

    def log_tensor_md5s(self, tensor_dict, forward_batch_reqs_list=None, stage="forward"):
        """Log MD5 hashes for multiple tensors, including per-request MD5s.

        Args:
            tensor_dict: {name: tensor} dictionary.
            forward_batch_reqs_list: list of requests in the forward batch (entries may be None).
            stage: Stage identifier (e.g., "prefill", "decode", "forward").
        """
        # Get batch size from first valid tensor
        batch_size = self._get_batch_size(tensor_dict)
        if batch_size is None:
            return

        # Get prefill/decode counts
        prefill_count, decode_count, seq_lens_encoder = self._get_stage_counts(batch_size)

        # Build stage information
        stage_info = stage
        if prefill_count > 0 or decode_count > 0:
            stage_info += f" (prefill={prefill_count}, decode={decode_count})"

        # Compute and log batch MD5
        batch_md5_info = [
            self._compute_tensor_md5(tensor, name, prefix="batch_")
            for name, tensor in tensor_dict.items()
            if tensor is not None
        ]

        # Log overall batch MD5
        req_id_str = self._build_req_id_str(forward_batch_reqs_list)
        det_logger.info(
            f"[DETERMINISM-MD5] stage={stage_info} | batch_size={batch_size} | "
            + (f"requests: {req_id_str} | " if req_id_str else "")
            + " | ".join(batch_md5_info)
        )

        # Log per-request MD5 for decode requests
        self._log_per_request_md5s(
            tensor_dict, forward_batch_reqs_list, batch_size, prefill_count, decode_count, seq_lens_encoder
        )

    @staticmethod
    def _get_batch_size(tensor_dict):
        """Get the batch size from the first tensor that has a shape."""
        for tensor in tensor_dict.values():
            if tensor is not None and hasattr(tensor, "shape"):
                return tensor.shape[0]
        return None

    def _get_stage_counts(self, batch_size):
        """Get prefill/decode counts and seq_lens_encoder."""
        prefill_count = 0
        decode_count = 0
        seq_lens_encoder = None

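        # A positive seq_lens_encoder entry marks a request as prefill; every
        # remaining slot in the batch is treated as decode.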
        if self.share_inputs is not None and "seq_lens_encoder" in self.share_inputs:
            seq_lens_encoder = self.share_inputs["seq_lens_encoder"].cpu().numpy()
            prefill_count = int((seq_lens_encoder > 0).sum())
            decode_count = int(batch_size - prefill_count)

        return prefill_count, decode_count, seq_lens_encoder

    @staticmethod
    def _build_req_id_str(forward_batch_reqs_list):
        """Build a request-ID string from forward_batch_reqs_list."""
        if forward_batch_reqs_list is None:
            return ""
        req_info = [f"[{i}]{req.request_id}" for i, req in enumerate(forward_batch_reqs_list) if req is not None]
        return ", ".join(req_info)

    def _log_per_request_md5s(
        self, tensor_dict, forward_batch_reqs_list, batch_size, prefill_count, decode_count, seq_lens_encoder
    ):
        """Log per-request MD5s for decode requests.

        In the decode phase the tensor shape is [batch_size, hidden_dim] or
        [batch_size, vocab_size], so tensors can be split along the batch
        dimension directly.
        """
        if decode_count == 0 or forward_batch_reqs_list is None:
            return

        for i, req in enumerate(forward_batch_reqs_list):
            if req is None or i >= batch_size:
                continue

            # Check if this is a decode request
            if seq_lens_encoder is not None:
                if i >= len(seq_lens_encoder) or int(seq_lens_encoder[i]) != 0:
                    continue  # Skip prefill requests
            elif prefill_count > 0:
                continue  # Mixed batch without seq_lens_encoder; skip all requests

            req_id = req.request_id
            req_md5_info = [
                self._compute_tensor_md5(tensor[i : i + 1], name)
                for name, tensor in tensor_dict.items()
                if tensor is not None and hasattr(tensor, "shape") and len(tensor.shape) >= 2
            ]

            if req_md5_info:
                det_logger.info(f"[DETERMINISM-MD5-REQ] {req_id} | decode | " + " | ".join(req_md5_info))

    def log_prefill_input(self, request_id, idx, prefill_start_index, prefill_end_index, input_ids):
        """Log prefill input details for determinism verification."""
        det_logger.info(
            f"[DETERMINISM] Prefill input - request_id: {request_id}, "
            f"idx: {idx}, prefill_start_index: {prefill_start_index}, "
            f"prefill_end_index: {prefill_end_index}, "
            f"input_ids: {input_ids}"
        )

    def log_deterministic_input(self, forward_meta):
        """Log deterministic-inference input information; supports batches with multiple requests."""
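        # ids_remove_padding concatenates every request's tokens for this step
        # into one flat tensor; seq_lens_this_time holds each request's segment
        # length, so the loop below walks the tensor with a running offset.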
        ids = forward_meta.ids_remove_padding
        req_ids = self.share_inputs.get("req_ids", None)
        seq_lens_this_time = self.share_inputs.get("seq_lens_this_time", None)
        seq_lens_encoder = self.share_inputs.get("seq_lens_encoder", None)
        seq_lens_decoder = self.share_inputs.get("seq_lens_decoder", None)

        # Get batch size
        num_requests = len(seq_lens_this_time) if seq_lens_this_time is not None else 0

        det_logger.info(f"[DETERMINISM-INPUT] time={time.time():.6f} | batch_size={num_requests}")

        if num_requests == 0 or ids is None:
            det_logger.info("[DETERMINISM-INPUT] No input data")
            return

        # Split ids for each request
        ids_list = ids.cpu().numpy().tolist()
        offset = 0

        for i in range(num_requests):
            # Get current request information
            req_id = req_ids[i] if req_ids is not None and i < len(req_ids) else f"idx_{i}"
            seq_len = int(seq_lens_this_time[i])
            seq_len_enc = int(seq_lens_encoder[i]) if seq_lens_encoder is not None and i < len(seq_lens_encoder) else 0
            seq_len_dec = int(seq_lens_decoder[i]) if seq_lens_decoder is not None and i < len(seq_lens_decoder) else 0

            # Get the current request's tokens
            if seq_len > 0:
                request_tokens = ids_list[offset : offset + seq_len]
            else:
                request_tokens = []

            offset += seq_len

            # Log one line per request
            det_logger.info(
                f"[DETERMINISM-INPUT] req_id={req_id} | tokens={request_tokens} | "
                f"len={seq_len} | seq_len_enc={seq_len_enc} | seq_len_dec={seq_len_dec}"
            )
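

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the FastDeploy runtime). It drives the
# logger with toy Paddle tensors and a hypothetical `_Req` stub; the request
# and tensor names, and `paddle` being installed, are assumptions of the demo.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import paddle

    logging.basicConfig(level=logging.INFO)

    class _Req:
        """Hypothetical stand-in for a FastDeploy request object."""

        def __init__(self, request_id):
            self.request_id = request_id

    # Zero encoder lengths mark both requests as decode-phase.
    share_inputs = {"seq_lens_encoder": paddle.to_tensor([0, 0], dtype="int32")}
    det = DeterministicLogger(share_inputs)

    reqs = [_Req("demo_run1"), _Req("demo2_run1")]
    det.log_batch_start(reqs)
    det.log_tensor_md5s(
        {"hidden_states": paddle.rand([2, 8], dtype="float32")},
        forward_batch_reqs_list=reqs,
        stage="decode",
    )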