native top_p_sampling (#2901)

This commit is contained in:
lifulll
2025-07-22 14:09:59 +08:00
committed by GitHub
parent 0eedbdaee0
commit 2c6a9e887e
14 changed files with 93 additions and 7 deletions
@@ -20,7 +20,7 @@ import paddle
from fastdeploy.platforms import current_platform
-if current_platform.is_cuda() and not current_platform.is_dcu():
+if current_platform.is_cuda():
from fastdeploy.model_executor.ops.gpu import (
append_attention as append_attention_gpu,
)