mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 08:21:53 +08:00
* rm inplace info && to(gpu)
* update append_attention
* unpin paddle version
* add full_cuda_graph=False
* add blank line

---------

Co-authored-by: SigureMo <sigure.qaq@gmail.com>
This commit is contained in:
@@ -205,7 +205,7 @@ def append_attention_with_output(
|
||||
append_attention
|
||||
"""
|
||||
if current_platform.is_cuda():
|
||||
append_attention_with_output_gpu(
|
||||
return append_attention_with_output_gpu(
|
||||
qkv,
|
||||
key_cache,
|
||||
value_cache,
|
||||
|
||||
Reference in New Issue
Block a user