mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
[Feature] DeepseekV3 use pd_build_static_op (#2948)
Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "append_attn/multi_head_latent_attention_kernel.h"
|
||||
#include "helper.h"
|
||||
#include "mla_attn/batch_mla_with_paged_kv_cache.h"
|
||||
|
||||
template <paddle::DataType D>
|
||||
@@ -410,7 +411,7 @@ std::vector<paddle::DataType> MultiHeadLatentAttentionInferDtype(
|
||||
}
|
||||
}
|
||||
|
||||
PD_BUILD_OP(multi_head_latent_attention)
|
||||
PD_BUILD_STATIC_OP(multi_head_latent_attention)
|
||||
.Inputs({"query",
|
||||
"key_cache",
|
||||
"value_cache",
|
||||
|
||||
Reference in New Issue
Block a user