mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimize] optimize mask_quant & swiglu (#6222)
* optimize mask_quant op speed up 1.5
* fix calculate sequence
* add fused
* rm log
* push kernel code
* add ut
* accuracy ok
* add ue8m0
* add ut
* add merge develop
* rm ut of mask_per_token_quant
This commit is contained in:
@@ -293,7 +293,7 @@ elif paddle.is_compiled_with_cuda():
|
||||
"gpu_ops/step_system_cache.cu",
|
||||
"gpu_ops/cpp_extensions.cc",
|
||||
"gpu_ops/share_external_data.cu",
|
||||
"gpu_ops/per_token_quant_fp8.cu",
|
||||
"gpu_ops/fused_mask_swiglu_fp8_quant_kernel.cu",
|
||||
"gpu_ops/update_split_fuse_input.cu",
|
||||
"gpu_ops/text_image_index_out.cu",
|
||||
"gpu_ops/text_image_gather_scatter.cu",
|
||||
|
||||
Reference in New Issue
Block a user