mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Optimization] Support FA2/FA3/FA4 with attn_mask_q (#6354)
* support FA4 sm100
* flash attn backend support mask
* flash attn backend run flashmask correct
* add test for flash_attn_backend and flash_attn_func
* check
* add test for fa4
* requirements.txt add fa4 whl
* check test on sm100
* fix CI conflict
* add enable_torch_proxy for flash_mask
* lazy import fa4
* check
* fix tests import
* check test_load_mpt import
This commit is contained in:
@@ -6,6 +6,9 @@ import unittest
|
||||
import paddle
|
||||
from paddle.distributed import fleet
|
||||
|
||||
# from fastdeploy.worker.worker_process import init_distributed_environment
|
||||
from utils import OpPerformanceTester
|
||||
|
||||
from fastdeploy.config import (
|
||||
CacheConfig,
|
||||
FDConfig,
|
||||
@@ -19,9 +22,6 @@ from fastdeploy.model_executor.layers.moe.moe import FusedMoE
|
||||
from fastdeploy.model_executor.layers.quantization.w4afp8 import W4AFP8Config
|
||||
from fastdeploy.scheduler import SchedulerConfig
|
||||
|
||||
# from fastdeploy.worker.worker_process import init_distributed_environment
|
||||
from tests.utils import OpPerformanceTester
|
||||
|
||||
paddle.set_default_dtype("bfloat16")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user