mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
[Optimization] Support FA2/FA3/FA4 with attn_mask_q (#6354)
* support FA4 sm100
* flash attn backend support mask
* flash attn backend run flashmask correct
* add test for flash_attn_backend and flash_attn_func
* check
* add test for fa4
* requirements.txt add fa4 whl
* check test on sm100
* fix CI conflict
* add enable_torch_proxy for flash_mask
* lazy import fa4
* check
* fix tests import
* check test_load_mpt import
This commit is contained in:
@@ -19,6 +19,7 @@ import re
|
||||
from collections.abc import Mapping
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass, field
|
||||
from functools import cache
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
import paddle
|
||||
@@ -547,3 +548,11 @@ def rename_offline_ckpt_suffix_to_fd_suffix(
|
||||
return loaded_weight_name
|
||||
|
||||
return fn
|
||||
|
||||
|
||||
@cache
def get_sm_version():
    """Return the GPU compute capability encoded as a single integer.

    The value is ``major * 10 + minor`` taken from the properties object
    returned by ``paddle.device.cuda.get_device_properties()`` (called with
    no device argument). If ``paddle.cuda.is_available()`` is false, ``0``
    is returned instead.

    The function is wrapped in ``functools.cache``, so repeated calls reuse
    the first computed result rather than querying the device again.
    """
    # Guard clause: without CUDA there is no SM version to report.
    if not paddle.cuda.is_available():
        return 0

    device_props = paddle.device.cuda.get_device_properties()
    major, minor = device_props.major, device_props.minor
    return major * 10 + minor
|
||||
|
||||
Reference in New Issue
Block a user