mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
[BugFix]Set default OMP_NUM_THREADS=3 and fix extra GPU memory usage in DeepSeek (#5219)
* fix bug * update * update * update * fix copy * update
This commit is contained in:
@@ -32,7 +32,7 @@ from paddle.nn.functional.flash_attention import (
|
||||
from paddleformers.transformers.model_utils import PretrainedModel
|
||||
|
||||
from fastdeploy.model_executor.layers.utils import divide, get_tensor
|
||||
from fastdeploy.model_executor.utils import fd_cast, h2d_copy, set_weight_attrs
|
||||
from fastdeploy.model_executor.utils import fd_cast, set_weight_attrs
|
||||
|
||||
from .activation import ACT2FN
|
||||
from .configuration import DFNRopeVisionTransformerConfig
|
||||
@@ -151,7 +151,8 @@ class VisionFlashAttention2(nn.Layer):
|
||||
assert param.shape == shard_weight.shape, (
|
||||
f" Attempted to load weight ({shard_weight.shape}) " f"into parameter ({param.shape})"
|
||||
)
|
||||
h2d_copy(param, shard_weight)
|
||||
shard_weight = get_tensor(shard_weight)
|
||||
param.copy_(shard_weight, False)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user