[BugFix] Set default OMP_NUM_THREADS=3 and fix extra GPU memory usage in DeepSeek (#5219)

* fix bug

* update

* update

* update

* fix copy

* update
This commit is contained in:
bukejiyu
2025-11-28 14:22:04 +08:00
committed by GitHub
parent 7dc06cac6e
commit 1539fd6056
6 changed files with 29 additions and 16 deletions
@@ -32,7 +32,7 @@ from paddle.nn.functional.flash_attention import (
from paddleformers.transformers.model_utils import PretrainedModel
from fastdeploy.model_executor.layers.utils import divide, get_tensor
from fastdeploy.model_executor.utils import fd_cast, h2d_copy, set_weight_attrs
from fastdeploy.model_executor.utils import fd_cast, set_weight_attrs
from .activation import ACT2FN
from .configuration import DFNRopeVisionTransformerConfig
@@ -151,7 +151,8 @@ class VisionFlashAttention2(nn.Layer):
assert param.shape == shard_weight.shape, (
f" Attempted to load weight ({shard_weight.shape}) " f"into parameter ({param.shape})"
)
h2d_copy(param, shard_weight)
shard_weight = get_tensor(shard_weight)
param.copy_(shard_weight, False)
def forward(
self,