[BugFix] Set default OMP_NUM_THREADS=3 and fix extra GPU memory usage in DeepSeek (#5219)

* fix bug
* update
* update
* update
* fix copy
* update
2026-04-24 01:29:57 +08:00 · 2025-11-28 14:22:04 +08:00
parent 7dc06cac6e
commit 1539fd6056
6 changed files with 29 additions and 16 deletions
@@ -32,7 +32,7 @@ from paddle.nn.functional.flash_attention import (
 from paddleformers.transformers.model_utils import PretrainedModel

 from fastdeploy.model_executor.layers.utils import divide, get_tensor
-from fastdeploy.model_executor.utils import fd_cast, h2d_copy, set_weight_attrs
+from fastdeploy.model_executor.utils import fd_cast, set_weight_attrs

 from .activation import ACT2FN
 from .configuration import DFNRopeVisionTransformerConfig
@@ -151,7 +151,8 @@ class VisionFlashAttention2(nn.Layer):
        assert param.shape == shard_weight.shape, (
            f" Attempted to load weight ({shard_weight.shape}) " f"into parameter ({param.shape})"
        )
-        h2d_copy(param, shard_weight)
+        shard_weight = get_tensor(shard_weight)
+        param.copy_(shard_weight, False)

    def forward(
        self,