[BugFix] Fix clear_parameters hang issue in MTP during weight cleanup in RL (#7522)

* Fix MTP CUDA graph clearing bug in RL: clear the draft model's graph before clear_parameters
2026-04-22 16:07:51 +08:00 · 2026-04-22 15:24:01 +08:00
parent e580cf0fef
commit 68dbe71d77
1 changed files with 9 additions and 2 deletions
@@ -2910,12 +2910,19 @@ class GPUModelRunner(ModelRunnerBase):
        # Clear CUDAGraph
        if self.use_cudagraph:
            self.model.clear_graph_opt_backend()
+            if (
+                self.speculative_decoding
+                and self.spec_method == SpecMethod.MTP
+                and self.graph_opt_config.draft_model_use_cudagraph
+            ):
+                self.proposer.model.clear_graph_opt_backend()
        # Clear parameters and Send single
        self.dynamic_weight_manager.clear_parameters(
            pid, self.fd_config.parallel_config.shutdown_comm_group_if_worker_idle
        )
-        if self.spec_method == SpecMethod.MTP:
-            self.proposer.model.clear_graph_opt_backend()
+
+        # NOTE(wangyanpeng): MTP cache must be cleared before clearing the main KV cache
+        if self.speculative_decoding and self.spec_method == SpecMethod.MTP:
            self.proposer.clear_mtp_cache()
        self.clear_cache()
        paddle.device.cuda.empty_cache()