mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
* add clear_grpah func * fix spell
This commit is contained in:
@@ -92,7 +92,7 @@ class GraphOptWrapper:
|
||||
def __call__(self, **kwargs):
|
||||
return self.graph_opt_backend(**kwargs)
|
||||
|
||||
def clear_grpah_opt_backend(self, fd_config):
|
||||
def clear_graph_opt_backend(self, fd_config):
|
||||
""" """
|
||||
# TODO(gongshaotian): Resolve the bug of static graphs not being able to update weights
|
||||
assert (
|
||||
|
||||
@@ -1306,9 +1306,9 @@ class DeepseekV3ForCausalLM(ModelForCasualLM):
|
||||
)
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.model.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class DeepSeekV3PretrainedModel(PretrainedModel):
|
||||
|
||||
@@ -701,9 +701,9 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.ernie.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.ernie.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
@ModelRegistry.register_model_class(
|
||||
|
||||
@@ -829,9 +829,9 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.ernie.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.ernie.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class Ernie4_5_VLPretrainedModel(PretrainedModel):
|
||||
|
||||
@@ -563,9 +563,9 @@ class Glm4MoeForCausalLM(ModelForCasualLM):
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.model.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class Glm4MoePretrainedModel(PretrainedModel):
|
||||
|
||||
@@ -369,3 +369,7 @@ class Glm4MTPForCausalLM(ModelForCasualLM):
|
||||
)
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.model.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
@@ -417,9 +417,9 @@ class Qwen2ForCausalLM(ModelForCasualLM):
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.qwen2.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.qwen2.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class Qwen2PretrainedModel(PretrainedModel):
|
||||
|
||||
@@ -341,9 +341,9 @@ class Qwen3ForCausalLM(ModelForCasualLM):
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.model.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class Qwen3PretrainedModel(PretrainedModel):
|
||||
|
||||
@@ -382,9 +382,9 @@ class Qwen3VLForConditionalGeneration(ModelForCasualLM):
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.model.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class Qwen3VLPretrainedModel(PretrainedModel):
|
||||
|
||||
@@ -453,9 +453,9 @@ class Qwen3MoeForCausalLM(ModelForCasualLM):
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.model.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class Qwen3MoePretrainedModel(PretrainedModel):
|
||||
|
||||
@@ -2692,13 +2692,13 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
"""Dynamic model loader use to clear parameters use for RL"""
|
||||
# Clear CUDAGraph
|
||||
if self.use_cudagraph:
|
||||
self.model.clear_grpah_opt_backend()
|
||||
self.model.clear_graph_opt_backend()
|
||||
# Clear parameters and Send single
|
||||
self.dynamic_weight_manager.clear_parameters(
|
||||
pid, self.fd_config.parallel_config.shutdown_comm_group_if_worker_idle
|
||||
)
|
||||
if self.spec_method == SpecMethod.MTP:
|
||||
self.proposer.model.clear_grpah_opt_backend()
|
||||
self.proposer.model.clear_graph_opt_backend()
|
||||
self.proposer.clear_mtp_cache()
|
||||
self.clear_cache()
|
||||
paddle.device.cuda.empty_cache()
|
||||
@@ -2752,7 +2752,7 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
logger.info("GPU model runner's weight is already sleeping, no need to sleep again!")
|
||||
return
|
||||
if self.use_cudagraph:
|
||||
self.model.clear_grpah_opt_backend()
|
||||
self.model.clear_graph_opt_backend()
|
||||
if self.fd_config.parallel_config.enable_expert_parallel:
|
||||
self.dynamic_weight_manager.clear_deepep_buffer()
|
||||
self.dynamic_weight_manager.clear_model_weight()
|
||||
|
||||
@@ -2511,7 +2511,7 @@ class MetaxModelRunner(ModelRunnerBase):
|
||||
"""Dynamic model loader use to clear parameters use for RL"""
|
||||
# Clear CUDAGraph
|
||||
if self.use_cudagraph:
|
||||
self.model.clear_grpah_opt_backend()
|
||||
self.model.clear_graph_opt_backend()
|
||||
# Clear parameters and Send single
|
||||
self.dynamic_weight_manager.clear_parameters(
|
||||
pid, self.fd_config.parallel_config.shutdown_comm_group_if_worker_idle
|
||||
|
||||
@@ -91,10 +91,10 @@ class TestModel1(paddle.nn.Layer):
|
||||
|
||||
return sublayer2_output
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
def clear_graph_opt_backend(self):
|
||||
""" """
|
||||
self.sublayer1.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.sublayer2.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
self.sublayer1.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
self.sublayer2.clear_graph_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class TestCUDAGrpahRecapture(unittest.TestCase):
|
||||
@@ -152,7 +152,7 @@ class TestCUDAGrpahRecapture(unittest.TestCase):
|
||||
|
||||
# Destroy
|
||||
print_gpu_memory_use("before destroy", 0)
|
||||
self.test_model1.clear_grpah_opt_backend()
|
||||
self.test_model1.clear_graph_opt_backend()
|
||||
print_gpu_memory_use("after destroy", 0)
|
||||
|
||||
def recapture_and_replay(self, input_tensor1, forward_meta1):
|
||||
@@ -168,7 +168,7 @@ class TestCUDAGrpahRecapture(unittest.TestCase):
|
||||
|
||||
# Destroy
|
||||
print_gpu_memory_use("before destroy", 0)
|
||||
self.test_model1.clear_grpah_opt_backend()
|
||||
self.test_model1.clear_graph_opt_backend()
|
||||
print_gpu_memory_use("after destroy", 0)
|
||||
|
||||
|
||||
|
||||
@@ -487,7 +487,7 @@ class TestSleepWakeupBehavior(unittest.TestCase):
|
||||
runner.local_rank = 0
|
||||
runner.device_id = 1
|
||||
runner.num_gpu_blocks = 8
|
||||
runner.model = Mock(clear_grpah_opt_backend=Mock())
|
||||
runner.model = Mock(clear_graph_opt_backend=Mock())
|
||||
runner.clear_cache = Mock()
|
||||
runner.initialize_kv_cache = Mock()
|
||||
runner.capture_model = Mock()
|
||||
@@ -523,7 +523,7 @@ class TestSleepWakeupBehavior(unittest.TestCase):
|
||||
|
||||
runner.sleep("weight,kv_cache")
|
||||
|
||||
runner.model.clear_grpah_opt_backend.assert_called_once()
|
||||
runner.model.clear_graph_opt_backend.assert_called_once()
|
||||
runner.dynamic_weight_manager.clear_deepep_buffer.assert_called_once()
|
||||
runner.dynamic_weight_manager.clear_model_weight.assert_called_once()
|
||||
runner.dynamic_weight_manager.clear_communication_group.assert_called_once()
|
||||
|
||||
Reference in New Issue
Block a user