Mirror of https://github.com/PaddlePaddle/FastDeploy.git, last synced 2026-04-23 00:17:25 +08:00.
[Feature] support v1 update/clear api for RL (#6761)
* [Feature] support v1 update/clear api for RL
* [fix] fix execute_model and add sleep/wakeup api
* [fix] fix mtp and key_prefix
* [chore] move _update_key_prefix to resume method
* [fix] make the interface safe to call multiple times
* [fix] fix some tiny bugs
* [chore] make small changes against pr review
* [docs] add docs for weight update
* [test] add some tests and update docs
* [style] fix code style check
* [test] fix ci
* [fix] fix stale control responses when control method timed out
* [chore] remove unused code
* [chore] fix code style
* [chore] optimize tags and key_prefix
* [test] fix ci
* [chore] fix code style
* [test] fix ci
* [fix] fix ep control
* [fix] fix ep control for engine cache queue
This commit is contained in:
@@ -142,34 +142,34 @@ class TestCUDAGrpahRecapture(unittest.TestCase):
|
||||
def capture_and_replay(self, input_tensor1, forward_meta1):
    """Capture a CUDA graph with the first forward pass, replay it, then destroy the backend.

    Args:
        input_tensor1: padded-id input tensor fed as ``ids_remove_padding``.
        forward_meta1: forward metadata object passed through to the model.

    Raises:
        AssertionError: if the replayed output differs from ``self.output_correct``.
    """
    # Trigger capture: the first call records the CUDA graph.
    # NOTE(review): print_gpu_memory_use takes (message, device_id) in this revision —
    # the argument order was swapped from the earlier (device_id, message) form.
    print_gpu_memory_use("before capture", 0)
    output1 = self.test_model1(ids_remove_padding=input_tensor1, forward_meta=forward_meta1)
    print_gpu_memory_use("after capture", 0)

    # Replay: an identical second call replays the captured graph.
    output1 = self.test_model1(ids_remove_padding=input_tensor1, forward_meta=forward_meta1)
    assert (output1 == self.output_correct).all()

    # Destroy: release the graph-optimization backend and its captured memory.
    # NOTE(review): "grpah" is a typo, but it is the method's actual upstream name — do not "fix" it here.
    print_gpu_memory_use("before destroy", 0)
    self.test_model1.clear_grpah_opt_backend()
    print_gpu_memory_use("after destroy", 0)
||||
def recapture_and_replay(self, input_tensor1, forward_meta1):
    """Re-capture a CUDA graph after a prior destroy, replay it, then destroy the backend again.

    Mirrors ``capture_and_replay`` but exercises the second capture cycle, verifying
    that the backend can be rebuilt after ``clear_grpah_opt_backend``.

    Args:
        input_tensor1: padded-id input tensor fed as ``ids_remove_padding``.
        forward_meta1: forward metadata object passed through to the model.

    Raises:
        AssertionError: if the replayed output differs from ``self.output_correct``.
    """
    # Trigger re-capture: first call after the previous destroy records a fresh graph.
    print_gpu_memory_use("before recapture", 0)
    output2 = self.test_model1(ids_remove_padding=input_tensor1, forward_meta=forward_meta1)
    print_gpu_memory_use("after recapture", 0)

    # Replay: an identical second call replays the re-captured graph.
    output2 = self.test_model1(ids_remove_padding=input_tensor1, forward_meta=forward_meta1)
    assert (output2 == self.output_correct).all()

    # Destroy: tear the backend down again so the test leaves no captured graph behind.
    # NOTE(review): "grpah" is a typo, but it is the method's actual upstream name — do not "fix" it here.
    print_gpu_memory_use("before destroy", 0)
    self.test_model1.clear_grpah_opt_backend()
    print_gpu_memory_use("after destroy", 0)
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user