[Feature] support v1 update/clear api for RL (#6761)

* [Feature] support v1 update/clear api for RL

* [fix] fix execute_model and add sleep/wakeup api

* [fix] fix mtp and key_prefix

* [chore] move _update_key_prefix to resume method

* [fix] make the interface safe to call multiple times

* [fix] fix some tiny bugs

* [chore] make small changes against pr review

* [docs] add docs for weight update

* [test] add some tests and update docs

* [style] fix code style check

* [test] fix ci

* [fix] fix stale control responses when control method timed out

* [chore] remove unused code

* [chore] fix code style

* [chore] optimize tags and key_prefix

* [test] fix ci

* [chore] fix code style

* [test] fix ci

* [fix] fix ep control

* [fix] fix ep control for engine cache queue
This commit is contained in:
Yonghua Li
2026-03-25 19:18:46 +08:00
committed by GitHub
parent 48cfb608aa
commit a7f52c300d
26 changed files with 1857 additions and 392 deletions
@@ -142,34 +142,34 @@ class TestCUDAGrpahRecapture(unittest.TestCase):
     def capture_and_replay(self, input_tensor1, forward_meta1):
         """ """
         # Trigger Capture
-        print_gpu_memory_use(0, "before capture")
+        print_gpu_memory_use("before capture", 0)
         output1 = self.test_model1(ids_remove_padding=input_tensor1, forward_meta=forward_meta1)
-        print_gpu_memory_use(0, "after capture")
+        print_gpu_memory_use("after capture", 0)
         # Replay
         output1 = self.test_model1(ids_remove_padding=input_tensor1, forward_meta=forward_meta1)
         assert (output1 == self.output_correct).all()
         # Destroy
-        print_gpu_memory_use(0, "before destroy")
+        print_gpu_memory_use("before destroy", 0)
         self.test_model1.clear_grpah_opt_backend()
-        print_gpu_memory_use(0, "after destroy")
+        print_gpu_memory_use("after destroy", 0)
     def recapture_and_replay(self, input_tensor1, forward_meta1):
         """ """
         # Trigger Capture
-        print_gpu_memory_use(0, "before recapture")
+        print_gpu_memory_use("before recapture", 0)
         output2 = self.test_model1(ids_remove_padding=input_tensor1, forward_meta=forward_meta1)
-        print_gpu_memory_use(0, "after recapture")
+        print_gpu_memory_use("after recapture", 0)
         # Replay
         output2 = self.test_model1(ids_remove_padding=input_tensor1, forward_meta=forward_meta1)
         assert (output2 == self.output_correct).all()
         # Destroy
-        print_gpu_memory_use(0, "before destroy")
+        print_gpu_memory_use("before destroy", 0)
         self.test_model1.clear_grpah_opt_backend()
-        print_gpu_memory_use(0, "after destroy")
+        print_gpu_memory_use("after destroy", 0)
 if __name__ == "__main__":