mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Qwen3VL] Add clear_grpah_opt_backend method to Qwen3VLForConditionalGeneration (#7086)
Add a clear_grpah_opt_backend method that delegates to the underlying model to clear the CUDA graph optimization backend. Co-authored-by: CSWYF3634076 <wangyafeng@baidu.com>
This commit is contained in:
@@ -382,6 +382,10 @@ class Qwen3VLForConditionalGeneration(ModelForCasualLM):
|
||||
|
||||
return hidden_states
|
||||
|
||||
def clear_grpah_opt_backend(self):
|
||||
"""Clear graph optimization backend, the captured cuda graph will be cleaned"""
|
||||
self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
|
||||
|
||||
|
||||
class Qwen3VLPretrainedModel(PretrainedModel):
|
||||
"""Utilities for tensor-parallel weight splitting."""
|
||||
|
||||
Reference in New Issue
Block a user