mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Other] Adjust GPUModelRunner to enhance compatibility (#6851)
This commit is contained in:
@@ -1564,16 +1564,15 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
|
||||
# 2. Padding inputs for CUDA graph
|
||||
|
||||
# 3. Execute model
|
||||
model_inputs = {}
|
||||
model_inputs["ids_remove_padding"] = self.share_inputs["ids_remove_padding"]
|
||||
if self.enable_mm:
|
||||
model_output = self.model(
|
||||
self.share_inputs["ids_remove_padding"], self.share_inputs["image_features"], self.forward_meta
|
||||
)
|
||||
else:
|
||||
model_output = self.model(
|
||||
ids_remove_padding=self.share_inputs["ids_remove_padding"],
|
||||
forward_meta=self.forward_meta,
|
||||
)
|
||||
model_inputs["image_features"] = self.share_inputs["image_features"]
|
||||
# 3. Execute model
|
||||
model_output = self.model(
|
||||
model_inputs,
|
||||
forward_meta=self.forward_meta,
|
||||
)
|
||||
if self.use_cudagraph:
|
||||
model_output = model_output[: self.real_token_num]
|
||||
hidden_states = xpu_process_output(
|
||||
|
||||
Reference in New Issue
Block a user