mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
support ep+tp at op layer (#4688)
This commit is contained in:
@@ -1156,6 +1156,13 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
# 1. Prepare inputs of model and decoder.
|
||||
self._prepare_inputs(is_dummy_run=is_dummy_run)
|
||||
|
||||
# NOTE(wufeisheng): If `not_need_stop`` is False, it means the current worker is in an idle state.
|
||||
# This logic is not used in TP (Tensor Parallelism) mode. However, in EP (Expert Parallelism) mode,
|
||||
# when there is data on other runner, the current runner is required to execute part of the model.
|
||||
if not self.not_need_stop() and not is_dummy_run:
|
||||
self._execute_empty_input()
|
||||
return None
|
||||
|
||||
# 2. Padding inputs for cuda grph
|
||||
|
||||
# 3. Execute model
|
||||
@@ -1225,6 +1232,17 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
|
||||
return None
|
||||
|
||||
def _execute_empty_input(self) -> None:
|
||||
"""
|
||||
In certain scenarios, such as during EP,
|
||||
the runner needs to execute partial modules of the model without input data.
|
||||
This requires the model to implement the `empty_input_forward` method.
|
||||
"""
|
||||
if hasattr(self.model, "empty_input_forward"):
|
||||
self.model.empty_input_forward()
|
||||
else:
|
||||
raise ValueError(f"{type(self.model)} has no attribute 'empty_input_forward")
|
||||
|
||||
@profile_run_guard(True)
|
||||
def profile_run(self) -> None:
|
||||
"""Execute a forward pass with dummy inputs to profile the memory usage of the model"""
|
||||
|
||||
Reference in New Issue
Block a user