mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
cp 1131 tbo to develop (#6281)
This commit is contained in:
@@ -560,12 +560,13 @@ class EPRunner:
|
||||
|
||||
|
||||
class EPPrefillRunner(EPRunner):
|
||||
|
||||
allocate_on_comm_stream = False
|
||||
|
||||
"""
|
||||
EPPrefillRunner
|
||||
"""
|
||||
|
||||
allocate_on_comm_stream = False
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
top_k: int,
|
||||
@@ -664,6 +665,7 @@ class EPPrefillRunner(EPRunner):
|
||||
"async_finish": self.ep_engine.async_finish,
|
||||
"topk_weights": recv_topk_weights,
|
||||
"previous_event": event,
|
||||
"allocate_on_comm_stream": EPPrefillRunner.allocate_on_comm_stream,
|
||||
}
|
||||
fused_moe_out, _, event = buffer.combine(**combine_args)
|
||||
return fused_moe_out, event
|
||||
|
||||
Reference in New Issue
Block a user