mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Model Runner] Prepare token count and move FA3 initialization into the graph (#6170)
* Prepare the token count ahead of time and move FA3 initialization into the graph
This commit is contained in:
@@ -935,6 +935,7 @@ class MTPProposer(Proposer):
|
||||
if self.model_inputs["not_need_stop"]:
|
||||
self.model_inputs["substep"] = substep
|
||||
# Remove padding
|
||||
token_num_cpu = self.model_inputs["seq_lens_this_time"].numpy().sum().item()
|
||||
(
|
||||
ids_remove_padding,
|
||||
batch_id_per_token,
|
||||
@@ -943,6 +944,7 @@ class MTPProposer(Proposer):
|
||||
output_cum_offsets,
|
||||
output_padding_offset,
|
||||
) = pre_process(
|
||||
token_num_cpu,
|
||||
self.model_inputs["input_ids"],
|
||||
self.model_inputs["seq_lens_this_time"],
|
||||
True,
|
||||
|
||||
Reference in New Issue
Block a user