mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Model Runner] Prepare token count and move FA3 initialization into the graph (#6170)
* Prepare the token count and move FA3 initialization into the graph
This commit is contained in:
@@ -2037,6 +2037,7 @@ __global__ void merge_multi_chunks_kernel(
|
||||
const int vid = threadIdx.x, hid = threadIdx.y;
|
||||
const int qid = blockIdx.x;
|
||||
const uint32_t bid = batch_id_per_token[qid];
|
||||
if (bid == -1) return;
|
||||
if (seq_lens_q[bid] <= 0 || seq_lens_kv[bid] <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user