[Feature][MTP] Support cacheKV transfer in per_chunk mode (#2890)

* support chunk_prefill in both normal and speculative-decoding (MTP) modes * optimize pd-disaggregation config * fix bug
2026-04-23 00:17:25 +08:00 · 2025-07-17 17:58:08 +08:00
parent 67180c1ff9
commit d49f8fb30a
10 changed files with 110 additions and 27 deletions
@@ -36,9 +36,9 @@ void GetOutputKVSignal(const paddle::Tensor& x,
    int* out_data = const_cast<int*>(x.data<int>());
    int ret = -1;
    if (!wait_flag) {
-        ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ * 2 + 2) * 4, 0, IPC_NOWAIT);
+        ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ * 3 + 2) * 4, 0, IPC_NOWAIT);
    } else {
-        ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ * 2 + 2) * 4, 0, 0);
+        ret = msgrcv(msgid, &msg_rcv, (MAX_BSZ * 3 + 2) * 4, 0, 0);
    }
    if (ret == -1) {
        out_data[0] = -1;
@@ -47,7 +47,7 @@ void GetOutputKVSignal(const paddle::Tensor& x,
    }
    int encoder_count = msg_rcv.mtext[0];

-    for (int i = 0; i < encoder_count * 2 + 2; i++) {
+    for (int i = 0; i < encoder_count * 3 + 2; i++) {
        out_data[i] = msg_rcv.mtext[i];
    }
    return;