fix cuda graph capture failure in CI test (#7094)

2026-04-23 00:17:25 +08:00 · 2026-03-31 11:05:51 +08:00
parent daa95244f7
commit dd2aa10ed4
1 changed files with 9 additions and 2 deletions
@@ -252,9 +252,16 @@ __global__ void verify_draft_tokens(
      break;
    }

-    // Accept-all override (debug/warmup)
+    // Accept-all override (debug/warmup/CUDA graph capture)
    if (accept_all) {
-      if (ctx.emit_token(i, ctx.step_input_ids_now[i + 1])) break;
+      int64_t token = ctx.step_input_ids_now[i + 1];
+      // During a dummy run (accept_all), replace EOS tokens with a safe
+      // non-EOS value to prevent stop_flags from being set, which would
+      // cause CUDA graph capture to fail due to a token count mismatch.
+      if (is_in_end(token, end_tokens, end_length)) {
+        token = 5;
+      }
+      if (ctx.emit_token(i, token)) break;
      continue;
    }