[Feature] support clear data (#3601)

* [Feature] support clear data * update * fix * fix * fix * fix * fix * fix * fix
2026-04-23 00:17:25 +08:00 · 2025-09-23 10:20:02 +08:00
parent 772f0156f3
commit 83720da79f
12 changed files with 85 additions and 1 deletions
@@ -227,6 +227,14 @@ class OpenAIServingCompletion:
            completion_batched_token_ids = [[] for _ in range(num_choices)]
            current_waiting_time = 0
            while num_choices > 0:
+                if self.engine_client.check_model_weight_status():
+                    return ErrorResponse(
+                        error=ErrorInfo(
+                            message="Model weight cleared",
+                            code=ErrorCode.INVALID_VALUE,
+                            type=ErrorType.INVALID_REQUEST_ERROR,
+                        )
+                    )
                try:
                    response = await asyncio.wait_for(response_queue.get(), timeout=10)
                    current_waiting_time = 0
@@ -281,7 +289,6 @@ class OpenAIServingCompletion:
            return res
        except Exception as e:
            api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
-            raise
        finally:
            self.engine_client.semaphore.release()
            if dealer is not None:
@@ -360,6 +367,8 @@ class OpenAIServingCompletion:
            )
            current_waiting_time = 0
            while num_choices > 0:
+                if self.engine_client.check_model_weight_status():
+                    raise ValueError("Engine is clearing model weight")
                try:
                    response = await asyncio.wait_for(response_queue.get(), timeout=10)
                    current_waiting_time = 0
@@ -447,6 +456,7 @@ class OpenAIServingCompletion:
                        choices[-1].finish_reason = self.calc_finish_reason(
                            request.max_tokens, output_tokens[idx], output, tool_called[idx]
                        )
+
                    send_idx = output.get("send_idx")
                    # Only log when send_idx is explicitly 0
                    if send_idx == 0 and not request.return_token_ids: