mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature] Fix mixed cache-aware (#7129)
* [Feature] Config eviction_duration
* [Feature] Config eviction_duration
* [Feature] Config eviction_duration
* [Feature] Config eviction_duration
* [Feature] Fix mixed cache-aware

---------

Co-authored-by: mouxin <mouxin@baidu.com>
This commit is contained in:
@@ -412,7 +412,8 @@ func CommonCompletions(c *gin.Context, extractor PromptExtractor, completionEndp
|
|||||||
} else {
|
} else {
|
||||||
logger.Info(ctx, "Parsing completed; starting worker selection.")
|
logger.Info(ctx, "Parsing completed; starting worker selection.")
|
||||||
// Non-PD mode: use Mixed instance
|
// Non-PD mode: use Mixed instance
|
||||||
dest, err := manager.SelectWorker(ctx, "")
|
message = extractor(rawReq)
|
||||||
|
dest, err := manager.SelectWorker(ctx, message)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error(ctx, "Failed to select worker: %v", err)
|
logger.Error(ctx, "Failed to select worker: %v", err)
|
||||||
c.Writer.WriteHeader(http.StatusBadGateway)
|
c.Writer.WriteHeader(http.StatusBadGateway)
|
||||||
@@ -427,6 +428,7 @@ func CommonCompletions(c *gin.Context, extractor PromptExtractor, completionEndp
|
|||||||
defer func() {
|
defer func() {
|
||||||
for _, url := range releaseTargets {
|
for _, url := range releaseTargets {
|
||||||
scheduler_handler.Release(ctx, url)
|
scheduler_handler.Release(ctx, url)
|
||||||
|
scheduler_handler.ReleasePrefillTokens(ctx, url, message)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user