[Feature] Add Golang-based Router for Request Scheduling and Load Balancing (#5882)

* [Feature] add golang router

* [Feature] add golang router

* [Feature] add golang router

* [Feature] add golang router

* [Feature] add golang router

* [Feature] Add Golang-based Router for Request Scheduling and Load Balancing

* [Feature] Add Golang-based Router for Request Scheduling and Load Balancing

* [Feature] Add Golang-based Router for Request Scheduling and Load Balancing

* [Feature] Add Golang-based Router for Request Scheduling and Load Balancing

---------

Co-authored-by: mouxin <mouxin@baidu.com>
This commit is contained in:
mouxin
2026-01-07 21:28:08 +08:00
committed by GitHub
parent 925e7edd3c
commit 0a92e96f20
50 changed files with 6298 additions and 0 deletions
@@ -0,0 +1,52 @@
package handler
import (
"context"
"math"
)
// ProcessTokensSelectWorker returns the worker whose token counter currently
// reports the smallest value; per the original note it is intended for
// Prefill nodes.
//
// Each worker's load is read via GetOrCreateTokenCounter(ctx, w).Get(), once
// per worker and in slice order. When several workers share the minimum load,
// the one that appears first in workers is returned. The message parameter is
// not used by this selector.
//
// An empty workers slice yields ("", nil). The returned error is always nil.
func ProcessTokensSelectWorker(ctx context.Context, workers []string, message string) (string, error) {
	if len(workers) == 0 {
		return "", nil
	}

	// Seed the result with the first candidate so a non-empty slice always
	// yields a worker: the strict "<" below never fires when every counter
	// reports math.MaxUint64, which previously left the result empty.
	selected := workers[0]
	minTokens := uint64(math.MaxUint64)

	for _, w := range workers {
		load := GetOrCreateTokenCounter(ctx, w).Get()
		// Strict comparison keeps the earliest worker on ties.
		if load < minTokens {
			minTokens = load
			selected = w
		}
	}
	return selected, nil
}
// RequestNumSelectWorker returns the worker whose request counter currently
// reports the smallest value; per the original note it is intended for
// Decode nodes.
//
// Each worker's load is read via GetOrCreateCounter(ctx, w).Get(), once per
// worker and in slice order. When several workers share the minimum load, the
// one that appears first in workers is returned. The message parameter is not
// used by this selector.
//
// An empty workers slice yields ("", nil). The returned error is always nil.
func RequestNumSelectWorker(ctx context.Context, workers []string, message string) (string, error) {
	if len(workers) == 0 {
		return "", nil
	}

	// Seed the result with the first candidate so a non-empty slice always
	// yields a worker: the strict "<" below never fires when every counter
	// reports math.MaxUint64, which previously left the result empty.
	selected := workers[0]
	minCount := uint64(math.MaxUint64)

	for _, w := range workers {
		load := GetOrCreateCounter(ctx, w).Get()
		// Strict comparison keeps the earliest worker on ties.
		if load < minCount {
			minCount = load
			selected = w
		}
	}
	return selected, nil
}