Co-authored-by: gongweibao <gognweibao@baidu.com>
This commit is contained in:
gongweibao
2026-03-04 21:55:31 +08:00
committed by GitHub
parent 5c8f5184d9
commit ddb06ff83f
306 changed files with 40627 additions and 34418 deletions
+62 -62
View File
@@ -30,79 +30,79 @@ namespace transfer {
// Function type of the streaming callback held by TokenTransfer: it receives a
// vector of int64_t values (by value) and an opaque void* argument.
// NOTE(review): the void* is presumably the cb_data passed to
// RegisterCallback — confirm against the code that invokes the callback.
using server_callback_fn = void(std::vector<int64_t>, void *);
/// One queued result: a batch size together with an owned copy of its tokens.
struct BatchResult {
  /// Stores `cur_batch_size` and copies `cur_tokens` into the result.
  /// Accepting a const reference keeps existing lvalue callers working while
  /// also allowing const vectors to be passed.
  BatchResult(int64_t cur_batch_size, const std::vector<int64_t> &cur_tokens)
      : batch_size(cur_batch_size), tokens(cur_tokens) {}

  /// Overload for temporaries / explicitly moved vectors: takes over the
  /// vector's storage instead of copying it.
  BatchResult(int64_t cur_batch_size, std::vector<int64_t> &&cur_tokens)
      : batch_size(cur_batch_size), tokens(std::move(cur_tokens)) {}

  int64_t batch_size;           // value supplied by the producer
  std::vector<int64_t> tokens;  // owned token storage
};
class TokenTransfer {
public:
TokenTransfer(const TokenTransfer &o) = delete;
const TokenTransfer &operator=(const TokenTransfer &o) = delete;
~TokenTransfer() {}
public:
TokenTransfer(const TokenTransfer &o) = delete;
const TokenTransfer &operator=(const TokenTransfer &o) = delete;
~TokenTransfer() {}
static TokenTransfer &Instance() {
static TokenTransfer instance;
return instance;
static TokenTransfer &Instance() {
static TokenTransfer instance;
return instance;
}
void RegisterCallback(server_callback_fn *cb_fn, void *cb_data) {
stream_cb_fn_ = cb_fn;
stream_cb_data_ = cb_data;
}
void UnRegisterCallback() {
stream_cb_fn_ = nullptr;
stream_cb_data_ = nullptr;
}
// once copy: cpu --> cpu
// array length should be (1 + MAX_BATCH)
bool GetBatchToken(int64_t *array) {
if (Empty()) {
return false;
} else {
assert(array != nullptr);
std::lock_guard<std::mutex> mtx(mtx_);
array[0] = q_.front().batch_size;
if (array[0] != 0) {
memmove(reinterpret_cast<void *>(array + 1),
reinterpret_cast<void *>(q_.front().tokens.data()),
sizeof(int64_t) * array[0]);
}
q_.pop();
return true;
}
}
void RegisterCallback(server_callback_fn *cb_fn, void *cb_data) {
stream_cb_fn_ = cb_fn;
stream_cb_data_ = cb_data;
void PushBatchToken(int64_t cur_batch_size, int64_t *cur_tokens) {
std::lock_guard<std::mutex> mtx(mtx_);
if (q_.size() > MAX_CACHE_LENGTH) {
std::cout << "Warning: The queue that stores the results "
<< "has exceeded MAX_CACHE_LENGTH and will be forcefully "
"cleared."
<< std::endl;
std::queue<BatchResult> empty;
std::swap(q_, empty);
}
std::vector<int64_t> tmp(cur_tokens, cur_tokens + cur_batch_size);
q_.emplace(cur_batch_size, tmp);
}
void UnRegisterCallback() {
stream_cb_fn_ = nullptr;
stream_cb_data_ = nullptr;
}
bool Empty() {
std::lock_guard<std::mutex> mtx(mtx_);
return q_.empty();
}
// once copy: cpu --> cpu
// array length should be (1 + MAX_BATCH)
bool GetBatchToken(int64_t *array) {
if (Empty()) {
return false;
} else {
assert(array != nullptr);
std::lock_guard<std::mutex> mtx(mtx_);
array[0] = q_.front().batch_size;
if (array[0] != 0) {
memmove(reinterpret_cast<void *>(array + 1),
reinterpret_cast<void *>(q_.front().tokens.data()),
sizeof(int64_t) * array[0]);
}
q_.pop();
return true;
}
}
server_callback_fn *stream_cb_fn_ = nullptr;
void *stream_cb_data_ = nullptr;
void PushBatchToken(int64_t cur_batch_size, int64_t *cur_tokens) {
std::lock_guard<std::mutex> mtx(mtx_);
if (q_.size() > MAX_CACHE_LENGTH) {
std::cout << "Warning: The queue that stores the results "
<< "has exceeded MAX_CACHE_LENGTH and will be forcefully "
"cleared."
<< std::endl;
std::queue<BatchResult> empty;
std::swap(q_, empty);
}
std::vector<int64_t> tmp(cur_tokens, cur_tokens + cur_batch_size);
q_.emplace(cur_batch_size, tmp);
}
private:
TokenTransfer() {}
bool Empty() {
std::lock_guard<std::mutex> mtx(mtx_);
return q_.empty();
}
server_callback_fn *stream_cb_fn_ = nullptr;
void *stream_cb_data_ = nullptr;
private:
TokenTransfer() {}
std::mutex mtx_;
std::queue<BatchResult> q_;
std::mutex mtx_;
std::queue<BatchResult> q_;
};
} // namespace transfer