// Source: mirror of https://github.com/PaddlePaddle/FastDeploy.git
// (synced 2026-04-23; the code below was reconstructed from a
// side-by-side diff rendering of hunk @@ -35,66 +35,66 @@).
#ifdef DEBUG_TRANSFER_OUTPUT
|
||||
// Debug helper: dump a vector of token ids to stdout as
// "std::vector vec_size: <n> <v0> <v1> ...".
// Takes a const ref (read-only) and range-for, avoiding the original
// signed/unsigned `int i < vec.size()` comparison.
void PrintVec(const std::vector<int64_t> &vec) {
  std::cout << "std::vector vec_size: " << vec.size();
  for (const int64_t v : vec) {
    std::cout << " " << v;
  }
  std::cout << std::endl;
}
|
||||
// Debug helper: dump a length-prefixed int64 buffer to stdout.
// arr[0] holds the element count; arr[1..n] hold the data.
// Loop index is int64_t (the original `int i` was compared against the
// int64_t bound arr[0] + 1), and the pointer is const — this is read-only.
void PrintVec(const int64_t *arr) {
  const int64_t n = arr[0];
  std::cout << "READ vec_size: " << n;
  for (int64_t i{1}; i <= n; i++) {
    std::cout << " " << arr[i];
  }
  std::cout << std::endl;
}
|
||||
|
||||
// Debug helper: dump `bs` int64 values that are about to be written.
// Loop index is int64_t to match `bs` (original used `int i`), and the
// data pointer is const — this function only reads.
void PrintVec(int64_t bs, const int64_t *arr) {
  std::cout << "WRITE vec_size: " << bs;
  for (int64_t i{0}; i < bs; i++) {
    std::cout << " " << arr[i];
  }
  std::cout << std::endl;
}
|
||||
#endif
|
||||
|
||||
// Copy `x` to host memory and, on rank 0 only, hand the token ids to the
// token-transfer machinery. Always returns the host copy of `x`.
//
// @param x        tensor of generated token ids; must be INT64 (asserted).
// @param rank_id  distributed rank — non-zero ranks only do the host copy.
std::vector<paddle::Tensor> TransferOutput(const paddle::Tensor &x,
                                           int64_t rank_id) {
  using namespace paddle::inference::transfer;

  // Device -> host copy; second argument is paddle's `blocking` flag,
  // kept as in the original call.
  auto x_cpu = x.copy_to(paddle::CPUPlace(), false);
  if (rank_id != 0) {
    return {x_cpu};
  }
  int64_t token_num = x_cpu.numel();
  // only support int64_t
  assert(x_cpu.type() == paddle::DataType::INT64);

  auto &token_transfer = TokenTransfer::Instance();
  if (token_transfer.stream_cb_fn_) {
    // A streaming callback is registered: deliver the tokens directly.
    auto data_ptr = x_cpu.data<int64_t>();
    std::vector<int64_t> tokens(data_ptr, data_ptr + token_num);
    token_transfer.stream_cb_fn_(tokens, token_transfer.stream_cb_data_);
  }
#ifdef DEBUG_TRANSFER_OUTPUT
  // NOTE(review): this queue-push fallback is compiled only under
  // DEBUG_TRANSFER_OUTPUT in the original — confirm that release builds
  // are meant to drop tokens when no streaming callback is registered.
  else {
    token_transfer.PushBatchToken(token_num, x_cpu.data<int64_t>());
  }
#endif

  return {x_cpu};
}
|
||||
|
||||
// Shape inference for the transfer_output op: the single output has the
// same shape as the input. (The duplicated, unreachable second `return`
// from the garbled source is removed.)
std::vector<std::vector<int64_t>> TransferOutputInferShape(
    const std::vector<int64_t> &x_shape) {
  return {x_shape};
}
|
||||
|
||||
std::vector<paddle::DataType> TransferOutputInferDtype(
|
||||
const paddle::DataType &x_dtype) {
|
||||
return {x_dtype};
|
||||
return {x_dtype};
|
||||
}
|
||||
|
||||
PD_BUILD_STATIC_OP(transfer_output)
// NOTE(review): the PD_BUILD_STATIC_OP(transfer_output) registration above
// is truncated in this excerpt — its Inputs/Outputs/SetKernelFn/
// SetInferShapeFn bindings are not visible here.