mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
+59
-35
@@ -15,59 +15,83 @@
|
||||
#pragma once
|
||||
|
||||
// Returns the value of the FLAGS_dec_block_shape_q environment variable as an
// unsigned integer, or 16 when the variable is not set.
//
// The variable is read and parsed only once, on the first call; the result is
// cached in a function-local static, so later changes to the environment are
// not observed. The text is parsed with std::stoi, which throws if the value
// is not a valid integer.
inline uint32_t get_decoder_block_shape_q() {
  static const char* decoder_block_shape_q_env =
      std::getenv("FLAGS_dec_block_shape_q");
  static const uint32_t decoder_block_shape_q =
      decoder_block_shape_q_env == nullptr
          ? 16
          : std::stoi(std::string(decoder_block_shape_q_env));
  return decoder_block_shape_q;
}
|
||||
|
||||
// Returns the value of the FLAGS_enc_block_shape_q environment variable as an
// unsigned integer, or 64 when the variable is not set.
//
// The variable is read and parsed only once, on the first call; the result is
// cached in a function-local static, so later changes to the environment are
// not observed. The text is parsed with std::stoi, which throws if the value
// is not a valid integer.
inline uint32_t get_encoder_block_shape_q() {
  static const char* encoder_block_shape_q_env =
      std::getenv("FLAGS_enc_block_shape_q");
  static const uint32_t encoder_block_shape_q =
      encoder_block_shape_q_env == nullptr
          ? 64
          : std::stoi(std::string(encoder_block_shape_q_env));
  return encoder_block_shape_q;
}
|
||||
|
||||
// Returns the value of the FLAGS_cascade_attention_max_partition_size
// environment variable as an unsigned integer, or 32768 when it is not set.
//
// `bsz` is accepted but not used by this function; the same value is returned
// for every argument.
//
// The variable is read and parsed only once, on the first call; the result is
// cached in a function-local static. The text is parsed with std::stoul, which
// throws if the value is not a valid integer.
inline uint32_t get_max_partition_size(int bsz) {
  static const char* max_partition_size_env =
      std::getenv("FLAGS_cascade_attention_max_partition_size");
  static const uint32_t max_partition_size =
      max_partition_size_env == nullptr
          ? 32768
          : std::stoul(std::string(max_partition_size_env));
  return max_partition_size;
}
|
||||
|
||||
// Returns the value of the FLAGS_cascade_attention_deal_each_time environment
// variable as an unsigned integer, falling back to 32 when the variable is
// not set or parses to 0.
//
// The variable is read and parsed only once, on the first call; the result is
// cached in a function-local static. The text is parsed with std::stoul, which
// throws if the value is not a valid integer.
inline uint32_t get_cascade_attention_deal_each_time() {
  static const char* cascade_attention_deal_each_time_env =
      std::getenv("FLAGS_cascade_attention_deal_each_time");
  // 0 doubles as the "not configured" marker and is replaced by 32 below.
  static const uint32_t cascade_attention_deal_each_time =
      cascade_attention_deal_each_time_env == nullptr
          ? 0
          : std::stoul(std::string(cascade_attention_deal_each_time_env));
  return (cascade_attention_deal_each_time != 0
              ? cascade_attention_deal_each_time
              : 32);
}
|
||||
|
||||
// Returns the value of the FLAGS_cascade_attention_num_stages environment
// variable as an unsigned integer, falling back to 2 when the variable is not
// set or parses to 0.
//
// The variable is read and parsed only once, on the first call; the result is
// cached in a function-local static. The text is parsed with std::stoul, which
// throws if the value is not a valid integer.
inline uint32_t get_cascade_attention_num_stages() {
  static const char* cascade_attention_num_stages_env =
      std::getenv("FLAGS_cascade_attention_num_stages");
  // 0 doubles as the "not configured" marker and is replaced by 2 below.
  static const uint32_t cascade_attention_num_stages =
      cascade_attention_num_stages_env == nullptr
          ? 0
          : std::stoul(std::string(cascade_attention_num_stages_env));
  return cascade_attention_num_stages != 0 ? cascade_attention_num_stages : 2;
}
|
||||
|
||||
// Returns the value of the FLAGS_cascade_attention_num_threads environment
// variable as an unsigned integer, falling back to 128 when the variable is
// not set or parses to 0.
//
// The variable is read and parsed only once, on the first call; the result is
// cached in a function-local static. The text is parsed with std::stoul, which
// throws if the value is not a valid integer.
inline uint32_t get_cascade_attention_num_threads() {
  static const char* cascade_attention_num_threads_env =
      std::getenv("FLAGS_cascade_attention_num_threads");
  // 0 doubles as the "not configured" marker and is replaced by 128 below.
  static const uint32_t cascade_attention_num_threads =
      cascade_attention_num_threads_env == nullptr
          ? 0
          : std::stoul(std::string(cascade_attention_num_threads_env));
  return cascade_attention_num_threads != 0 ? cascade_attention_num_threads
                                            : 128;
}
|
||||
|
||||
// Reports whether the FLAGS_mla_use_tensorcore environment variable is set to
// a non-zero integer. Returns false when the variable is not set or parses
// to 0.
//
// The variable is read and parsed only once, on the first call; the result is
// cached in a function-local static. The text is parsed with std::stoul, which
// throws if the value is not a valid integer.
inline bool get_mla_use_tensorcore() {
  static const char* mla_use_tensorcore_env =
      std::getenv("FLAGS_mla_use_tensorcore");
  static const uint32_t mla_use_tensorcore =
      mla_use_tensorcore_env == nullptr
          ? 0
          : std::stoul(std::string(mla_use_tensorcore_env));
  return mla_use_tensorcore != 0;
}
|
||||
// Returns the chunk size selected for the given `bsz`:
//   - bsz <= 1: always 64, regardless of the environment;
//   - bsz  > 1: the integer value of FLAGS_mla_dec_chunk_size, or -1 when the
//               variable is not set.
//
// The variable is read and parsed only once, on the first call; the result is
// cached in a function-local static. The text is parsed with std::stoi, which
// throws if the value is not a valid integer.
// NOTE(review): -1 presumably means "no fixed chunk size" to the caller —
// confirm against the call sites.
inline int get_mla_dec_chunk_size(int bsz) {
  static const char* mla_dec_chunk_size_env =
      std::getenv("FLAGS_mla_dec_chunk_size");
  static const int mla_dec_chunk_size =
      mla_dec_chunk_size_env == nullptr
          ? -1
          : std::stoi(std::string(mla_dec_chunk_size_env));
  return bsz > 1 ? mla_dec_chunk_size : 64;
}
|
||||
|
||||
Reference in New Issue
Block a user