mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 17:11:21 +08:00
⚡ Bolt: Optimize single element list appends
Replaced instances of `.extend([item])` with `.append(item)` in multiple files. Using `.extend([item])` incurs memory overhead by allocating a new single-element list on every call, and is computationally slower than calling `.append(item)` directly.

Files updated:
- fastdeploy/input/encodings/ernie_encoding.py
- fastdeploy/input/ernie4_5_vl_processor/process.py
- fastdeploy/output/token_processor.py
- fastdeploy/worker/gpu_model_runner.py
- fastdeploy/worker/metax_model_runner.py
This commit is contained in:
@@ -1479,7 +1479,7 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
||||
cache_kvs_list.extend([key_cache, indexer_cache])
|
||||
else:
|
||||
cache_kvs_list.extend([key_cache])
|
||||
cache_kvs_list.append(key_cache)
|
||||
if kv_cache_quant_type == "block_wise_fp8":
|
||||
key_cache_scales = paddle.full(
|
||||
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
||||
@@ -1494,7 +1494,7 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
||||
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
||||
else:
|
||||
cache_kvs_list.extend([key_cache_scales])
|
||||
cache_kvs_list.append(key_cache_scales)
|
||||
else:
|
||||
logger.info(
|
||||
f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}, indexer:{indexer_cache_shape}"
|
||||
@@ -1526,9 +1526,9 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
||||
cache_kvs_list.extend([key_cache, indexer_cache])
|
||||
else:
|
||||
cache_kvs_list.extend([key_cache])
|
||||
cache_kvs_list.append(key_cache)
|
||||
if kv_cache_quant_type == "block_wise_fp8":
|
||||
cache_kvs_list.extend([key_cache_scales])
|
||||
cache_kvs_list.append(key_cache_scales)
|
||||
|
||||
self.share_inputs["caches"] = cache_kvs_list
|
||||
|
||||
|
||||
Reference in New Issue
Block a user