Bolt: Optimize single-element list appends

Replaced instances of `.extend([item])` with `.append(item)` in multiple files.
Using `.extend([item])` allocates a temporary single-element list on every call,
which adds memory overhead and is slower than calling `.append(item)` directly.

Files updated:
- fastdeploy/input/encodings/ernie_encoding.py
- fastdeploy/input/ernie4_5_vl_processor/process.py
- fastdeploy/output/token_processor.py
- fastdeploy/worker/gpu_model_runner.py
- fastdeploy/worker/metax_model_runner.py
This commit is contained in:
google-labs-jules[bot]
2026-04-15 16:45:13 +00:00
parent e53f5184ac
commit 69c7dd0a19
6 changed files with 29 additions and 26 deletions
+4 -4
View File
@@ -1479,7 +1479,7 @@ class GPUModelRunner(ModelRunnerBase):
self.cache_kvs_map[indexer_cache_name] = indexer_cache
cache_kvs_list.extend([key_cache, indexer_cache])
else:
cache_kvs_list.extend([key_cache])
cache_kvs_list.append(key_cache)
if kv_cache_quant_type == "block_wise_fp8":
key_cache_scales = paddle.full(
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
@@ -1494,7 +1494,7 @@ class GPUModelRunner(ModelRunnerBase):
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
else:
cache_kvs_list.extend([key_cache_scales])
cache_kvs_list.append(key_cache_scales)
else:
logger.info(
f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}, indexer:{indexer_cache_shape}"
@@ -1526,9 +1526,9 @@ class GPUModelRunner(ModelRunnerBase):
self.cache_kvs_map[indexer_cache_name] = indexer_cache
cache_kvs_list.extend([key_cache, indexer_cache])
else:
cache_kvs_list.extend([key_cache])
cache_kvs_list.append(key_cache)
if kv_cache_quant_type == "block_wise_fp8":
cache_kvs_list.extend([key_cache_scales])
cache_kvs_list.append(key_cache_scales)
self.share_inputs["caches"] = cache_kvs_list