Bolt: Optimize single element list appends

Replaced instances of `.extend([item])` with `.append(item)` in multiple files.
Calling `.extend([item])` allocates a throwaway single-element list on every
call, adding memory overhead, and is slower than calling `.append(item)`
directly.

Files updated:
- fastdeploy/input/encodings/ernie_encoding.py
- fastdeploy/input/ernie4_5_vl_processor/process.py
- fastdeploy/output/token_processor.py
- fastdeploy/worker/gpu_model_runner.py
- fastdeploy/worker/metax_model_runner.py
This commit is contained in:
google-labs-jules[bot]
2026-04-15 16:45:13 +00:00
parent e53f5184ac
commit 69c7dd0a19
6 changed files with 29 additions and 26 deletions
+3
View File
@@ -0,0 +1,3 @@
## 2026-04-15 - FastDeploy test suite
**Learning:** The FastDeploy test suite requires a complex environment. Running `pytest tests/` directly fails with hundreds of import errors due to missing dependencies and environment setup specific to FastDeploy (such as PaddlePaddle and other ML packages).
**Action:** When working on this codebase, accept that local tests might fail unless running in a fully configured container or environment.
+6 -6
View File
@@ -302,8 +302,8 @@ class ErnieEncoding(BaseEncoding):
if image_idx >= len(images):
raise ValueError("prompt token ids has more image placeholder than in messages")
# append image_start_id
outputs["input_ids"].extend([cur_token_id])
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]])
outputs["input_ids"].append(cur_token_id)
outputs["token_type_ids"].append(IDS_TYPE_FLAG["image"])
outputs["position_ids"].append([outputs["cur_position"]] * 3)
outputs["cur_position"] += 1
st += 1
@@ -326,8 +326,8 @@ class ErnieEncoding(BaseEncoding):
if video_idx >= len(videos):
raise ValueError("prompt token ids has more video placeholder than in messages")
# append video_start_id
outputs["input_ids"].extend([cur_token_id])
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]])
outputs["input_ids"].append(cur_token_id)
outputs["token_type_ids"].append(IDS_TYPE_FLAG["image"])
outputs["position_ids"].append([outputs["cur_position"]] * 3)
outputs["cur_position"] += 1
st += 1
@@ -351,11 +351,11 @@ class ErnieEncoding(BaseEncoding):
video_idx += 1
st = cur_idx
else:
outputs["input_ids"].extend([cur_token_id])
outputs["input_ids"].append(cur_token_id)
type_flag = (
IDS_TYPE_FLAG["image"] if cur_token_id in (image_end_id, video_end_id) else IDS_TYPE_FLAG["text"]
)
outputs["token_type_ids"].extend([type_flag])
outputs["token_type_ids"].append(type_flag)
outputs["position_ids"].append([outputs["cur_position"]] * 3)
outputs["cur_position"] += 1
st += 1
@@ -397,8 +397,8 @@ class DataProcessor(MMBaseDataProcessor):
if image_idx >= len(images):
raise ValueError("prompt token ids has more image placeholder than in messages")
# append image_start_id
outputs["input_ids"].extend([cur_token_id])
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
outputs["input_ids"].append(cur_token_id)
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
outputs["position_ids"].append([outputs["cur_position"]] * 3)
outputs["cur_position"] += 1
st += 1
@@ -421,8 +421,8 @@ class DataProcessor(MMBaseDataProcessor):
if video_idx >= len(videos):
raise ValueError("prompt token ids has more video placeholder than in messages")
# append video_start_id
outputs["input_ids"].extend([cur_token_id])
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
outputs["input_ids"].append(cur_token_id)
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
outputs["position_ids"].append([outputs["cur_position"]] * 3)
outputs["cur_position"] += 1
st += 1
@@ -446,8 +446,8 @@ class DataProcessor(MMBaseDataProcessor):
video_idx += 1
st = cur_idx
else:
outputs["input_ids"].extend([cur_token_id])
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
outputs["input_ids"].append(cur_token_id)
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
outputs["position_ids"].append([outputs["cur_position"]] * 3)
outputs["cur_position"] += 1
st += 1
+6 -6
View File
@@ -686,9 +686,9 @@ class TokenProcessor:
sampled_token_ranks=[sampled_rank],
)
else:
result.outputs.draft_top_logprobs.logprob_token_ids.extend([topk_token_ids])
result.outputs.draft_top_logprobs.logprobs.extend([topk_logprobs])
result.outputs.draft_top_logprobs.sampled_token_ranks.extend([sampled_rank])
result.outputs.draft_top_logprobs.logprob_token_ids.append(topk_token_ids)
result.outputs.draft_top_logprobs.logprobs.append(topk_logprobs)
result.outputs.draft_top_logprobs.sampled_token_ranks.append(sampled_rank)
batch_result.append(result)
return batch_result
@@ -898,9 +898,9 @@ class TokenProcessor:
sampled_token_ranks=[sampled_rank],
)
else:
result.outputs.top_logprobs.logprob_token_ids.extend([topk_token_ids])
result.outputs.top_logprobs.logprobs.extend([topk_logprobs])
result.outputs.top_logprobs.sampled_token_ranks.extend([sampled_rank])
result.outputs.top_logprobs.logprob_token_ids.append(topk_token_ids)
result.outputs.top_logprobs.logprobs.append(topk_logprobs)
result.outputs.top_logprobs.sampled_token_ranks.append(sampled_rank)
if token_id in task.eos_token_ids or is_prefill or recovery_stop:
result.finished = True
trace_carrier = tracing.trace_get_proc_propagate_context(rid=rid)
+4 -4
View File
@@ -1479,7 +1479,7 @@ class GPUModelRunner(ModelRunnerBase):
self.cache_kvs_map[indexer_cache_name] = indexer_cache
cache_kvs_list.extend([key_cache, indexer_cache])
else:
cache_kvs_list.extend([key_cache])
cache_kvs_list.append(key_cache)
if kv_cache_quant_type == "block_wise_fp8":
key_cache_scales = paddle.full(
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
@@ -1494,7 +1494,7 @@ class GPUModelRunner(ModelRunnerBase):
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
else:
cache_kvs_list.extend([key_cache_scales])
cache_kvs_list.append(key_cache_scales)
else:
logger.info(
f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}, indexer:{indexer_cache_shape}"
@@ -1526,9 +1526,9 @@ class GPUModelRunner(ModelRunnerBase):
self.cache_kvs_map[indexer_cache_name] = indexer_cache
cache_kvs_list.extend([key_cache, indexer_cache])
else:
cache_kvs_list.extend([key_cache])
cache_kvs_list.append(key_cache)
if kv_cache_quant_type == "block_wise_fp8":
cache_kvs_list.extend([key_cache_scales])
cache_kvs_list.append(key_cache_scales)
self.share_inputs["caches"] = cache_kvs_list
+4 -4
View File
@@ -1384,7 +1384,7 @@ class MetaxModelRunner(ModelRunnerBase):
self.cache_kvs_map[val_cache_name] = val_cache
cache_kvs_list.extend([key_cache, val_cache])
else:
cache_kvs_list.extend([key_cache])
cache_kvs_list.append(key_cache)
if kv_cache_quant_type == "block_wise_fp8":
key_cache_scales = paddle.full(
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
@@ -1399,7 +1399,7 @@ class MetaxModelRunner(ModelRunnerBase):
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
else:
cache_kvs_list.extend([key_cache_scales])
cache_kvs_list.append(key_cache_scales)
else:
logger.info(f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}")
key_cache = paddle.empty(shape=[], dtype=cache_type)
@@ -1424,9 +1424,9 @@ class MetaxModelRunner(ModelRunnerBase):
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
else:
cache_kvs_list.extend([key_cache])
cache_kvs_list.append(key_cache)
if kv_cache_quant_type == "block_wise_fp8":
cache_kvs_list.extend([key_cache_scales])
cache_kvs_list.append(key_cache_scales)
self.share_inputs["caches"] = cache_kvs_list