mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
⚡ Bolt: Optimize single element list appends
Replaced instances of `.extend([item])` with `.append(item)` in multiple files. Using `.extend([item])` incurs memory overhead by allocating a new single-element list and is computationally slower than calling `.append(item)` directly. Files updated: - fastdeploy/input/encodings/ernie_encoding.py - fastdeploy/input/ernie4_5_vl_processor/process.py - fastdeploy/output/token_processor.py - fastdeploy/worker/gpu_model_runner.py - fastdeploy/worker/metax_model_runner.py
This commit is contained in:
@@ -0,0 +1,3 @@
|
|||||||
|
## 2024-04-15 - FastDeploy test suite
|
||||||
|
**Learning:** The FastDeploy codebase's test suite requires a complex environment. Running `pytest tests/` directly fails with hundreds of import errors caused by missing dependencies and environment setup specific to FastDeploy (such as PaddlePaddle and other ML packages).
|
||||||
|
**Action:** When working on this codebase, accept that local tests might fail unless they are run in a fully configured container or environment.
|
||||||
@@ -302,8 +302,8 @@ class ErnieEncoding(BaseEncoding):
|
|||||||
if image_idx >= len(images):
|
if image_idx >= len(images):
|
||||||
raise ValueError("prompt token ids has more image placeholder than in messages")
|
raise ValueError("prompt token ids has more image placeholder than in messages")
|
||||||
# append image_start_id
|
# append image_start_id
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["image"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
@@ -326,8 +326,8 @@ class ErnieEncoding(BaseEncoding):
|
|||||||
if video_idx >= len(videos):
|
if video_idx >= len(videos):
|
||||||
raise ValueError("prompt token ids has more video placeholder than in messages")
|
raise ValueError("prompt token ids has more video placeholder than in messages")
|
||||||
# append video_start_id
|
# append video_start_id
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["image"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
@@ -351,11 +351,11 @@ class ErnieEncoding(BaseEncoding):
|
|||||||
video_idx += 1
|
video_idx += 1
|
||||||
st = cur_idx
|
st = cur_idx
|
||||||
else:
|
else:
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
type_flag = (
|
type_flag = (
|
||||||
IDS_TYPE_FLAG["image"] if cur_token_id in (image_end_id, video_end_id) else IDS_TYPE_FLAG["text"]
|
IDS_TYPE_FLAG["image"] if cur_token_id in (image_end_id, video_end_id) else IDS_TYPE_FLAG["text"]
|
||||||
)
|
)
|
||||||
outputs["token_type_ids"].extend([type_flag])
|
outputs["token_type_ids"].append(type_flag)
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
|
|||||||
@@ -397,8 +397,8 @@ class DataProcessor(MMBaseDataProcessor):
|
|||||||
if image_idx >= len(images):
|
if image_idx >= len(images):
|
||||||
raise ValueError("prompt token ids has more image placeholder than in messages")
|
raise ValueError("prompt token ids has more image placeholder than in messages")
|
||||||
# append image_start_id
|
# append image_start_id
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
@@ -421,8 +421,8 @@ class DataProcessor(MMBaseDataProcessor):
|
|||||||
if video_idx >= len(videos):
|
if video_idx >= len(videos):
|
||||||
raise ValueError("prompt token ids has more video placeholder than in messages")
|
raise ValueError("prompt token ids has more video placeholder than in messages")
|
||||||
# append video_start_id
|
# append video_start_id
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
@@ -446,8 +446,8 @@ class DataProcessor(MMBaseDataProcessor):
|
|||||||
video_idx += 1
|
video_idx += 1
|
||||||
st = cur_idx
|
st = cur_idx
|
||||||
else:
|
else:
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
|
|||||||
@@ -686,9 +686,9 @@ class TokenProcessor:
|
|||||||
sampled_token_ranks=[sampled_rank],
|
sampled_token_ranks=[sampled_rank],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
result.outputs.draft_top_logprobs.logprob_token_ids.extend([topk_token_ids])
|
result.outputs.draft_top_logprobs.logprob_token_ids.append(topk_token_ids)
|
||||||
result.outputs.draft_top_logprobs.logprobs.extend([topk_logprobs])
|
result.outputs.draft_top_logprobs.logprobs.append(topk_logprobs)
|
||||||
result.outputs.draft_top_logprobs.sampled_token_ranks.extend([sampled_rank])
|
result.outputs.draft_top_logprobs.sampled_token_ranks.append(sampled_rank)
|
||||||
batch_result.append(result)
|
batch_result.append(result)
|
||||||
return batch_result
|
return batch_result
|
||||||
|
|
||||||
@@ -898,9 +898,9 @@ class TokenProcessor:
|
|||||||
sampled_token_ranks=[sampled_rank],
|
sampled_token_ranks=[sampled_rank],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
result.outputs.top_logprobs.logprob_token_ids.extend([topk_token_ids])
|
result.outputs.top_logprobs.logprob_token_ids.append(topk_token_ids)
|
||||||
result.outputs.top_logprobs.logprobs.extend([topk_logprobs])
|
result.outputs.top_logprobs.logprobs.append(topk_logprobs)
|
||||||
result.outputs.top_logprobs.sampled_token_ranks.extend([sampled_rank])
|
result.outputs.top_logprobs.sampled_token_ranks.append(sampled_rank)
|
||||||
if token_id in task.eos_token_ids or is_prefill or recovery_stop:
|
if token_id in task.eos_token_ids or is_prefill or recovery_stop:
|
||||||
result.finished = True
|
result.finished = True
|
||||||
trace_carrier = tracing.trace_get_proc_propagate_context(rid=rid)
|
trace_carrier = tracing.trace_get_proc_propagate_context(rid=rid)
|
||||||
|
|||||||
@@ -1479,7 +1479,7 @@ class GPUModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
||||||
cache_kvs_list.extend([key_cache, indexer_cache])
|
cache_kvs_list.extend([key_cache, indexer_cache])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache])
|
cache_kvs_list.append(key_cache)
|
||||||
if kv_cache_quant_type == "block_wise_fp8":
|
if kv_cache_quant_type == "block_wise_fp8":
|
||||||
key_cache_scales = paddle.full(
|
key_cache_scales = paddle.full(
|
||||||
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
||||||
@@ -1494,7 +1494,7 @@ class GPUModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
||||||
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache_scales])
|
cache_kvs_list.append(key_cache_scales)
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}, indexer:{indexer_cache_shape}"
|
f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}, indexer:{indexer_cache_shape}"
|
||||||
@@ -1526,9 +1526,9 @@ class GPUModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
||||||
cache_kvs_list.extend([key_cache, indexer_cache])
|
cache_kvs_list.extend([key_cache, indexer_cache])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache])
|
cache_kvs_list.append(key_cache)
|
||||||
if kv_cache_quant_type == "block_wise_fp8":
|
if kv_cache_quant_type == "block_wise_fp8":
|
||||||
cache_kvs_list.extend([key_cache_scales])
|
cache_kvs_list.append(key_cache_scales)
|
||||||
|
|
||||||
self.share_inputs["caches"] = cache_kvs_list
|
self.share_inputs["caches"] = cache_kvs_list
|
||||||
|
|
||||||
|
|||||||
@@ -1384,7 +1384,7 @@ class MetaxModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[val_cache_name] = val_cache
|
self.cache_kvs_map[val_cache_name] = val_cache
|
||||||
cache_kvs_list.extend([key_cache, val_cache])
|
cache_kvs_list.extend([key_cache, val_cache])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache])
|
cache_kvs_list.append(key_cache)
|
||||||
if kv_cache_quant_type == "block_wise_fp8":
|
if kv_cache_quant_type == "block_wise_fp8":
|
||||||
key_cache_scales = paddle.full(
|
key_cache_scales = paddle.full(
|
||||||
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
||||||
@@ -1399,7 +1399,7 @@ class MetaxModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
||||||
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache_scales])
|
cache_kvs_list.append(key_cache_scales)
|
||||||
else:
|
else:
|
||||||
logger.info(f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}")
|
logger.info(f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}")
|
||||||
key_cache = paddle.empty(shape=[], dtype=cache_type)
|
key_cache = paddle.empty(shape=[], dtype=cache_type)
|
||||||
@@ -1424,9 +1424,9 @@ class MetaxModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
||||||
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache])
|
cache_kvs_list.append(key_cache)
|
||||||
if kv_cache_quant_type == "block_wise_fp8":
|
if kv_cache_quant_type == "block_wise_fp8":
|
||||||
cache_kvs_list.extend([key_cache_scales])
|
cache_kvs_list.append(key_cache_scales)
|
||||||
|
|
||||||
self.share_inputs["caches"] = cache_kvs_list
|
self.share_inputs["caches"] = cache_kvs_list
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user