mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
⚡ Bolt: Optimize single element list appends
Replaced instances of `.extend([item])` with `.append(item)` in multiple files. Using `.extend([item])` incurs memory overhead by allocating a new single-element list and is computationally slower than calling `.append(item)` directly. Files updated: - fastdeploy/input/encodings/ernie_encoding.py - fastdeploy/input/ernie4_5_vl_processor/process.py - fastdeploy/output/token_processor.py - fastdeploy/worker/gpu_model_runner.py - fastdeploy/worker/metax_model_runner.py
This commit is contained in:
@@ -0,0 +1,3 @@
|
|||||||
|
## 2024-04-15 - FastDeploy test suite
|
||||||
|
**Learning:** The FastDeploy codebase's test suite requires a complex environment. Running `pytest tests/` directly fails with hundreds of import errors caused by missing dependencies and environment setup specific to FastDeploy (such as PaddlePaddle and other ML packages).
|
||||||
|
**Action:** When working on this codebase, accept that local tests might fail unless they are run in a fully configured container or environment.
|
||||||
@@ -302,8 +302,8 @@ class ErnieEncoding(BaseEncoding):
|
|||||||
if image_idx >= len(images):
|
if image_idx >= len(images):
|
||||||
raise ValueError("prompt token ids has more image placeholder than in messages")
|
raise ValueError("prompt token ids has more image placeholder than in messages")
|
||||||
# append image_start_id
|
# append image_start_id
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["image"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
@@ -326,8 +326,8 @@ class ErnieEncoding(BaseEncoding):
|
|||||||
if video_idx >= len(videos):
|
if video_idx >= len(videos):
|
||||||
raise ValueError("prompt token ids has more video placeholder than in messages")
|
raise ValueError("prompt token ids has more video placeholder than in messages")
|
||||||
# append video_start_id
|
# append video_start_id
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["image"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["image"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
@@ -351,11 +351,11 @@ class ErnieEncoding(BaseEncoding):
|
|||||||
video_idx += 1
|
video_idx += 1
|
||||||
st = cur_idx
|
st = cur_idx
|
||||||
else:
|
else:
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
type_flag = (
|
type_flag = (
|
||||||
IDS_TYPE_FLAG["image"] if cur_token_id in (image_end_id, video_end_id) else IDS_TYPE_FLAG["text"]
|
IDS_TYPE_FLAG["image"] if cur_token_id in (image_end_id, video_end_id) else IDS_TYPE_FLAG["text"]
|
||||||
)
|
)
|
||||||
outputs["token_type_ids"].extend([type_flag])
|
outputs["token_type_ids"].append(type_flag)
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
|
|||||||
@@ -397,8 +397,8 @@ class DataProcessor(MMBaseDataProcessor):
|
|||||||
if image_idx >= len(images):
|
if image_idx >= len(images):
|
||||||
raise ValueError("prompt token ids has more image placeholder than in messages")
|
raise ValueError("prompt token ids has more image placeholder than in messages")
|
||||||
# append image_start_id
|
# append image_start_id
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
@@ -421,8 +421,8 @@ class DataProcessor(MMBaseDataProcessor):
|
|||||||
if video_idx >= len(videos):
|
if video_idx >= len(videos):
|
||||||
raise ValueError("prompt token ids has more video placeholder than in messages")
|
raise ValueError("prompt token ids has more video placeholder than in messages")
|
||||||
# append video_start_id
|
# append video_start_id
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
@@ -446,8 +446,8 @@ class DataProcessor(MMBaseDataProcessor):
|
|||||||
video_idx += 1
|
video_idx += 1
|
||||||
st = cur_idx
|
st = cur_idx
|
||||||
else:
|
else:
|
||||||
outputs["input_ids"].extend([cur_token_id])
|
outputs["input_ids"].append(cur_token_id)
|
||||||
outputs["token_type_ids"].extend([IDS_TYPE_FLAG["text"]])
|
outputs["token_type_ids"].append(IDS_TYPE_FLAG["text"])
|
||||||
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
outputs["position_ids"].append([outputs["cur_position"]] * 3)
|
||||||
outputs["cur_position"] += 1
|
outputs["cur_position"] += 1
|
||||||
st += 1
|
st += 1
|
||||||
|
|||||||
@@ -686,9 +686,9 @@ class TokenProcessor:
|
|||||||
sampled_token_ranks=[sampled_rank],
|
sampled_token_ranks=[sampled_rank],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
result.outputs.draft_top_logprobs.logprob_token_ids.extend([topk_token_ids])
|
result.outputs.draft_top_logprobs.logprob_token_ids.append(topk_token_ids)
|
||||||
result.outputs.draft_top_logprobs.logprobs.extend([topk_logprobs])
|
result.outputs.draft_top_logprobs.logprobs.append(topk_logprobs)
|
||||||
result.outputs.draft_top_logprobs.sampled_token_ranks.extend([sampled_rank])
|
result.outputs.draft_top_logprobs.sampled_token_ranks.append(sampled_rank)
|
||||||
batch_result.append(result)
|
batch_result.append(result)
|
||||||
return batch_result
|
return batch_result
|
||||||
|
|
||||||
@@ -898,9 +898,9 @@ class TokenProcessor:
|
|||||||
sampled_token_ranks=[sampled_rank],
|
sampled_token_ranks=[sampled_rank],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
result.outputs.top_logprobs.logprob_token_ids.extend([topk_token_ids])
|
result.outputs.top_logprobs.logprob_token_ids.append(topk_token_ids)
|
||||||
result.outputs.top_logprobs.logprobs.extend([topk_logprobs])
|
result.outputs.top_logprobs.logprobs.append(topk_logprobs)
|
||||||
result.outputs.top_logprobs.sampled_token_ranks.extend([sampled_rank])
|
result.outputs.top_logprobs.sampled_token_ranks.append(sampled_rank)
|
||||||
if token_id in task.eos_token_ids or is_prefill or recovery_stop:
|
if token_id in task.eos_token_ids or is_prefill or recovery_stop:
|
||||||
result.finished = True
|
result.finished = True
|
||||||
trace_carrier = tracing.trace_get_proc_propagate_context(rid=rid)
|
trace_carrier = tracing.trace_get_proc_propagate_context(rid=rid)
|
||||||
|
|||||||
@@ -1479,7 +1479,7 @@ class GPUModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
||||||
cache_kvs_list.extend([key_cache, indexer_cache])
|
cache_kvs_list.extend([key_cache, indexer_cache])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache])
|
cache_kvs_list.append(key_cache)
|
||||||
if kv_cache_quant_type == "block_wise_fp8":
|
if kv_cache_quant_type == "block_wise_fp8":
|
||||||
key_cache_scales = paddle.full(
|
key_cache_scales = paddle.full(
|
||||||
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
||||||
@@ -1494,7 +1494,7 @@ class GPUModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
||||||
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache_scales])
|
cache_kvs_list.append(key_cache_scales)
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}, indexer:{indexer_cache_shape}"
|
f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}, indexer:{indexer_cache_shape}"
|
||||||
@@ -1526,9 +1526,9 @@ class GPUModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
self.cache_kvs_map[indexer_cache_name] = indexer_cache
|
||||||
cache_kvs_list.extend([key_cache, indexer_cache])
|
cache_kvs_list.extend([key_cache, indexer_cache])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache])
|
cache_kvs_list.append(key_cache)
|
||||||
if kv_cache_quant_type == "block_wise_fp8":
|
if kv_cache_quant_type == "block_wise_fp8":
|
||||||
cache_kvs_list.extend([key_cache_scales])
|
cache_kvs_list.append(key_cache_scales)
|
||||||
|
|
||||||
self.share_inputs["caches"] = cache_kvs_list
|
self.share_inputs["caches"] = cache_kvs_list
|
||||||
|
|
||||||
|
|||||||
@@ -1384,7 +1384,7 @@ class MetaxModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[val_cache_name] = val_cache
|
self.cache_kvs_map[val_cache_name] = val_cache
|
||||||
cache_kvs_list.extend([key_cache, val_cache])
|
cache_kvs_list.extend([key_cache, val_cache])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache])
|
cache_kvs_list.append(key_cache)
|
||||||
if kv_cache_quant_type == "block_wise_fp8":
|
if kv_cache_quant_type == "block_wise_fp8":
|
||||||
key_cache_scales = paddle.full(
|
key_cache_scales = paddle.full(
|
||||||
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
shape=kv_cache_scale_shape, fill_value=0, dtype=paddle.get_default_dtype()
|
||||||
@@ -1399,7 +1399,7 @@ class MetaxModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
||||||
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache_scales])
|
cache_kvs_list.append(key_cache_scales)
|
||||||
else:
|
else:
|
||||||
logger.info(f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}")
|
logger.info(f"..attaching kv cache for layer {i}: key:{key_cache_shape}, value:{value_cache_shape}")
|
||||||
key_cache = paddle.empty(shape=[], dtype=cache_type)
|
key_cache = paddle.empty(shape=[], dtype=cache_type)
|
||||||
@@ -1424,9 +1424,9 @@ class MetaxModelRunner(ModelRunnerBase):
|
|||||||
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
self.cache_kvs_map[value_cache_scales_name] = val_cache_scales
|
||||||
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
cache_kvs_list.extend([key_cache_scales, val_cache_scales])
|
||||||
else:
|
else:
|
||||||
cache_kvs_list.extend([key_cache])
|
cache_kvs_list.append(key_cache)
|
||||||
if kv_cache_quant_type == "block_wise_fp8":
|
if kv_cache_quant_type == "block_wise_fp8":
|
||||||
cache_kvs_list.extend([key_cache_scales])
|
cache_kvs_list.append(key_cache_scales)
|
||||||
|
|
||||||
self.share_inputs["caches"] = cache_kvs_list
|
self.share_inputs["caches"] = cache_kvs_list
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user