mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[RL] add version to the key of cache storage && refine raising error (#6160)
* Waiting for cache transfer manager inited * up * up * up * up * up * fix according comments * fix unittest * fix * fix unittest * fix error * pass storage_backend to worker
This commit is contained in:
@@ -248,6 +248,14 @@ class PrefixCacheManager:
|
||||
suffix=engine_worker_queue_port,
|
||||
create=False,
|
||||
)
|
||||
cache_transfer_inited_signal_data = np.zeros(shape=[tensor_parallel_size], dtype=np.int32)
|
||||
self.cache_transfer_inited_signal = IPCSignal(
|
||||
name="cache_transfer_inited_signal",
|
||||
array=cache_transfer_inited_signal_data,
|
||||
dtype=np.int32,
|
||||
suffix=engine_worker_queue_port,
|
||||
create=False,
|
||||
)
|
||||
|
||||
# Run command to launch cache transfer managers
|
||||
log_dir = envs.FD_LOG_DIR
|
||||
@@ -262,9 +270,9 @@ class PrefixCacheManager:
|
||||
val_shape_str = str(val_cache_shape)
|
||||
val_cache_arg_str = f" --value_cache_shape {val_shape_str}"
|
||||
if cache_config.kvcache_storage_backend:
|
||||
kvcache_storage_backend_str = cache_config.kvcache_storage_backend
|
||||
storage_arg_str = f" --kvcache_storage_backend {cache_config.kvcache_storage_backend}"
|
||||
else:
|
||||
kvcache_storage_backend_str = "none"
|
||||
storage_arg_str = " "
|
||||
|
||||
if self.cache_config.swap_space or self.cache_config.kvcache_storage_backend:
|
||||
for i in range(tensor_parallel_size):
|
||||
@@ -274,7 +282,6 @@ class PrefixCacheManager:
|
||||
+ " NCCL_MAX_NCHANNELS=1 NCCL_BUFFSIZE=0"
|
||||
+ f" FD_ENABLE_SWAP_SPACE_CLEARING={envs.FD_ENABLE_SWAP_SPACE_CLEARING}"
|
||||
+ f" {sys.executable} {py_path}"
|
||||
+ f" --model_id {os.path.basename(self.config.model_config.model)}"
|
||||
+ f" --device_id {int(device_ids[i])}"
|
||||
+ f" --rank {i}"
|
||||
+ f" --splitwise_role {self.splitwise_role}"
|
||||
@@ -295,14 +302,19 @@ class PrefixCacheManager:
|
||||
+ f" --speculative_config '{self.speculative_config.to_json_string()}'"
|
||||
+ f" --default_dtype '{self.config.model_config.dtype}'"
|
||||
+ (" --create_cache_tensor" if not self.enable_splitwise else "")
|
||||
+ f" --kvcache_storage_backend {kvcache_storage_backend_str}"
|
||||
+ storage_arg_str
|
||||
+ f" --write_policy {cache_config.write_policy}"
|
||||
+ f" --max_model_len {self.config.model_config.max_model_len}"
|
||||
+ f" --model_path {self.config.model_config.model}"
|
||||
+ f" >{log_dir}/launch_cache_transfer_manager_{int(device_ids[i])}.log 2>&1"
|
||||
)
|
||||
logger.info(f"Launch cache transfer manager, command:{launch_cmd}")
|
||||
cache_manager_processes.append(subprocess.Popen(launch_cmd, shell=True, preexec_fn=os.setsid))
|
||||
|
||||
logger.info("PrefixCacheManager is waiting for cache transfer manager to be initialized.")
|
||||
while np.sum(self.cache_transfer_inited_signal.value) != tensor_parallel_size:
|
||||
time.sleep(1)
|
||||
|
||||
logger.info("PrefixCacheManager is waiting for kv cache to be initialized.")
|
||||
while np.sum(self.cache_ready_signal.value) != tensor_parallel_size:
|
||||
time.sleep(1)
|
||||
|
||||
Reference in New Issue
Block a user