mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Fix] Fix eplb bug and support fp8 load weight (#5178)
* fix eplb part2
* fix eplb part2
* fix eplb part2
This commit is contained in:
@@ -260,6 +260,13 @@ class PaddleDisWorkerProc:
|
||||
"""
|
||||
update_weights_from_tensor
|
||||
"""
|
||||
import time
|
||||
|
||||
while True:
|
||||
if self.experts_manager.tensor_infos is None:
|
||||
time.sleep(0.1)
|
||||
else:
|
||||
break
|
||||
state_dicts = load_tensor_from_shm_mem(self.experts_manager.tensor_infos, mmap_infos[MODEL_MAIN_NAME], logger)
|
||||
rank_expert_list, logical_to_physical_map, expert_count = self.experts_manager.get_ep_rank_to_expert_id_list()
|
||||
self.worker.get_model().redundant_table_manger.update_expert_rank_table(
|
||||
@@ -267,6 +274,7 @@ class PaddleDisWorkerProc:
|
||||
)
|
||||
# TO BE FIXED
|
||||
self.worker.get_model().update_state_dict(state_dicts)
|
||||
self.experts_manager.tensor_infos = None
|
||||
|
||||
def _broadcast_model_weights_signal(self, src: int, group) -> int:
|
||||
model_weights_signal_tensor = paddle.full(shape=[1], fill_value=self.model_weights_signal[0], dtype="int32")
|
||||
|
||||
Reference in New Issue
Block a user