[Fix] Fix EPLB bug and support FP8 weight loading (#5178)

* fix eplb part2

* fix eplb part2

* fix eplb part2
This commit is contained in:
xiaoxiaohehe001
2025-11-24 15:31:37 +08:00
committed by GitHub
parent f5c1066245
commit 95f3c8c641
5 changed files with 27 additions and 8 deletions
+8
View File
@@ -260,6 +260,13 @@ class PaddleDisWorkerProc:
"""
update_weights_from_tensor
"""
import time
while True:
if self.experts_manager.tensor_infos is None:
time.sleep(0.1)
else:
break
state_dicts = load_tensor_from_shm_mem(self.experts_manager.tensor_infos, mmap_infos[MODEL_MAIN_NAME], logger)
rank_expert_list, logical_to_physical_map, expert_count = self.experts_manager.get_ep_rank_to_expert_id_list()
self.worker.get_model().redundant_table_manger.update_expert_rank_table(
@@ -267,6 +274,7 @@ class PaddleDisWorkerProc:
)
# TO BE FIXED
self.worker.get_model().update_state_dict(state_dicts)
self.experts_manager.tensor_infos = None
def _broadcast_model_weights_signal(self, src: int, group) -> int:
model_weights_signal_tensor = paddle.full(shape=[1], fill_value=self.model_weights_signal[0], dtype="int32")