[Feature] Unify fp8 block_wise quant ops (#5991)

* quant stash

* blockwise_quant

* precommit

* rm tensor.cut

* tp ok

* add swiglu

* rm outdated code

* fix activate ut

* change baseline

* fix baseline error
This commit is contained in:
fxyfxy777
2026-01-15 21:50:37 +08:00
committed by GitHub
parent d38cd8b40b
commit 4c92035f2d
17 changed files with 55 additions and 571 deletions
@@ -151,9 +151,9 @@ def check_routing_replay_chat_completion(openai_client, moe_layer_num: int, mode
cur_save_routing_path = f"./R3_tmp/routing_replay_output_{model_name}/"
model_path = os.getenv("MODEL_PATH")
if model_path:
baseline_path = os.path.join(model_path, f"R3_BaseLine/routing_replay_output_baseline_{model_name}")
baseline_path = os.path.join(model_path, f"R3_BaseLine_dev/routing_replay_output_baseline_{model_name}")
else:
baseline_path = f"./R3_BaseLine/routing_replay_output_baseline_{model_name}"
baseline_path = f"./R3_BaseLine_dev/routing_replay_output_baseline_{model_name}"
stream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_stream")
nonstream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_nonstream")