mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-24 01:29:57 +08:00
[Feature] Unify fp8 block_wise quant ops (#5991)
* quant stash
* blockwise_quant
* precommit
* rm tensor.cut
* tp ok
* add swiglu
* rm outdate code
* fix activate ut
* change baseline
* fix baseline error
This commit is contained in:
@@ -151,9 +151,9 @@ def check_routing_replay_chat_completion(openai_client, moe_layer_num: int, mode
|
||||
cur_save_routing_path = f"./R3_tmp/routing_replay_output_{model_name}/"
|
||||
model_path = os.getenv("MODEL_PATH")
|
||||
if model_path:
|
||||
- baseline_path = os.path.join(model_path, f"R3_BaseLine/routing_replay_output_baseline_{model_name}")
|
||||
+ baseline_path = os.path.join(model_path, f"R3_BaseLine_dev/routing_replay_output_baseline_{model_name}")
|
||||
else:
|
||||
- baseline_path = f"./R3_BaseLine/routing_replay_output_baseline_{model_name}"
|
||||
+ baseline_path = f"./R3_BaseLine_dev/routing_replay_output_baseline_{model_name}"
|
||||
stream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_stream")
|
||||
|
||||
nonstream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_nonstream")
|
||||
|
||||
Reference in New Issue
Block a user