mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-25 01:55:45 +08:00
Support MXFP4 for GPT-OSS (#5435)
* support mxfp4 in gpt-oss
* support mxfp4 in gpt-oss
* add scope for flashinfer
* remove torch code
* update envs.FD_MXFP4_BACKEND
* update process_weights_after_loading
* update env name
* support tp in gpt-oss, add e2e test
* add flashinfer-python-paddle in requirements
* fix import error
* add test
* add test
* add test
* add test
This commit is contained in:
@@ -279,6 +279,18 @@ class AppendAttentionBackend(AttentionBackend):
|
||||
forward_meta.rotary_embs = self._get_identity_rotary_embs(forward_meta.rotary_embs)
|
||||
|
||||
sliding_window = layer.sliding_window
|
||||
|
||||
if self.rope_3d:
|
||||
assert len(forward_meta.rotary_embs.shape) == 6
|
||||
else:
|
||||
assert len(forward_meta.rotary_embs.shape) == 5
|
||||
if layer.use_neox_rotary_style:
|
||||
assert forward_meta.rotary_embs.shape[0:4] == [2, 1, self.max_seq_len, 1]
|
||||
# 128 is qwen3
|
||||
# 32 is glm
|
||||
# 64 is gpt-oss
|
||||
assert forward_meta.rotary_embs.shape[4] in [128, 32, 64]
|
||||
|
||||
if self.pd_disaggregation_mode == "per_query":
|
||||
metadata.kv_signal_data_list[layer.layer_id] = init_signal_layerwise(
|
||||
metadata.kv_signal_metadata,
|
||||
|
||||
Reference in New Issue
Block a user