Support MXFP4 for GPT-OSS (#5435)

* support mxfp4 in gpt-oss

* support mxfp4 in gpt-oss

* add scope for flashinfer

* remove torch code

* update envs.FD_MXFP4_BACKEND

* update process_weights_after_loading

* update env name

* support tp in gpt-oss, add e2e test

* add flashinfer-python-paddle in requirements

* fix import error

* add test

* add test

* add test

* add test
This commit is contained in:
Haonan Luo
2026-01-22 14:21:01 +08:00
committed by GitHub
parent 309c7d9764
commit 82057cb71f
13 changed files with 670 additions and 25 deletions
@@ -279,6 +279,18 @@ class AppendAttentionBackend(AttentionBackend):
forward_meta.rotary_embs = self._get_identity_rotary_embs(forward_meta.rotary_embs)
sliding_window = layer.sliding_window
if self.rope_3d:
assert len(forward_meta.rotary_embs.shape) == 6
else:
assert len(forward_meta.rotary_embs.shape) == 5
if layer.use_neox_rotary_style:
assert forward_meta.rotary_embs.shape[0:4] == [2, 1, self.max_seq_len, 1]
# Accepted sizes for forward_meta.rotary_embs.shape[4], by model:
#   128 -> qwen3
#   32  -> glm
#   64  -> gpt-oss
assert forward_meta.rotary_embs.shape[4] in [128, 32, 64]
if self.pd_disaggregation_mode == "per_query":
metadata.kv_signal_data_list[layer.layer_id] = init_signal_layerwise(
metadata.kv_signal_metadata,