From 3b9d6c60d33efc09788d0bad0ea92fe94401a6ac Mon Sep 17 00:00:00 2001 From: yzwu Date: Fri, 17 Apr 2026 20:42:56 +0800 Subject: [PATCH] [Iiluvatar] fix ci error and update readme (#7453) --- docs/get_started/installation/iluvatar_gpu.md | 17 ++++++++++++----- .../get_started/installation/iluvatar_gpu.md | 19 +++++++++++++------ .../moe/fuse_moe_cutlass_iluvatar_backend.py | 9 +++++++++ requirements_iluvatar.txt | 1 + 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/docs/get_started/installation/iluvatar_gpu.md b/docs/get_started/installation/iluvatar_gpu.md index 4f7c64bba2..ed605c6b31 100644 --- a/docs/get_started/installation/iluvatar_gpu.md +++ b/docs/get_started/installation/iluvatar_gpu.md @@ -34,17 +34,24 @@ Note: Because the 4.3.8 SDK in the image is incompatible with KMD, paddle cannot ### 3.2 Install paddle ```bash -pip3 install paddlepaddle-iluvatar==3.4.0.dev20260326 -i https://www.paddlepaddle.org.cn/packages/nightly/ixuca/ +pip3 install paddlepaddle-iluvatar==3.4.0.dev20260326 -i https://www.paddlepaddle.org.cn/packages/nightly/ixuca/ --extra-index-url https://mirrors.aliyun.com/pypi/simple/ ``` ### 3.3 Install or build FastDeploy + +You can install FastDeploy in either of the following ways: + +- pip install ```bash pip3 install fastdeploy_iluvatar_gpu==2.5.0.dev0 -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/ --extra-index-url https://mirrors.aliyun.com/pypi/simple/ ``` + +- Build from source You can build FastDeploy from source if you need the ```latest version```. ```bash -git clone https://github.com/PaddlePaddle/FastDeploy +git clone --recursive https://github.com/PaddlePaddle/FastDeploy.git cd FastDeploy +pip3 install -r requirements_iluvatar.txt -i https://mirrors.aliyun.com/pypi/simple/ bash build.sh ``` @@ -451,9 +458,9 @@ curl -X POST "http://0.0.0.0:8180/v1/chat/completions" \ ### 4.3 PaddleOCR-VL series #### 4.3.1 PaddleOCR-VL-0.9B -- (Optional) Install paddleocr +- (Optional) Build and install paddleocr from source -To install the latest `paddleocr`, you can compile it from source. The image contains a compilation and installation based on source code `39128c2c7fd40be44d8f33498cabd4ec10f1bfcd`. +To install the latest `paddleocr`, you can compile it from source. The version in the image is `3.3.2`. ```bash git clone -b main https://github.com/PaddlePaddle/PaddleOCR.git @@ -515,7 +522,7 @@ import os from paddleocr import PaddleOCRVL input_path = "./images" -pipeline = PaddleOCRVL(vl_rec_backend="fastdeploy-server", vl_rec_server_url="http://127.0.0.1:8180/v1") +pipeline = PaddleOCRVL(vl_rec_backend="fastdeploy-server", vl_rec_server_url="http://127.0.0.1:8180/v1", device="iluvatar_gpu") file_list = os.listdir(input_path) for file_name in file_list: file_path = os.path.join(input_path, file_name) diff --git a/docs/zh/get_started/installation/iluvatar_gpu.md b/docs/zh/get_started/installation/iluvatar_gpu.md index 4d7999321d..de8caa4b76 100644 --- a/docs/zh/get_started/installation/iluvatar_gpu.md +++ b/docs/zh/get_started/installation/iluvatar_gpu.md @@ -34,17 +34,24 @@ docker exec -it paddle_infer bash ### 3.2 安装paddle ```bash -pip3 install paddlepaddle-iluvatar==3.4.0.dev20260326 -i https://www.paddlepaddle.org.cn/packages/nightly/ixuca/ +pip3 install paddlepaddle-iluvatar==3.4.0.dev20260326 -i https://www.paddlepaddle.org.cn/packages/nightly/ixuca/ --extra-index-url https://mirrors.aliyun.com/pypi/simple/ ``` ### 3.3 安装fastdeploy + +以下两种方式安装均可以: + +- pip安装 ```bash pip3 install fastdeploy_iluvatar_gpu==2.5.0.dev0 -i https://www.paddlepaddle.org.cn/packages/stable/ixuca/ --extra-index-url https://mirrors.aliyun.com/pypi/simple/ ``` -可以按如下步骤编译FastDeploy,,得到```最新版本```。 + +- 源码编译 +可以按如下步骤编译FastDeploy得到```最新版本```。 ```bash -git clone https://github.com/PaddlePaddle/FastDeploy +git clone --recursive https://github.com/PaddlePaddle/FastDeploy.git cd FastDeploy +pip3 install -r requirements_iluvatar.txt -i https://mirrors.aliyun.com/pypi/simple/ bash build.sh ``` @@ -451,9 +458,9 @@ curl -X POST "http://0.0.0.0:8180/v1/chat/completions" \ ### 4.3 PaddleOCR-VL系列 #### 4.3.1 PaddleOCR-VL-0.9B -- (可选) 安装 paddleocr +- (可选) 源码编译安装 paddleocr -如果想要安装最新的`paddleocr`,可以源码编译。镜像里是基于`39128c2c7fd40be44d8f33498cabd4ec10f1bfcd`源码编译安装的 +如果想要安装最新的`paddleocr`,可以源码编译。镜像里的版本是`3.3.2` ```bash git clone -b main https://github.com/PaddlePaddle/PaddleOCR.git @@ -512,7 +519,7 @@ import os from paddleocr import PaddleOCRVL input_path = "./images" -pipeline = PaddleOCRVL(vl_rec_backend="fastdeploy-server", vl_rec_server_url="http://127.0.0.1:8180/v1") +pipeline = PaddleOCRVL(vl_rec_backend="fastdeploy-server", vl_rec_server_url="http://127.0.0.1:8180/v1", device="iluvatar_gpu") file_list = os.listdir(input_path) for file_name in file_list: file_path = os.path.join(input_path, file_name) diff --git a/fastdeploy/model_executor/layers/backends/iluvatar/moe/fuse_moe_cutlass_iluvatar_backend.py b/fastdeploy/model_executor/layers/backends/iluvatar/moe/fuse_moe_cutlass_iluvatar_backend.py index 1bb0272877..87a11d8c3b 100644 --- a/fastdeploy/model_executor/layers/backends/iluvatar/moe/fuse_moe_cutlass_iluvatar_backend.py +++ b/fastdeploy/model_executor/layers/backends/iluvatar/moe/fuse_moe_cutlass_iluvatar_backend.py @@ -124,12 +124,18 @@ class IluvatarCutlassMoEMethod(UnquantizedFusedMoEMethod): x: paddle.Tensor, gate: nn.Layer, topk_ids_hookfunc: Callable = None, + fc1_latent_proj: nn.Layer = None, + fc2_latent_proj: nn.Layer = None, ) -> paddle.Tensor: """ Paddle Cutlass compute Fused MoE. """ gate_out = gate(x) gate_out = gate_out.cast("float32") + + if fc1_latent_proj is not None: + x = fc1_latent_proj(x) + if layer.topk_method == "noaux_tc": gate_out, topk_weights, topk_idx = get_moe_scores( gate_out, @@ -206,6 +212,9 @@ class IluvatarCutlassMoEMethod(UnquantizedFusedMoEMethod): routed_scaling_factor=1.0, ) + if fc2_latent_proj is not None: + fused_moe_out = fc2_latent_proj(fused_moe_out) + return fused_moe_out diff --git a/requirements_iluvatar.txt b/requirements_iluvatar.txt index 83e968650c..112694433f 100644 --- a/requirements_iluvatar.txt +++ b/requirements_iluvatar.txt @@ -43,3 +43,4 @@ msgspec safetensors>=0.7.0 py-cpuinfo transformers>=4.55.1,<5.0.0 +paddleocr[doc-parser]==3.3.2