mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
[Feature][BugFix][OP] Enhance Deterministic Inference Mode with Kernel-level Fixes and Batch-invariant BMM (#6610)
* add fa deter
* add ut
* add long sentence
* fix basic
* fix bugs
* fix adn
* fix first
* fix single
* fix single
* fix single test
* refine
* add more test
* refine comments
* add comments of bmm
* fix ci
* remove probe
* add
* remove not need
* refine tests
* fix comments and refine code
* refine code
* refine test
* refine test
* mv 4cards tests
* fix tests
* add
* fix comments
* fix cover
* fix cover

---------

Co-authored-by: gongweibao <gognweibao@baidu.com>
This commit is contained in:
@@ -28,8 +28,8 @@ namespace cub = hipcub;
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <algorithm>
|
||||
-#include "stdint.h"
|
||||
#include "helper.h"
|
||||
+#include "stdint.h"
|
||||
|
||||
#define FLT_MAX 1e38
|
||||
|
||||
@@ -1372,6 +1372,9 @@ std::vector<paddle::Tensor> BeamSearchSoftmax(
|
||||
cudaMemcpyDeviceToHost,
|
||||
cu_stream);
|
||||
|
||||
+// Must synchronize before using host values copied from device
|
||||
+cudaStreamSynchronize(cu_stream);
|
||||
|
||||
int beam_batch_size = logits_shape[0];
|
||||
int batch_size = beam_batch_size / beam_width_scalar;
|
||||
int vocab_size = logits_shape[1];
|
||||
|
||||
Reference in New Issue
Block a user