mirror of
https://github.com/nyanmisaka/ffmpeg-rockchip.git
synced 2026-04-26 09:14:51 +08:00
Various VP8 x86 deblocking speedups
Add SSSE3 versions and improve the SSE2 versions a bit. The SSE2/SSSE3 mbedge h functions are currently broken, so explicitly disable them. Originally committed as revision 24403 to svn://svn.ffmpeg.org/ffmpeg/trunk.
This commit is contained in:
+67
-32
@@ -1229,18 +1229,22 @@ cglobal vp8_luma_dc_wht_mmx, 2,3
|
||||
movd [%7+%9*2], m%4
|
||||
%endmacro
|
||||
|
||||
; SPLATB_REG dst, src, opt [, mask]
; Broadcasts the lowest byte of src into every byte of the SIMD register dst.
;   %1  dst  - SIMD register that receives the splatted byte
;   %2  src  - operand loaded with movd (the call sites shown pass registers
;              such as r2, E_reg, I_reg and hev_thr_reg)
;   %3  opt  - instruction-set name that selects the code path:
;              ssse3, sse2 (any other name while mmsize == 16), mmx, or mmxext
;   %4  mask - only read on the ssse3 path: pshufb shuffle mask. The call
;              sites shown clear it with pxor first, so every output byte
;              selects source byte 0.
; NOTE(review): this text is a rendered diff without +/- markers. Going by
; the hunk header (18 old lines, 22 new lines), the "3" header and the
; punpcklwd/pshufd pair in the mmsize == 16 branch appear to be the removed
; lines, and the "3-4" header, the ssse3 branch and the pshuflw/punpcklqdq
; pair the added ones - confirm against the upstream file.
%macro SPLATB_REG 3
|
||||
%macro SPLATB_REG 3-4
|
||||
movd %1, %2 ; load the 32-bit source into the low dword of dst
|
||||
%ifidn %3, ssse3
|
||||
pshufb %1, %4 ; byte shuffle by mask %4; an all-zero mask copies byte 0 everywhere
|
||||
%else
|
||||
punpcklbw %1, %1 ; interleave dst with itself: low word becomes b0 b0
|
||||
%if mmsize == 16 ; sse2
|
||||
punpcklwd %1, %1 ; (apparently pre-change) low dword becomes b0 x4
|
||||
pshufd %1, %1, 0x0 ; (apparently pre-change) copy dword 0 to all four dwords
|
||||
pshuflw %1, %1, 0x0 ; copy word 0 (b0 b0) into the four low words
|
||||
punpcklqdq %1, %1 ; duplicate the low qword into the high qword
|
||||
%elifidn %3, mmx
|
||||
punpcklwd %1, %1 ; low dword becomes b0 x4
|
||||
punpckldq %1, %1 ; duplicate the low dword into the high dword
|
||||
%else ; mmxext
|
||||
pshufw %1, %1, 0x0 ; copy word 0 (b0 b0) to all four words
|
||||
%endif
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro SIMPLE_LOOPFILTER 3
|
||||
@@ -1252,7 +1256,10 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3
|
||||
%if mmsize == 8 ; mmx/mmxext
|
||||
mov r3, 2
|
||||
%endif
|
||||
SPLATB_REG m7, r2, %1 ; splat "flim" into register
|
||||
%ifidn %1, ssse3
|
||||
pxor m0, m0
|
||||
%endif
|
||||
SPLATB_REG m7, r2, %1, m0 ; splat "flim" into register
|
||||
|
||||
; set up indexes to address 4 rows
|
||||
mov r2, r1
|
||||
@@ -1398,6 +1405,8 @@ SIMPLE_LOOPFILTER mmxext, h, 6
|
||||
INIT_XMM
|
||||
SIMPLE_LOOPFILTER sse2, v, 3
|
||||
SIMPLE_LOOPFILTER sse2, h, 6
|
||||
SIMPLE_LOOPFILTER ssse3, v, 3
|
||||
SIMPLE_LOOPFILTER ssse3, h, 6
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
|
||||
@@ -1433,11 +1442,15 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
|
||||
%define stack_reg hev_thr_reg
|
||||
%endif
|
||||
|
||||
%ifidn %1, ssse3
|
||||
pxor m7, m7
|
||||
%endif
|
||||
|
||||
%ifndef m8 ; mmx/mmxext or sse2 on x86-32
|
||||
; splat function arguments
|
||||
SPLATB_REG m0, E_reg, %1 ; E
|
||||
SPLATB_REG m1, I_reg, %1 ; I
|
||||
SPLATB_REG m2, hev_thr_reg, %1 ; hev_thresh
|
||||
SPLATB_REG m0, E_reg, %1, m7 ; E
|
||||
SPLATB_REG m1, I_reg, %1, m7 ; I
|
||||
SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh
|
||||
|
||||
; align stack
|
||||
mov stack_reg, rsp ; backup stack pointer
|
||||
@@ -1470,9 +1483,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
|
||||
%define q0backup m8
|
||||
|
||||
; splat function arguments
|
||||
SPLATB_REG flim_E, E_reg, %1 ; E
|
||||
SPLATB_REG flim_I, I_reg, %1 ; I
|
||||
SPLATB_REG hev_thr, hev_thr_reg, %1 ; hev_thresh
|
||||
SPLATB_REG flim_E, E_reg, %1, m7 ; E
|
||||
SPLATB_REG flim_I, I_reg, %1, m7 ; I
|
||||
SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
|
||||
%endif
|
||||
|
||||
%if mmsize == 8 && %4 == 16 ; mmx/mmxext
|
||||
@@ -1884,15 +1897,15 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
INNER_LOOPFILTER mmx, v, 6, 16, 8
|
||||
INNER_LOOPFILTER mmx, h, 6, 16, 8
|
||||
INNER_LOOPFILTER mmxext, v, 6, 16, 8
|
||||
INNER_LOOPFILTER mmxext, h, 6, 16, 8
|
||||
INNER_LOOPFILTER mmx, v, 6, 16, 0
|
||||
INNER_LOOPFILTER mmx, h, 6, 16, 0
|
||||
INNER_LOOPFILTER mmxext, v, 6, 16, 0
|
||||
INNER_LOOPFILTER mmxext, h, 6, 16, 0
|
||||
|
||||
INNER_LOOPFILTER mmx, v, 6, 8, 8
|
||||
INNER_LOOPFILTER mmx, h, 6, 8, 8
|
||||
INNER_LOOPFILTER mmxext, v, 6, 8, 8
|
||||
INNER_LOOPFILTER mmxext, h, 6, 8, 8
|
||||
INNER_LOOPFILTER mmx, v, 6, 8, 0
|
||||
INNER_LOOPFILTER mmx, h, 6, 8, 0
|
||||
INNER_LOOPFILTER mmxext, v, 6, 8, 0
|
||||
INNER_LOOPFILTER mmxext, h, 6, 8, 0
|
||||
|
||||
INIT_XMM
|
||||
INNER_LOOPFILTER sse2, v, 5, 16, 13
|
||||
@@ -1904,6 +1917,15 @@ INNER_LOOPFILTER sse2, h, 6, 16, 13
|
||||
INNER_LOOPFILTER sse2, v, 6, 8, 13
|
||||
INNER_LOOPFILTER sse2, h, 6, 8, 13
|
||||
|
||||
INNER_LOOPFILTER ssse3, v, 5, 16, 13
|
||||
%ifdef m8
|
||||
INNER_LOOPFILTER ssse3, h, 5, 16, 13
|
||||
%else
|
||||
INNER_LOOPFILTER ssse3, h, 6, 16, 13
|
||||
%endif
|
||||
INNER_LOOPFILTER ssse3, v, 6, 8, 13
|
||||
INNER_LOOPFILTER ssse3, h, 6, 8, 13
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; void vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
|
||||
; int flimE, int flimI, int hev_thr);
|
||||
@@ -1984,11 +2006,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
|
||||
%define stack_reg hev_thr_reg
|
||||
%endif
|
||||
|
||||
%ifidn %1, ssse3
|
||||
pxor m7, m7
|
||||
%endif
|
||||
|
||||
%ifndef m8 ; mmx/mmxext or sse2 on x86-32
|
||||
; splat function arguments
|
||||
SPLATB_REG m0, E_reg, %1 ; E
|
||||
SPLATB_REG m1, I_reg, %1 ; I
|
||||
SPLATB_REG m2, hev_thr_reg, %1 ; hev_thresh
|
||||
SPLATB_REG m0, E_reg, %1, m7 ; E
|
||||
SPLATB_REG m1, I_reg, %1, m7 ; I
|
||||
SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh
|
||||
|
||||
; align stack
|
||||
mov stack_reg, rsp ; backup stack pointer
|
||||
@@ -2028,9 +2054,9 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
|
||||
%define lim_sign m15
|
||||
|
||||
; splat function arguments
|
||||
SPLATB_REG flim_E, E_reg, %1 ; E
|
||||
SPLATB_REG flim_I, I_reg, %1 ; I
|
||||
SPLATB_REG hev_thr, hev_thr_reg, %1 ; hev_thresh
|
||||
SPLATB_REG flim_E, E_reg, %1, m7 ; E
|
||||
SPLATB_REG flim_I, I_reg, %1, m7 ; I
|
||||
SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
|
||||
%endif
|
||||
|
||||
%if mmsize == 8 && %4 == 16 ; mmx/mmxext
|
||||
@@ -2521,15 +2547,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
|
||||
%endmacro
|
||||
|
||||
INIT_MMX
|
||||
MBEDGE_LOOPFILTER mmx, v, 6, 16, 8
|
||||
MBEDGE_LOOPFILTER mmx, h, 6, 16, 8
|
||||
MBEDGE_LOOPFILTER mmxext, v, 6, 16, 8
|
||||
MBEDGE_LOOPFILTER mmxext, h, 6, 16, 8
|
||||
MBEDGE_LOOPFILTER mmx, v, 6, 16, 0
|
||||
MBEDGE_LOOPFILTER mmx, h, 6, 16, 0
|
||||
MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0
|
||||
MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0
|
||||
|
||||
MBEDGE_LOOPFILTER mmx, v, 6, 8, 8
|
||||
MBEDGE_LOOPFILTER mmx, h, 6, 8, 8
|
||||
MBEDGE_LOOPFILTER mmxext, v, 6, 8, 8
|
||||
MBEDGE_LOOPFILTER mmxext, h, 6, 8, 8
|
||||
MBEDGE_LOOPFILTER mmx, v, 6, 8, 0
|
||||
MBEDGE_LOOPFILTER mmx, h, 6, 8, 0
|
||||
MBEDGE_LOOPFILTER mmxext, v, 6, 8, 0
|
||||
MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
|
||||
|
||||
INIT_XMM
|
||||
MBEDGE_LOOPFILTER sse2, v, 5, 16, 16
|
||||
@@ -2540,3 +2566,12 @@ MBEDGE_LOOPFILTER sse2, h, 6, 16, 16
|
||||
%endif
|
||||
MBEDGE_LOOPFILTER sse2, v, 6, 8, 16
|
||||
MBEDGE_LOOPFILTER sse2, h, 6, 8, 16
|
||||
|
||||
MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16
|
||||
%ifdef m8
|
||||
MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16
|
||||
%else
|
||||
MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16
|
||||
%endif
|
||||
MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16
|
||||
MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16
|
||||
|
||||
Reference in New Issue
Block a user