[x264-devel] [PATCH 2/4] x264_intra_sad_x3_8x8_neon
George Stephanos
gaf.stephanos at gmail.com
Thu Feb 2 03:15:31 CET 2012
Down from 1205 to 868!
I would really like to know why I can't align the first store to anything.
On Thu, Feb 2, 2012 at 4:10 AM, George Stephanos <gaf.stephanos at gmail.com> wrote:
> ---
> common/arm/pixel-a.S | 47 +++++++++++++++++++++++++++++++++++++++++++++++
> common/arm/pixel.h | 1 +
> common/pixel.c | 1 +
> 3 files changed, 49 insertions(+), 0 deletions(-)
>
> diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
> index da5f36c..995049b 100644
> --- a/common/arm/pixel-a.S
> +++ b/common/arm/pixel-a.S
> @@ -1289,3 +1289,50 @@ function x264_intra_sad_x3_4x4_armv6
> str r5, [r2, #8]
> pop {r4-r8,pc}
> .endfunc
> +
> +function x264_intra_sad_x3_8x8_neon
> + add r1, #7
> + vld1.8 {d4}, [r1]
> + add r1, #9
> + vrev64.8 d4, d4
> + vld1.8 {d0}, [r1]
> +
> + mov r3, #FENC_STRIDE
> +
> + vaddl.u8 q12, d0, d4
> + vadd.u16 d24, d25
> + vmov.i8 q1, #0
> + vpadd.u16 d24, d24
> + vmov.i8 q3, #0
> + vpadd.u16 d24, d24
> + vmov.i8 q13, #0
> + vrshr.u16 d24, #4
> + vdup.8 d24, d24[0]
> +
> +.irpc Y,0246
> + vld1.8 {d16}, [r0], r3
> + vld1.8 {d17}, [r0], r3
> + vdup.8 d5, d4[\Y]
> + vabal.u8 q1, d16, d0
> + vabal.u8 q3, d16, d5
> + vabal.u8 q13, d16, d24
> + vdup.8 d5, d4[\Y+1]
> + vabal.u8 q1, d17, d0
> + vabal.u8 q3, d17, d5
> + vabal.u8 q13, d17, d24
> +.endr
> + vmov.i8 d0, #0
> +
> + vadd.u16 d2, d3
> + vadd.u16 d6, d7
> + vadd.u16 d26, d27
> + vpadd.u16 d2, d0
> + vpadd.u16 d6, d0
> + vpadd.u16 d26, d0
> + vpadd.u16 d2, d6
> + vpadd.u16 d26, d26
> + vst1.64 {d2}, [r2]!
> + vst1.32 {d26[0]}, [r2,:32]
> +
> + bx lr
> +.endfunc
> diff --git a/common/arm/pixel.h b/common/arm/pixel.h
> index 3e02982..07a72c2 100644
> --- a/common/arm/pixel.h
> +++ b/common/arm/pixel.h
> @@ -70,4 +70,5 @@ void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, int,
> float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
>
> void x264_intra_sad_x3_4x4_armv6( uint8_t *, uint8_t *, int * );
> +void x264_intra_sad_x3_8x8_neon( uint8_t *, uint8_t *, int * );
> #endif
> diff --git a/common/pixel.c b/common/pixel.c
> index 0949405..af7006f 100644
> --- a/common/pixel.c
> +++ b/common/pixel.c
> @@ -1212,6 +1212,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
> }
> if( cpu&X264_CPU_NEON )
> {
> + pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_neon;
> INIT5( sad, _neon );
> INIT5( sad_aligned, _neon );
> INIT7( sad_x3, _neon );
> --
> 1.7.4.1
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x264-devel/attachments/20120202/ff1732a8/attachment.html>
More information about the x264-devel mailing list