[x264-devel] [PATCH 4/4] x264_intra_sad_x3_16x16_neon

George Stephanos gaf.stephanos at gmail.com
Thu Feb 2 15:43:01 CET 2012


2590 down to 2139.

On Thu, Feb 2, 2012 at 4:41 PM, George Stephanos <gaf.stephanos at gmail.com> wrote:

> ---
>  common/arm/pixel-a.S |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  common/arm/pixel.h   |    2 +
>  common/pixel.c       |    1 +
>  3 files changed, 79 insertions(+), 0 deletions(-)
>
> diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
> index db32671..521c7c6 100644
> --- a/common/arm/pixel-a.S
> +++ b/common/arm/pixel-a.S
> @@ -1414,3 +1414,79 @@ function x264_intra_sad_x3_8x8c_neon
>
>     pop        {r4, pc}
>  .endfunc
> +
> +function x264_intra_sad_x3_16x16_neon  // r0: 16x16 block read with FENC_STRIDE; r1: block whose top row and left column neighbours are read; r2: receives the V, H and DC SADs
> +    push        {r4, lr}               // r4 and lr are used as scratch below
> +    vmov.i8     q8, #0                 // q8 = running sum of the left-column pixels
> +    sub         lr, r1, #FDEC_STRIDE   // lr = row directly above r1
> +    mov         r3, #FENC_STRIDE       // r3 = stride for the r0 row loads
> +    vld1.8      {q0}, [lr]             // q0 = 16 pixels of the row above (vertical predictor)
> +    mov         r4, #FDEC_STRIDE       // r4 = stride for the left-column loads
> +    sub         lr, r1, #1             // lr = pixel immediately left of r1
> +
> +.set Y, 0                              // Y == 0 marks the first unrolled row
> +.rept 16
> +    vld1.8      {q1}, [r0], r3         // q1 = next row from r0, r0 += r3
> +    vld1.8      {d28[]}, [lr], r4      // d28 = left pixel of this row in all 8 lanes, lr += r4
> +.if Y == 0                             // first row initialises the accumulators
> +    vabdl.u8    q2, d0, d2             // q2/q3 = |top - row| widened to u16 (vertical)
> +    vabdl.u8    q3, d1, d3
> +    vabdl.u8    q10, d28, d2           // q10/q11 = |left - row| widened to u16 (horizontal)
> +    vabdl.u8    q11, d28, d3
> +.else                                  // later rows accumulate into them
> +    vabal.u8    q2, d0, d2             // q2/q3 += |top - row|
> +    vabal.u8    q3, d1, d3
> +    vabal.u8    q10, d28, d2           // q10/q11 += |left - row|
> +    vabal.u8    q11, d28, d3
> +.endif
> +    vaddw.u8    q8, d28                // every u16 lane of q8 += left pixel
> +.set Y, -1                             // any non-zero value selects the accumulate path from now on
> +.endr
> +    vmov.i8     d17, #0                // d17 = 0, zero operand for the vpadd reductions below
> +
> +    vadd.u16    d4, d6                 // vertical: fold q3 into q2 (low halves)
> +    vadd.u16    d20, d22               // horizontal: fold q11 into q10 (low halves)
> +    vaddl.u8    q0, d0, d1             // q0 = top-row low half + high half, widened to u16
> +
> +    vadd.u16    d5, d7                 // vertical: fold high halves
> +    vadd.u16    d21, d23               // horizontal: fold high halves
> +    vadd.u16    d0, d1                 // top-row sum reduced to 4 lanes
> +
> +    vadd.u16    d4, d5                 // vertical: 4 partial sums in d4
> +    vadd.u16    d20, d21               // horizontal: 4 partial sums in d20
> +    vpadd.u16   d0, d17                // top-row sum: 2 partial sums, upper lanes zero
> +
> +    vpadd.u16   d4, d17                // vertical: 2 partial sums, upper lanes zero
> +    vpadd.u16   d20, d17               // horizontal: 2 partial sums, upper lanes zero
> +    vpadd.u16   d0, d17                // d0[0] = sum of the 16 top pixels
> +
> +    vpadd.u16   d4, d20                // d4 u16 lanes = {vertical SAD, 0, horizontal SAD, 0}
> +    vadd.u16    d0, d16                // d0[0] = top sum + left sum
> +    vst1.64     {d4}, [r2,:64]!        // store 8 bytes of d4 at r2, r2 += 8; NOTE(review): :64 needs r2 8-byte aligned, confirm callers
> +
> +    vrshr.u16   d0, #5                 // d0[0] = (top sum + left sum + 16) >> 5, the DC value
> +    sub         r0, r0, r3, lsl #4     // rewind r0 by the 16 rows consumed above
> +    vdup.8      d0, d0[0]              // broadcast the low byte of d0 (DC value) to all 8 lanes
> +
> +.set Y, 0                              // Y == 0 marks the first unrolled row again
> +.rept 16
> +    vld1.8      {q1}, [r0], r3         // q1 = next row from r0, r0 += r3
> +.if Y == 0                             // first row initialises the accumulators
> +    vabdl.u8    q12, d0, d2            // q12/q13 = |dc - row| widened to u16
> +    vabdl.u8    q13, d0, d3
> +.else
> +    vabal.u8    q12, d0, d2            // q12/q13 += |dc - row|
> +    vabal.u8    q13, d0, d3
> +.endif
> +.set Y, -1                             // any non-zero value selects the accumulate path from now on
> +.endr
> +
> +    vadd.u16    d24, d26               // DC: fold q13 into q12 (low halves)
> +    vadd.u16    d25, d27               // DC: fold high halves
> +    vadd.u16    d24, d25               // DC: 4 partial sums in d24
> +    vpadd.u16   d24, d17               // DC: 2 partial sums, upper lanes zero (d17 is still 0)
> +    vpadd.u16   d24, d17               // d24 u16 lanes = {DC SAD, 0, 0, 0}
> +    vst1.32     {d24[0]}, [r2,:32]     // store the low 32 bits of d24 as the third result; :32 needs r2 4-byte aligned
> +
> +    pop        {r4, pc}                // restore r4 and return
> +.endfunc
> diff --git a/common/arm/pixel.h b/common/arm/pixel.h
> index 506cf59..f29ddb3 100644
> --- a/common/arm/pixel.h
> +++ b/common/arm/pixel.h
> @@ -72,4 +72,6 @@ float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
>  void x264_intra_sad_x3_4x4_armv6( uint8_t *, uint8_t *, int * );
>  void x264_intra_sad_x3_8x8_neon( uint8_t *, uint8_t *, int * );
>  void x264_intra_sad_x3_8x8c_neon( uint8_t *, uint8_t *, int * );
> +void x264_intra_sad_x3_16x16_neon( uint8_t *, uint8_t *, int * );
> +
>  #endif
> diff --git a/common/pixel.c b/common/pixel.c
> index f6d6a04..d4ae1df 100644
> --- a/common/pixel.c
> +++ b/common/pixel.c
> @@ -1214,6 +1214,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
>     {
>         pixf->intra_sad_x3_8x8  = x264_intra_sad_x3_8x8_neon;
>         pixf->intra_sad_x3_8x8c   = x264_intra_sad_x3_8x8c_neon;
> +        pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16_neon;
>         INIT5( sad, _neon );
>         INIT5( sad_aligned, _neon );
>         INIT7( sad_x3, _neon );
> --
> 1.7.4.1
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x264-devel/attachments/20120202/5ca2e344/attachment-0001.html>


More information about the x264-devel mailing list