[x264-devel] [PATCH 2/4] x264_intra_sad_x3_8x8_neon

George Stephanos gaf.stephanos at gmail.com
Thu Feb 2 03:15:31 CET 2012


Down from 1205 to 868!
I would really like to know why I can't align the first store to anything.

On Thu, Feb 2, 2012 at 4:10 AM, George Stephanos <gaf.stephanos at gmail.com> wrote:

> ---
>  common/arm/pixel-a.S |   47
> +++++++++++++++++++++++++++++++++++++++++++++++
>  common/arm/pixel.h   |    1 +
>  common/pixel.c       |    1 +
>  3 files changed, 49 insertions(+), 0 deletions(-)
>
> diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
> index da5f36c..995049b 100644
> --- a/common/arm/pixel-a.S
> +++ b/common/arm/pixel-a.S
> @@ -1289,3 +1289,50 @@ function x264_intra_sad_x3_4x4_armv6
>     str         r5, [r2, #8]
>     pop         {r4-r8,pc}
>  .endfunc
> +
> +function x264_intra_sad_x3_8x8_neon    @ r0 = 8x8 source block, r1 = edge bytes, r2 = int[3] result (three SADs)
> +    add         r1, #7
> +    vld1.8      {d4}, [r1]              @ d4 = edge bytes 7..14 (presumably the left neighbours, stored bottom-to-top; confirm against caller)
> +    add         r1, #9                  @ r1 now points at edge byte 16
> +    vrev64.8    d4, d4                  @ reverse byte order so that d4[y] is the byte used for row y
> +    vld1.8      {d0}, [r1]              @ d0 = edge bytes 16..23 (presumably the top neighbours; confirm against caller)
> +
> +    mov         r3, #FENC_STRIDE        @ row pitch used to post-increment r0 in the loop
> +
> +    vaddl.u8    q12, d0, d4             @ widen to u16 and add the two 8-byte edges lane-wise
> +    vadd.u16    d24, d25                @ fold 8 lanes into 4
> +    vmov.i8     q1, #0                  @ q1  = accumulator for SAD against d0 (same 8 bytes for every row)
> +    vpadd.u16   d24, d24                @ fold 4 lanes into 2 (duplicated)
> +    vmov.i8     q3, #0                  @ q3  = accumulator for SAD against the per-row byte d4[y]
> +    vpadd.u16   d24, d24                @ every lane now holds the sum of all 16 edge bytes
> +    vmov.i8     q13, #0                 @ q13 = accumulator for SAD against the rounded edge average
> +    vrshr.u16   d24, #4                 @ rounded average: (sum + 8) >> 4, always fits in 8 bits
> +    vdup.8      d24, d24[0]             @ broadcast the average to all 8 bytes of d24
> +
> +.irpc Y,0246                            @ unrolled 4x, two rows per iteration (Y = even row index)
> +    vld1.8      {d16}, [r0], r3         @ d16 = row Y of the source block
> +    vld1.8      {d17}, [r0], r3         @ d17 = row Y+1 of the source block
> +    vdup.8      d5, d4[\Y]              @ d5 = per-row byte for row Y, replicated across the row
> +    vabal.u8    q1, d16, d0             @ q1  += |row - d0|
> +    vabal.u8    q3, d16, d5             @ q3  += |row - d5|
> +    vabal.u8    q13, d16, d24           @ q13 += |row - average|
> +    vdup.8      d5, d4[\Y+1]            @ same again for row Y+1
> +    vabal.u8    q1, d17, d0
> +    vabal.u8    q3, d17, d5
> +    vabal.u8    q13, d17, d24
> +.endr
> +    vmov.i8     d0, #0                  @ d0 is no longer needed as a predictor; reuse it as zero padding
> +
> +    vadd.u16    d2, d3                  @ fold each accumulator from 8 u16 lanes to 4
> +    vadd.u16    d6, d7
> +    vadd.u16    d26, d27
> +    vpadd.u16   d2, d0                  @ pairwise add with zeros: lanes 0-1 = partial sums, lanes 2-3 = 0
> +    vpadd.u16   d6, d0
> +    vpadd.u16   d26, d0
> +    vpadd.u16   d2, d6                  @ d2 = { SAD(q1), 0, SAD(q3), 0 } as u16, i.e. two zero-extended 32-bit values
> +    vpadd.u16   d26, d26                @ d26 lane 0-1 = { SAD(q13), 0 }; total <= 64*255 so u16 cannot overflow
> +    vst1.64     {d2}, [r2]!             @ result[0], result[1]; no alignment qualifier on this store; r2 += 8
> +    vst1.32     {d26[0]}, [r2,:32]      @ result[2], with a 32-bit alignment qualifier
> +
> +    bx          lr
> +.endfunc
> diff --git a/common/arm/pixel.h b/common/arm/pixel.h
> index 3e02982..07a72c2 100644
> --- a/common/arm/pixel.h
> +++ b/common/arm/pixel.h
> @@ -70,4 +70,5 @@ void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *,
> int,
>  float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int
> width );
>
>  void x264_intra_sad_x3_4x4_armv6( uint8_t *, uint8_t *, int * );
> +void x264_intra_sad_x3_8x8_neon( uint8_t *, uint8_t *, int * );
>  #endif
> diff --git a/common/pixel.c b/common/pixel.c
> index 0949405..af7006f 100644
> --- a/common/pixel.c
> +++ b/common/pixel.c
> @@ -1212,6 +1212,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t
> *pixf )
>     }
>     if( cpu&X264_CPU_NEON )
>     {
> +        pixf->intra_sad_x3_8x8  = x264_intra_sad_x3_8x8_neon;
>         INIT5( sad, _neon );
>         INIT5( sad_aligned, _neon );
>         INIT7( sad_x3, _neon );
> --
> 1.7.4.1
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x264-devel/attachments/20120202/ff1732a8/attachment.html>


More information about the x264-devel mailing list