[x264-devel] [PATCH 1/4] x264_intra_sad_x3_4x4_armv6

Jason Garrett-Glaser jason at x264.com
Sat Jan 28 21:10:16 CET 2012


On Sat, Jan 28, 2012 at 10:51 AM, George Stephanos
<gaf.stephanos at gmail.com> wrote:
> ---
>  common/arm/pixel-a.S |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  common/arm/pixel.h   |    1 +
>  common/pixel.c       |    1 +
>  3 files changed, 61 insertions(+), 0 deletions(-)
>
> diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
> index 217356e..de442e9 100644
> --- a/common/arm/pixel-a.S
> +++ b/common/arm/pixel-a.S
> @@ -1240,3 +1240,62 @@ ssim_skip:
>     vmov.32     r0,  d0[0]
>     bx          lr
>  .endfunc
> +
> +function x264_intra_sad_x3_4x4_armv6
> +    push        {r4-r6,lr}
> +    mov         r5, #0
> +
> +.set Y, 0
> +.rept 4
> +.if Y==0
> +    ldrb        r6, [r1, #Y*FDEC_STRIDE-1]
> +    add         r3, r6, r6, lsl #8
> +.else
> +    ldrb        r3, [r1, #Y*FDEC_STRIDE-1]
> +    add         r6, r3
> +    add         r3, r3, r3, lsl #8
> +.endif
> +    ldr         r4, [r0, #Y*FENC_STRIDE]
> +    add         r3, r3, r3, lsl #16
> +    usada8      r5, r3, r4, r5
> +.set Y, Y+1
> +.endr
> +    str         r5, [r2, #4]
> +    mov         r5, #0
> +
> +    ldr         r3, [r1, #-1*FDEC_STRIDE]
> +
> +    ldr         r4, [r0, #0*FENC_STRIDE]
> +    ldr         r1, [r0, #1*FENC_STRIDE]
> +    usada8      r5, r3, r4, r5
> +    ldr         r4, [r0, #2*FENC_STRIDE]
> +    usada8      r5, r3, r1, r5
> +    ldr         r1, [r0, #3*FENC_STRIDE]
> +    usada8      r5, r3, r4, r5
> +    usada8      r5, r3, r1, r5
> +
> +    str         r5, [r2]
> +
> +    mov         r5, #0
> +    usad8       r1, r3, r5
> +    add         r1, r6
> +
> +    add         r1, #4

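Add the 4 to r6 first (r6 is already final once the .rept block ends, so this
can be done any time before the usad8) instead of adding it to r1 afterwards.
That way only a single add depends on the usad8 result, which reduces the
latency of this dependency chain.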

Jason


More information about the x264-devel mailing list