[x264-devel] [PATCH 1/4] x264_intra_sad_x3_4x4_armv6
Jason Garrett-Glaser
jason at x264.com
Sat Jan 28 21:10:16 CET 2012
On Sat, Jan 28, 2012 at 10:51 AM, George Stephanos
<gaf.stephanos at gmail.com> wrote:
> ---
> common/arm/pixel-a.S | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
> common/arm/pixel.h | 1 +
> common/pixel.c | 1 +
> 3 files changed, 61 insertions(+), 0 deletions(-)
>
> diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
> index 217356e..de442e9 100644
> --- a/common/arm/pixel-a.S
> +++ b/common/arm/pixel-a.S
> @@ -1240,3 +1240,62 @@ ssim_skip:
> vmov.32 r0, d0[0]
> bx lr
> .endfunc
> +
> +function x264_intra_sad_x3_4x4_armv6
> + push {r4-r6,lr}
> + mov r5, #0
> +
> +.set Y, 0
> +.rept 4
> +.if Y==0
> + ldrb r6, [r1, #Y*FDEC_STRIDE-1]
> + add r3, r6, r6, lsl #8
> +.else
> + ldrb r3, [r1, #Y*FDEC_STRIDE-1]
> + add r6, r3
> + add r3, r3, r3, lsl #8
> +.endif
> + ldr r4, [r0, #Y*FENC_STRIDE]
> + add r3, r3, r3, lsl #16
> + usada8 r5, r3, r4, r5
> +.set Y, Y+1
> +.endr
> + str r5, [r2, #4]
> + mov r5, #0
> +
> + ldr r3, [r1, #-1*FDEC_STRIDE]
> +
> + ldr r4, [r0, #0*FENC_STRIDE]
> + ldr r1, [r0, #1*FENC_STRIDE]
> + usada8 r5, r3, r4, r5
> + ldr r4, [r0, #2*FENC_STRIDE]
> + usada8 r5, r3, r1, r5
> + ldr r1, [r0, #3*FENC_STRIDE]
> + usada8 r5, r3, r4, r5
> + usada8 r5, r3, r1, r5
> +
> + str r5, [r2]
> +
> + mov r5, #0
> + usad8 r1, r3, r5
> + add r1, r6
> +
> + add r1, #4
Add the 4 to r6 first (before the usad8), so that only one add is left after the usad8; this reduces the latency of this dependency chain.
Jason
More information about the x264-devel mailing list