[x264-devel] [PATCH 1/4] x264_intra_sad_x3_4x4_armv6
George Stephanos
gaf.stephanos at gmail.com
Sat Jan 28 22:15:38 CET 2012
---
common/arm/pixel-a.S | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++
common/arm/pixel.h | 1 +
common/pixel.c | 1 +
3 files changed, 59 insertions(+), 0 deletions(-)
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index 217356e..107f832 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -1240,3 +1240,60 @@ ssim_skip:
vmov.32 r0, d0[0]
bx lr
.endfunc
+ // Three 4x4 SADs of the block at r0 against predictions built from the left column and the row above r1. Writes three words at r2.
+function x264_intra_sad_x3_4x4_armv6
+ push {r4-r6,lr}  // save the callee-saved scratch registers and the return address
+ mov r5, #0  // r5 = SAD accumulator for pass 1
+ // Pass 1: each row of r0 against the byte just left of the same row at r1 (presumably the horizontal predictor, TODO confirm).
+.set Y, 0
+.rept 4
+.if Y==0
+ ldrb r6, [r1, #Y*FDEC_STRIDE-1]  // first left byte, it also seeds the running left-column sum in r6
+ add r3, r6, r6, lsl #8  // copy the byte into the two low byte lanes
+.else
+ ldrb r3, [r1, #Y*FDEC_STRIDE-1]  // left byte for row Y
+ add r6, r3  // r6 += left byte (the sum is reused by pass 3)
+ add r3, r3, r3, lsl #8  // copy the byte into the two low byte lanes
+.endif
+ ldr r4, [r0, #Y*FENC_STRIDE]  // four source bytes of row Y
+ add r3, r3, r3, lsl #16  // r3 = left byte replicated into all four byte lanes
+ usada8 r5, r3, r4, r5  // r5 += sum of the four absolute byte differences
+.set Y, Y+1
+.endr
+ str r5, [r2, #4]  // second output word = pass 1 SAD
+ mov r5, #0  // reset the accumulator for pass 2
+ // Pass 2: every row of r0 against the word directly above r1 (presumably the vertical predictor, TODO confirm).
+ ldr r3, [r1, #-1*FDEC_STRIDE]  // r3 = four bytes of the row above, kept for pass 3 as well
+ // Row loads alternate between r4 and r1 and are interleaved with the usada8 steps (presumably to hide load latency).
+ ldr r4, [r0, #0*FENC_STRIDE]
+ ldr r1, [r0, #1*FENC_STRIDE]  // r1 becomes scratch here, the pointer it held is not read again
+ usada8 r5, r3, r4, r5
+ ldr r4, [r0, #2*FENC_STRIDE]
+ usada8 r5, r3, r1, r5
+ ldr r1, [r0, #3*FENC_STRIDE]
+ usada8 r5, r3, r4, r5
+ usada8 r5, r3, r1, r5
+
+ str r5, [r2]  // first output word = pass 2 SAD
+ // Pass 3: every row of r0 against one value, (left sum + top sum + 4) >> 3 (presumably the DC predictor, TODO confirm).
+ mov r5, #0  // zero serves as the usad8 operand below and as the fresh accumulator
+ add r6, #4  // rounding term for the divide by 8
+ usad8 r1, r3, r5  // SAD against zero = sum of the four bytes of the row above
+
+ add r1, r6  // r1 = top sum + left sum + 4
+ lsr r1, #3  // divide by 8, result is at most 255 so it fits one byte lane
+ add r1, r1, r1, lsl #8  // copy the value into the two low byte lanes
+ ldr r4, [r0, #0*FENC_STRIDE]
+ add r1, r1, r1, lsl #16  // r1 = value replicated into all four byte lanes
+ ldr r3, [r0, #1*FENC_STRIDE]  // r3 is free now and alternates with r4 as row holder
+ usada8 r5, r1, r4, r5
+ ldr r4, [r0, #2*FENC_STRIDE]
+ usada8 r5, r1, r3, r5
+ ldr r3, [r0, #3*FENC_STRIDE]
+ usada8 r5, r1, r4, r5
+ usada8 r5, r1, r3, r5
+
+ str r5, [r2, #8]  // third output word = pass 3 SAD
+ pop {r4-r6,pc}  // restore registers and return by loading the saved lr into pc
+.endfunc
+
diff --git a/common/arm/pixel.h b/common/arm/pixel.h
index 4af4bb0..3e02982 100644
--- a/common/arm/pixel.h
+++ b/common/arm/pixel.h
@@ -69,4 +69,5 @@ void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, int,
int sums[2][4]);
float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
+void x264_intra_sad_x3_4x4_armv6( uint8_t *, uint8_t *, int * );
#endif
diff --git a/common/pixel.c b/common/pixel.c
index ca10c7d..0949405 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -1208,6 +1208,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
+ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_armv6;
}
if( cpu&X264_CPU_NEON )
{
--
1.7.8.3
More information about the x264-devel
mailing list