[x264-devel] [PATCH 1/4] x264_intra_sad_x3_4x4_armv6

George Stephanos gaf.stephanos at gmail.com
Sat Jan 28 22:15:38 CET 2012


---
 common/arm/pixel-a.S |   57 ++++++++++++++++++++++++++++++++++++++++++++++++++
 common/arm/pixel.h   |    1 +
 common/pixel.c       |    1 +
 3 files changed, 59 insertions(+), 0 deletions(-)

diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index 217356e..107f832 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -1240,3 +1240,60 @@ ssim_skip:
     vmov.32     r0,  d0[0]
     bx          lr
 .endfunc
+
+function x264_intra_sad_x3_4x4_armv6            // writes three 4x4 SADs of the block at r0 (vs. predictions built from the neighbours of r1) into r2[0..2]
+    push        {r4-r6,lr}                      // r0 = source rows (FENC_STRIDE apart), r1 = block whose left/top neighbours are read (FDEC_STRIDE apart), r2 = int results[3]
+    mov         r5, #0                          // r5 = SAD accumulator for pass 1
+    // Pass 1: each source row vs. its left-neighbour byte replicated x4 (presumably the horizontal predictor - confirm against the C reference). r6 collects the sum of the four left bytes.
+.set Y, 0
+.rept 4
+.if Y==0
+    ldrb        r6, [r1, #Y*FDEC_STRIDE-1]      // row 0: the left byte also seeds the running left-column sum in r6
+    add         r3, r6, r6, lsl #8              // r3 = byte duplicated into lanes 0 and 1
+.else
+    ldrb        r3, [r1, #Y*FDEC_STRIDE-1]      // left-neighbour byte of row Y
+    add         r6, r3                          // r6 += left byte (done before r3 is widened)
+    add         r3, r3, r3, lsl #8              // r3 = byte duplicated into lanes 0 and 1
+.endif
+    ldr         r4, [r0, #Y*FENC_STRIDE]        // four source bytes of row Y; NOTE(review): word load assumes alignment or unaligned-access support - confirm
+    add         r3, r3, r3, lsl #16             // byte now replicated across all four lanes
+    usada8      r5, r3, r4, r5                  // r5 += sum of absolute byte differences
+.set Y, Y+1
+.endr
+    str         r5, [r2, #4]                    // results[1] = pass 1 SAD
+    mov         r5, #0                          // restart the accumulator for pass 2
+    // Pass 2: every source row vs. the four bytes directly above the block (presumably the vertical predictor - confirm).
+    ldr         r3, [r1, #-1*FDEC_STRIDE]       // r3 = the four top-neighbour bytes; last read through r1
+    // r1 no longer holds the neighbour pointer from here on; it is reused as a data register. Loads are interleaved with the usada8 chain.
+    ldr         r4, [r0, #0*FENC_STRIDE]
+    ldr         r1, [r0, #1*FENC_STRIDE]
+    usada8      r5, r3, r4, r5
+    ldr         r4, [r0, #2*FENC_STRIDE]
+    usada8      r5, r3, r1, r5
+    ldr         r1, [r0, #3*FENC_STRIDE]
+    usada8      r5, r3, r4, r5
+    usada8      r5, r3, r1, r5
+    // r3 still holds the top row; it is consumed once more below to form the average.
+    str         r5, [r2]                        // results[0] = pass 2 SAD
+    // Pass 3: every source row vs. one byte, the rounded mean of the 4 left + 4 top neighbours (presumably the DC predictor - confirm).
+    mov         r5, #0                          // r5 = 0: zero operand for usad8, then the pass 3 accumulator
+    add         r6, #4                          // rounding term for the divide by 8
+    usad8       r1, r3, r5                      // SAD of the top row against zero = sum of the four top bytes
+    // r1 = (top sum + left sum + 4) >> 3, then replicated into all four byte lanes
+    add         r1, r6
+    lsr         r1, #3
+    add         r1, r1, r1, lsl #8
+    ldr         r4, [r0, #0*FENC_STRIDE]        // source loads are slotted between the replicate steps
+    add         r1, r1, r1, lsl #16
+    ldr         r3, [r0, #1*FENC_STRIDE]        // r3 (top row) is dead by now and reused for source data
+    usada8      r5, r1, r4, r5
+    ldr         r4, [r0, #2*FENC_STRIDE]
+    usada8      r5, r1, r3, r5
+    ldr         r3, [r0, #3*FENC_STRIDE]
+    usada8      r5, r1, r4, r5
+    usada8      r5, r1, r3, r5
+    // All three results are written; r0 is not loaded with a return value.
+    str         r5, [r2, #8]                    // results[2] = pass 3 SAD
+    pop         {r4-r6,pc}                      // restore r4-r6 and return by popping the saved lr into pc
+.endfunc
+
diff --git a/common/arm/pixel.h b/common/arm/pixel.h
index 4af4bb0..3e02982 100644
--- a/common/arm/pixel.h
+++ b/common/arm/pixel.h
@@ -69,4 +69,5 @@ void x264_pixel_ssim_4x4x2_core_neon( const uint8_t *, int,
                                       int sums[2][4]);
 float x264_pixel_ssim_end4_neon( int sum0[5][4], int sum1[5][4], int width );
 
+void x264_intra_sad_x3_4x4_armv6( uint8_t *, uint8_t *, int * );
 #endif
diff --git a/common/pixel.c b/common/pixel.c
index ca10c7d..0949405 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -1208,6 +1208,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
         pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
         pixf->sad_aligned[PIXEL_4x8] = x264_pixel_sad_4x8_armv6;
         pixf->sad_aligned[PIXEL_4x4] = x264_pixel_sad_4x4_armv6;
+        pixf->intra_sad_x3_4x4  = x264_intra_sad_x3_4x4_armv6;
     }
     if( cpu&X264_CPU_NEON )
     {
-- 
1.7.8.3



More information about the x264-devel mailing list