[x264-devel] arm: x264_coeff_last8_arm

Janne Grunau git at videolan.org
Wed Apr 23 00:40:58 CEST 2014


x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Sun Mar 16 17:21:58 2014 +0100| [3dd8fd6a89aec7d25afed0ab7371887a14085402] | committer: Jason Garrett-Glaser

arm: x264_coeff_last8_arm

checkasm --bench on a Cortex-A9:
coeff_last8_c: 173
coeff_last8_armv6: 151

60 instead of 73 cycles in ~130k runs on the same cpu while encoding.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3dd8fd6a89aec7d25afed0ab7371887a14085402
---

 common/arm/predict-c.c |   30 ------------------------------
 common/arm/predict.h   |   36 +++++++++++++++++++++++++++---------
 common/arm/quant-a.S   |   14 ++++++++++++++
 common/arm/quant.h     |    1 +
 common/quant.c         |    3 +++
 5 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/common/arm/predict-c.c b/common/arm/predict-c.c
index b9ad262..08da8e5 100644
--- a/common/arm/predict-c.c
+++ b/common/arm/predict-c.c
@@ -27,36 +27,6 @@
 #include "predict.h"
 #include "pixel.h"
 
-void x264_predict_4x4_dc_armv6( uint8_t *src );
-void x264_predict_4x4_dc_top_neon( uint8_t *src );
-void x264_predict_4x4_h_armv6( uint8_t *src );
-void x264_predict_4x4_ddr_armv6( uint8_t *src );
-void x264_predict_4x4_ddl_neon( uint8_t *src );
-
-void x264_predict_8x8c_dc_neon( uint8_t *src );
-void x264_predict_8x8c_dc_top_neon( uint8_t *src );
-void x264_predict_8x8c_dc_left_neon( uint8_t *src );
-void x264_predict_8x8c_h_neon( uint8_t *src );
-void x264_predict_8x8c_v_neon( uint8_t *src );
-void x264_predict_8x8c_p_neon( uint8_t *src );
-
-void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
-
-void x264_predict_16x16_dc_neon( uint8_t *src );
-void x264_predict_16x16_dc_top_neon( uint8_t *src );
-void x264_predict_16x16_dc_left_neon( uint8_t *src );
-void x264_predict_16x16_h_neon( uint8_t *src );
-void x264_predict_16x16_v_neon( uint8_t *src );
-void x264_predict_16x16_p_neon( uint8_t *src );
-
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
 {
     if (!(cpu&X264_CPU_ARMV6))
diff --git a/common/arm/predict.h b/common/arm/predict.h
index 26e1e93..7c7acfc 100644
--- a/common/arm/predict.h
+++ b/common/arm/predict.h
@@ -27,17 +27,35 @@
 #define X264_ARM_PREDICT_H
 
 void x264_predict_4x4_dc_armv6( uint8_t *src );
+void x264_predict_4x4_dc_top_neon( uint8_t *src );
 void x264_predict_4x4_v_armv6( uint8_t *src );
 void x264_predict_4x4_h_armv6( uint8_t *src );
-void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8c_dc_neon( pixel *src );
-void x264_predict_8x8c_h_neon( pixel *src );
-void x264_predict_8x8c_v_neon( pixel *src );
-void x264_predict_16x16_v_neon( pixel *src );
-void x264_predict_16x16_h_neon( pixel *src );
-void x264_predict_16x16_dc_neon( pixel *src );
+void x264_predict_4x4_ddr_armv6( uint8_t *src );
+void x264_predict_4x4_ddl_neon( uint8_t *src );
+
+void x264_predict_8x8c_dc_neon( uint8_t *src );
+void x264_predict_8x8c_dc_top_neon( uint8_t *src );
+void x264_predict_8x8c_dc_left_neon( uint8_t *src );
+void x264_predict_8x8c_h_neon( uint8_t *src );
+void x264_predict_8x8c_v_neon( uint8_t *src );
+void x264_predict_8x8c_p_neon( uint8_t *src );
+
+void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
+
+void x264_predict_16x16_dc_neon( uint8_t *src );
+void x264_predict_16x16_dc_top_neon( uint8_t *src );
+void x264_predict_16x16_dc_left_neon( uint8_t *src );
+void x264_predict_16x16_h_neon( uint8_t *src );
+void x264_predict_16x16_v_neon( uint8_t *src );
+void x264_predict_16x16_p_neon( uint8_t *src );
 
 void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 2aeedc4..b8c6ba3 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -321,6 +321,20 @@ function x264_coeff_last4_arm
     bx          lr
 .endfunc
 
+function x264_coeff_last8_arm
+    ldrd        r2,  r3,  [r0, #8]
+    orrs        ip,  r2,  r3
+    movne       r0,  #4
+    ldrdeq      r2,  r3,  [r0]
+    moveq       r0,  #0
+    tst         r3,  r3
+    addne       r0,  #2
+    movne       r2,  r3
+    lsrs        r2,  r2,  #16
+    addne       r0,  r0,  #1
+    bx          lr
+.endfunc
+
 .macro COEFF_LAST_1x size
 function x264_coeff_last\size\()_neon
 .if \size == 15
diff --git a/common/arm/quant.h b/common/arm/quant.h
index 0695ab1..75d9fb2 100644
--- a/common/arm/quant.h
+++ b/common/arm/quant.h
@@ -39,6 +39,7 @@ void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
 void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
 
 int x264_coeff_last4_arm( int16_t * );
+int x264_coeff_last8_arm( int16_t * );
 int x264_coeff_last15_neon( int16_t * );
 int x264_coeff_last16_neon( int16_t * );
 int x264_coeff_last64_neon( int16_t * );
diff --git a/common/quant.c b/common/quant.c
index 169f39e..b8cca23 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -725,7 +725,10 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
 
 #if HAVE_ARMV6
     if( cpu&X264_CPU_ARMV6 )
+    {
         pf->coeff_last4 = x264_coeff_last4_arm;
+        pf->coeff_last8 = x264_coeff_last8_arm;
+    }
 
     if( cpu&X264_CPU_NEON )
     {



More information about the x264-devel mailing list