[x264-devel] arm: x264_coeff_last8_arm
Janne Grunau
git at videolan.org
Wed Apr 23 00:40:58 CEST 2014
x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Sun Mar 16 17:21:58 2014 +0100| [3dd8fd6a89aec7d25afed0ab7371887a14085402] | committer: Jason Garrett-Glaser
arm: x264_coeff_last8_arm
checkasm --bench on a Cortex-A9:
coeff_last8_c: 173
coeff_last8_armv6: 151
60 instead of 73 cycles over ~130k runs on the same CPU while encoding.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3dd8fd6a89aec7d25afed0ab7371887a14085402
---
common/arm/predict-c.c | 30 ------------------------------
common/arm/predict.h | 36 +++++++++++++++++++++++++++---------
common/arm/quant-a.S | 14 ++++++++++++++
common/arm/quant.h | 1 +
common/quant.c | 3 +++
5 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/common/arm/predict-c.c b/common/arm/predict-c.c
index b9ad262..08da8e5 100644
--- a/common/arm/predict-c.c
+++ b/common/arm/predict-c.c
@@ -27,36 +27,6 @@
#include "predict.h"
#include "pixel.h"
-void x264_predict_4x4_dc_armv6( uint8_t *src );
-void x264_predict_4x4_dc_top_neon( uint8_t *src );
-void x264_predict_4x4_h_armv6( uint8_t *src );
-void x264_predict_4x4_ddr_armv6( uint8_t *src );
-void x264_predict_4x4_ddl_neon( uint8_t *src );
-
-void x264_predict_8x8c_dc_neon( uint8_t *src );
-void x264_predict_8x8c_dc_top_neon( uint8_t *src );
-void x264_predict_8x8c_dc_left_neon( uint8_t *src );
-void x264_predict_8x8c_h_neon( uint8_t *src );
-void x264_predict_8x8c_v_neon( uint8_t *src );
-void x264_predict_8x8c_p_neon( uint8_t *src );
-
-void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
-void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
-
-void x264_predict_16x16_dc_neon( uint8_t *src );
-void x264_predict_16x16_dc_top_neon( uint8_t *src );
-void x264_predict_16x16_dc_left_neon( uint8_t *src );
-void x264_predict_16x16_h_neon( uint8_t *src );
-void x264_predict_16x16_v_neon( uint8_t *src );
-void x264_predict_16x16_p_neon( uint8_t *src );
-
void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
{
if (!(cpu&X264_CPU_ARMV6))
diff --git a/common/arm/predict.h b/common/arm/predict.h
index 26e1e93..7c7acfc 100644
--- a/common/arm/predict.h
+++ b/common/arm/predict.h
@@ -27,17 +27,35 @@
#define X264_ARM_PREDICT_H
void x264_predict_4x4_dc_armv6( uint8_t *src );
+void x264_predict_4x4_dc_top_neon( uint8_t *src );
void x264_predict_4x4_v_armv6( uint8_t *src );
void x264_predict_4x4_h_armv6( uint8_t *src );
-void x264_predict_8x8_v_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8_h_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8_dc_neon( pixel *src, pixel edge[36] );
-void x264_predict_8x8c_dc_neon( pixel *src );
-void x264_predict_8x8c_h_neon( pixel *src );
-void x264_predict_8x8c_v_neon( pixel *src );
-void x264_predict_16x16_v_neon( pixel *src );
-void x264_predict_16x16_h_neon( pixel *src );
-void x264_predict_16x16_dc_neon( pixel *src );
+void x264_predict_4x4_ddr_armv6( uint8_t *src );
+void x264_predict_4x4_ddl_neon( uint8_t *src );
+
+void x264_predict_8x8c_dc_neon( uint8_t *src );
+void x264_predict_8x8c_dc_top_neon( uint8_t *src );
+void x264_predict_8x8c_dc_left_neon( uint8_t *src );
+void x264_predict_8x8c_h_neon( uint8_t *src );
+void x264_predict_8x8c_v_neon( uint8_t *src );
+void x264_predict_8x8c_p_neon( uint8_t *src );
+
+void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_ddl_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_ddr_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vl_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vr_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_v_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_hd_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_hu_neon( uint8_t *src, uint8_t edge[36] );
+
+void x264_predict_16x16_dc_neon( uint8_t *src );
+void x264_predict_16x16_dc_top_neon( uint8_t *src );
+void x264_predict_16x16_dc_left_neon( uint8_t *src );
+void x264_predict_16x16_h_neon( uint8_t *src );
+void x264_predict_16x16_v_neon( uint8_t *src );
+void x264_predict_16x16_p_neon( uint8_t *src );
void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] );
void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter );
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 2aeedc4..b8c6ba3 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -321,6 +321,20 @@ function x264_coeff_last4_arm
bx lr
.endfunc
+function x264_coeff_last8_arm
+ ldrd r2, r3, [r0, #8]
+ orrs ip, r2, r3
+ movne r0, #4
+ ldrdeq r2, r3, [r0]
+ moveq r0, #0
+ tst r3, r3
+ addne r0, #2
+ movne r2, r3
+ lsrs r2, r2, #16
+ addne r0, r0, #1
+ bx lr
+.endfunc
+
.macro COEFF_LAST_1x size
function x264_coeff_last\size\()_neon
.if \size == 15
diff --git a/common/arm/quant.h b/common/arm/quant.h
index 0695ab1..75d9fb2 100644
--- a/common/arm/quant.h
+++ b/common/arm/quant.h
@@ -39,6 +39,7 @@ void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
int x264_coeff_last4_arm( int16_t * );
+int x264_coeff_last8_arm( int16_t * );
int x264_coeff_last15_neon( int16_t * );
int x264_coeff_last16_neon( int16_t * );
int x264_coeff_last64_neon( int16_t * );
diff --git a/common/quant.c b/common/quant.c
index 169f39e..b8cca23 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -725,7 +725,10 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
#if HAVE_ARMV6
if( cpu&X264_CPU_ARMV6 )
+ {
pf->coeff_last4 = x264_coeff_last4_arm;
+ pf->coeff_last8 = x264_coeff_last8_arm;
+ }
if( cpu&X264_CPU_NEON )
{
More information about the x264-devel
mailing list