[x264-devel] [PATCH 9/9] arm: x264_coeff_last8_arm
Janne Grunau
janne-x264 at jannau.net
Sun Mar 16 23:26:46 CET 2014
checkasm --bench on a cortex-a9:
coeff_last8_c: 173
coeff_last8_armv6: 151
60 instead of 73 cycles in ~130k runs on the same cpu while encoding.
---
common/arm/quant-a.S | 14 ++++++++++++++
common/arm/quant.h | 1 +
common/quant.c | 4 +++-
3 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 2aeedc4..b8c6ba3 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -321,6 +321,20 @@ function x264_coeff_last4_arm
bx lr
.endfunc
+function x264_coeff_last8_arm // in: r0 = pointer to 8 int16_t coefficients; out: r0 = index of the highest nonzero one (0 if all are zero)
+ ldrd r2, r3, [r0, #8] // r2,r3 = the two words at r0+8, i.e. coefficients 4..7
+ orrs ip, r2, r3 // Z is set when coefficients 4..7 are all zero
+ movne r0, #4 // upper half has a nonzero value: base index 4 (pointer is no longer needed)
+ ldrdeq r2, r3, [r0] // otherwise look at coefficients 0..3 instead (flags still come from the orrs)
+ moveq r0, #0 // ...with base index 0
+ tst r3, r3 // is the higher pair of the selected four nonzero?
+ addne r0, #2 // yes: index += 2
+ movne r2, r3 // ...and that pair becomes the one to inspect
+ lsrs r2, r2, #16 // keep only the upper halfword: the odd coefficient, assuming little-endian layout (TODO confirm)
+ addne r0, r0, #1 // upper halfword nonzero: index += 1
+ bx lr // return with the index in r0
+.endfunc
+
.macro COEFF_LAST_1x size
function x264_coeff_last\size\()_neon
.if \size == 15
diff --git a/common/arm/quant.h b/common/arm/quant.h
index 0695ab1..75d9fb2 100644
--- a/common/arm/quant.h
+++ b/common/arm/quant.h
@@ -39,6 +39,7 @@ void x264_dequant_4x4_neon( int16_t dct[16], int dequant_mf[6][16], int i_qp );
void x264_dequant_8x8_neon( int16_t dct[64], int dequant_mf[6][64], int i_qp );
int x264_coeff_last4_arm( int16_t * );
+int x264_coeff_last8_arm( int16_t * );
int x264_coeff_last15_neon( int16_t * );
int x264_coeff_last16_neon( int16_t * );
int x264_coeff_last64_neon( int16_t * );
diff --git a/common/quant.c b/common/quant.c
index 169f39e..6427724 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -724,8 +724,10 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
#endif
#if HAVE_ARMV6
- if( cpu&X264_CPU_ARMV6 )
+ if( cpu&X264_CPU_ARMV6 ) {
pf->coeff_last4 = x264_coeff_last4_arm;
+ pf->coeff_last8 = x264_coeff_last8_arm;
+ }
if( cpu&X264_CPU_NEON )
{
--
1.9.0
More information about the x264-devel
mailing list