[x264-devel] [PATCH] x264_decimate_score_15/16_neon
George Stephanos
gaf.stephanos at gmail.com
Thu Feb 9 00:25:35 CET 2012
---
common/arm/quant-a.S | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++
common/arm/quant.h | 2 +
common/quant.c | 2 +
tools/checkasm.c | 1 +
4 files changed, 76 insertions(+), 0 deletions(-)
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 1c14c86..c2b50ba 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -32,6 +32,9 @@
pmovmskb_byte:
.byte 1,2,4,8,16,32,64,128
.byte 1,2,4,8,16,32,64,128
+decimate_table4: // per-coefficient score, indexed by the zero-run length (0..15) found with clz in DECIMATE_SCORE
+.byte 3,2,2,1,1,1,0,0
+.byte 0,0,0,0,0,0,0,0
.text
@@ -457,3 +460,71 @@ COEFF_LEVEL_RUN 8
COEFF_LEVEL_RUN 15
COEFF_LEVEL_RUN 16
+// int x264_decimate_score{15,16}_neon( int16_t *dct )
+// Returns 9 if any |dct[i]| >= 2, else the sum of decimate_table4[run] over the nonzero coefficients
+// (run = zeros directly below). The 15 variant ignores dct[0] for scoring; the >= 2 check still covers it.
+.macro DECIMATE_SCORE size
+function x264_decimate_score\size\()_neon
+ push {r4, r5} // leaf function (no calls): lr need not be saved, frame stays 8 bytes
+ vld1.64 {d0-d3}, [r0,:128] // all 16 coefficients; :128 asserts a 16-byte aligned pointer
+ vabs.s16 q0, q0
+ vabs.s16 q1, q1
+ vqmovn.u16 d0, q0 // unsigned saturation: 0x8000 from abs(-32768) becomes 255, not 0
+ vqmovn.u16 d1, q1 // q0 = |coef| clamped to a byte, lane i = coefficient i
+
+ vmov.i8 q1, #2
+ vcge.u8 q1, q0, q1 // 0xff in every lane with |coef| >= 2
+ vqmovn.u16 d4, q1 // fold the 16 mask bytes into 8; a nonzero pair stays nonzero
+ vmov r3, r4, d4
+ orrs r3, r4
+ movne r0, #9
+ bne 2f
+
+ movrel r1, pmovmskb_byte
+ vld1.64 {d2, d3}, [r1]
+ vtst.8 q0, q0 // 0xff in every nonzero lane
+ vand q0, q1 // lane i keeps bit (i & 7) when coefficient i is nonzero
+ vmov.i8 d2, #0
+ vpadd.u8 d0, d2 // three pairwise-add rounds per half gather the 8 lane bits into byte 0
+ vpadd.u8 d1, d2
+ vpadd.u8 d0, d2
+ vpadd.u8 d1, d2
+ vpadd.u8 d0, d2
+ vpadd.u8 d1, d2
+ vshl.u64 d1, #8
+ vorr.u8 d0, d1
+ vmov.32 r2, d0[0] // r2 bit i = (dct[i] != 0)
+
+ mov r0, #0
+.if \size == 15
+ lsrs r2, #1 // drop dct[0] first so a block with only dct[0] set scores 0
+.else
+ movs r2, r2
+.endif
+ beq 2f // no coefficient left to score
+ orr r2, #0xf0000000 // sentinel bits so that clz always terminates
+ ror r2, #\size // highest coefficient in bit 31, sentinel directly below the lowest one
+
+ movrel r4, decimate_table4
+ mov r5, #\size
+ clz r3, r2 // skip the zeros above the highest nonzero coefficient ...
+ add r3, #1 // ... and that coefficient itself
+ lsl r2, r3
+ sub r5, r3 // r5 = number of coefficients still below the current position
+1:
+ clz r3, r2 // zero run below the coefficient that was just shifted out
+ ldrb r1, [r4, r3]
+ add r0, r1
+ add r3, #1
+ lsl r2, r3
+ subs r5, r3
+ bge 1b // r5 turns negative once the run ended on the sentinel
+2:
+ pop {r4, r5}
+ bx lr
+.endfunc
+.endm
+
+DECIMATE_SCORE 15
+DECIMATE_SCORE 16
+
diff --git a/common/arm/quant.h b/common/arm/quant.h
index a548d15..db48b25 100644
--- a/common/arm/quant.h
+++ b/common/arm/quant.h
@@ -47,4 +47,6 @@ int x264_coeff_level_run8_neon( int16_t * );
int x264_coeff_level_run15_neon( int16_t * );
int x264_coeff_level_run16_neon( int16_t * );
+int x264_decimate_score15_neon( int16_t * );
+int x264_decimate_score16_neon( int16_t * );
#endif
diff --git a/common/quant.c b/common/quant.c
index 5a19d73..605cf28 100644
--- a/common/quant.c
+++ b/common/quant.c
@@ -710,6 +710,8 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->coeff_level_run8 = x264_coeff_level_run8_neon;
pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15_neon;
pf->coeff_level_run[DCT_LUMA_4x4] = x264_coeff_level_run16_neon;
+ pf->decimate_score15 = x264_decimate_score15_neon;
+ pf->decimate_score16 = x264_decimate_score16_neon;
}
#endif
#endif // HIGH_BIT_DEPTH
diff --git a/tools/checkasm.c b/tools/checkasm.c
index ae7750f..e65a657 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -1968,6 +1968,7 @@ static int check_quant( int cpu_ref, int cpu_new )
{ \
ok = 0; \
fprintf( stderr, #decname ": [FAILED]\n" ); \
+ fprintf( stderr, "\nresult_a: %d\nresult_c: %d\n", result_a, result_c ); \
break; \
} \
} \
--
1.7.4.1
More information about the x264-devel
mailing list