[x264-devel] [PATCH 03/24] arm: Simplify x264_predict_8x8c_p_neon

Martin Storsjö martin at martin.st
Thu Aug 13 22:59:24 CEST 2015


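This gets rid of a few unnecessary (and confusing) steps in
calculating the increment to i00. The old code subtracted
(d4[0] << 3) from d5[0] and then added the same value back, so
every lane of q3 always ended up holding d5[0]; broadcast d5[0]
into q3 directly instead.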

checkasm timing      Cortex-A7    A8    A9
intra_predict_8x8c_p_c      5525  4732  4755
intra_predict_8x8c_p_neon   1719  1140  1262  (before)
intra_predict_8x8c_p_neon   1663  1142  1255  (after)
---
 common/arm/predict-a.S |    7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
index 3343144..7e5d9d3 100644
--- a/common/arm/predict-a.S
+++ b/common/arm/predict-a.S
@@ -535,17 +535,12 @@ function x264_predict_8x8c_p_neon
     vadd.i16    d16, d16, d0
     vshl.i16    d2,  d16, #4
     vsub.i16    d2,  d2,  d3
-    vshl.i16    d3,  d4,  #3
     vext.16     q0,  q0,  q0,  #7
-    vsub.i16    d6,  d5,  d3
     vmov.16     d0[0], r3
     vmul.i16    q0,  q0,  d4[0]
     vdup.16     q1,  d2[0]
-    vdup.16     q2,  d4[0]
-    vdup.16     q3,  d6[0]
-    vshl.i16    q2,  q2,  #3
+    vdup.16     q3,  d5[0]
     vadd.i16    q1,  q1,  q0
-    vadd.i16    q3,  q3,  q2
     mov         r3,  #8
 1:
     vqshrun.s16 d0,  q1,  #5
-- 
1.7.10.4



More information about the x264-devel mailing list