[x264-devel] [PATCH 04/24] arm: Use aligned loads in x264_coeff_last15_neon

Martin Storsjö martin at martin.st
Thu Aug 13 22:59:25 CEST 2015


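After subtracting 2 from the coefficient pointer in the size 15 case, the pointer is 16-byte aligned, so the same :128-aligned load can be used as for the other sizes.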

checkasm timing      Cortex-A7    A8    A9
coeff_last15_c              423   375   230
coeff_last15_neon           350   420   404  (before)
coeff_last15_neon           350   400   394  (after)
---
 common/arm/quant-a.S |    4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index 4b2129a..ad8d8f8 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -337,10 +337,8 @@ endfunc
 function x264_coeff_last\size\()_neon
 .if \size == 15
     sub         r0,  r0,  #2
-    vld1.64     {d0-d3}, [r0]
-.else
-    vld1.64     {d0-d3}, [r0,:128]
 .endif
+    vld1.64     {d0-d3}, [r0,:128]
     vtst.16     q0,  q0
     vtst.16     q1,  q1
     vshrn.u16   d0,  q0,  #8
-- 
1.7.10.4



More information about the x264-devel mailing list