[x264-devel] arm: do not use aligned stores in mc_weight_w4_*neon

Tue Aug 26 18:23:11 CEST 2014

x264 | branch: master | Janne Grunau <janne-x264 at jannau.net> | Sun Jul 20 18:24:57 2014 +0200| [0a05b3f9aa8c524a67119ec5eb6bcc24eb8f2f3b] | committer: Fiona Glaser

arm: do not use aligned stores in mc_weight_w4_*neon

mc_weight_w4_*neon is also used for width 2, which does not guarantee a
4-byte aligned destination. Fixes crashes caused by random memory
corruption.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=0a05b3f9aa8c524a67119ec5eb6bcc24eb8f2f3b
---

 common/arm/mc-a.S |   12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index cd57920..2b7acda 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -567,8 +567,8 @@ weight4_loop:
     vrshl.s16   q8,  q8,  q2
     vadd.s16    q8,  q8,  q1
     vqmovun.s16 d16, q8
-    vst1.32     {d16[0]}, [r0,:32], r1
-    vst1.32     {d16[1]}, [r0,:32], r1
+    vst1.32     {d16[0]}, [r0], r1
+    vst1.32     {d16[1]}, [r0], r1
     bgt         weight4_loop
     pop         {r4-r5,pc}
 endfunc
@@ -670,8 +670,8 @@ weight4_nodenom_loop:
     vmla.s16    d20, d16, d0
     vmla.s16    d21, d18, d0
     vqmovun.s16 d16, q10
-    vst1.32     {d16[0]}, [r0,:32], r1
-    vst1.32     {d16[1]}, [r0,:32], r1
+    vst1.32     {d16[0]}, [r0], r1
+    vst1.32     {d16[1]}, [r0], r1
     bgt         weight4_nodenom_loop
     pop         {r4-r5,pc}
 endfunc
@@ -734,8 +734,8 @@ weight4_\name\()_loop:
     vld1.32     {d16[]}, [r2], r3
     vld1.32     {d17[]}, [r2], r3
     \op         q8,  q8,  q1
-    vst1.32     {d16[0]}, [r0,:32], r1
-    vst1.32     {d17[0]}, [r0,:32], r1
+    vst1.32     {d16[0]}, [r0], r1
+    vst1.32     {d17[0]}, [r0], r1
     bgt         weight4_\name\()_loop
     pop         {pc}
 endfunc