[x265] [PATCH] asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug

Min Chen chenm003 at 163.com
Fri Jul 17 04:55:06 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1437100595 25200
# Node ID 4d30628e1c22ae717a9bf1eebfa8320b92227d44
# Parent  8efce8620ae267eb49c421f02e7eeb2a1d74a829
asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug
---
 source/common/x86/pixel-a.asm |   24 ++++++++++--------------
 1 files changed, 10 insertions(+), 14 deletions(-)

diff -r 8efce8620ae2 -r 4d30628e1c22 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Tue Jul 14 16:29:46 2015 -0700
+++ b/source/common/x86/pixel-a.asm	Thu Jul 16 19:36:35 2015 -0700
@@ -7394,7 +7394,7 @@
 ;---------------------------------------------------------------------------------------------------------------------
 %if ARCH_X86_64
 INIT_YMM avx2
-cglobal upShift_8, 6,7,4
+cglobal upShift_8, 6,7,3
     movd        xm2, r6m
     add         r3d, r3d
     dec         r5d
@@ -7420,29 +7420,25 @@
     jg         .loopH
 
     ; processing last row of every frame [To handle width which not a multiple of 32]
-    lea         r3, [pb_movemask + 16]
-    mov         r5d, 15
-    and         r5d, r4d
-    sub         r3, r5
-    pmovsxbw    m3, [r3]
+    mov         r1d, 15
+    and         r1d, r4d
+    sub         r1, mmsize/2
 
     ; NOTE: Width MUST BE more than or equal to 16
-    shr         r4d, 4
-.loopW2:
+    shr         r4d, 4          ; r4d = number of full 16-pixel blocks (shift = log2(mmsize/2))
+.loopW16:
     pmovzxbw    m0,[r0]
     psllw       m0, xm2
     movu        [r2], m0
     add         r0, mmsize/2
     add         r2, mmsize
     dec         r4d
-    jg         .loopW2
-
-.nextW2:
-    ; process partial of 16
-    pmovzxbw    m0,[r0]
+    jg         .loopW16
+
+    ; Reading past the end of the row crashes on Mac OS X, so step back by r1 = (width & 15) - 16 and redo the last 16 pixels
+    pmovzxbw    m0,[r0 + r1]
     psllw       m0, xm2
-    vpblendvb   m0, m0, [r2], m3
-    movu        [r2], m0
+    movu        [r2 + r1 * 2], m0
     RET
 %endif
 



More information about the x265-devel mailing list