[x265] [PATCH] asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug
Min Chen
chenm003 at 163.com
Fri Jul 17 04:55:06 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1437100595 25200
# Node ID 4d30628e1c22ae717a9bf1eebfa8320b92227d44
# Parent 8efce8620ae267eb49c421f02e7eeb2a1d74a829
asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug
---
source/common/x86/pixel-a.asm | 24 ++++++++++--------------
1 files changed, 10 insertions(+), 14 deletions(-)
diff -r 8efce8620ae2 -r 4d30628e1c22 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Tue Jul 14 16:29:46 2015 -0700
+++ b/source/common/x86/pixel-a.asm Thu Jul 16 19:36:35 2015 -0700
@@ -7394,7 +7394,7 @@
;---------------------------------------------------------------------------------------------------------------------
%if ARCH_X86_64
INIT_YMM avx2
-cglobal upShift_8, 6,7,4
+cglobal upShift_8, 6,7,3
movd xm2, r6m
add r3d, r3d
dec r5d
@@ -7420,29 +7420,25 @@
jg .loopH
; processing last row of every frame [To handle width which not a multiple of 32]
- lea r3, [pb_movemask + 16]
- mov r5d, 15
- and r5d, r4d
- sub r3, r5
- pmovsxbw m3, [r3]
+ mov r1d, 15
+ and r1d, r4d
+ sub r1, mmsize/2
; NOTE: Width MUST BE more than or equal to 16
- shr r4d, 4
-.loopW2:
+ shr r4d, 4 ; log2(mmsize/2): each iteration consumes mmsize/2 source bytes
+.loopW16:
pmovzxbw m0,[r0]
psllw m0, xm2
movu [r2], m0
add r0, mmsize/2
add r2, mmsize
dec r4d
- jg .loopW2
-
-.nextW2:
- ; process partial of 16
- pmovzxbw m0,[r0]
+ jg .loopW16
+
+ ; Reading past the end of the source array can fault (seen on Mac OS X), so use the negative offset r1 to step back and redo an overlapping final block
+ pmovzxbw m0,[r0 + r1]
psllw m0, xm2
- vpblendvb m0, m0, [r2], m3
- movu [r2], m0
+ movu [r2 + r1 * 2], m0
RET
%endif
More information about the x265-devel
mailing list