[x265] [PATCH] asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug
chen
chenm003 at 163.com
Fri Jul 17 05:15:58 CEST 2015
At 2015-07-17 10:58:16,"Steve Borho" <steve at borho.org> wrote:
>On 07/16, Min Chen wrote:
>> # HG changeset patch
>> # User Min Chen <chenm003 at 163.com>
>> # Date 1437100595 25200
>> # Node ID 4d30628e1c22ae717a9bf1eebfa8320b92227d44
>> # Parent 8efce8620ae267eb49c421f02e7eeb2a1d74a829
>> asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug
>
>looks good, once the smoke tests finish on my Mac I'll push this with
>your other pending main12 patches
Thanks. Note that if the input video resolution is less than 16xN, this code will also crash.
>
>> source/common/x86/pixel-a.asm | 24 ++++++++++--------------
>> 1 files changed, 10 insertions(+), 14 deletions(-)
>>
>> diff -r 8efce8620ae2 -r 4d30628e1c22 source/common/x86/pixel-a.asm
>> --- a/source/common/x86/pixel-a.asm Tue Jul 14 16:29:46 2015 -0700
>> +++ b/source/common/x86/pixel-a.asm Thu Jul 16 19:36:35 2015 -0700
>> @@ -7394,7 +7394,7 @@
>> ;---------------------------------------------------------------------------------------------------------------------
>> %if ARCH_X86_64
>> INIT_YMM avx2
>> -cglobal upShift_8, 6,7,4
>> +cglobal upShift_8, 6,7,3
>> movd xm2, r6m
>> add r3d, r3d
>> dec r5d
>> @@ -7420,29 +7420,25 @@
>> jg .loopH
>>
>> ; processing last row of every frame [To handle width which not a multiple of 32]
>> - lea r3, [pb_movemask + 16]
>> - mov r5d, 15
>> - and r5d, r4d
>> - sub r3, r5
>> - pmovsxbw m3, [r3]
>> + mov r1d, 15
>> + and r1d, r4d
>> + sub r1, mmsize/2
>>
>> ; NOTE: Width MUST BE more than or equal to 16
>> - shr r4d, 4
>> -.loopW2:
>> + shr r4d, 4 ; log2(mmsize)
>> +.loopW16:
>> pmovzxbw m0,[r0]
>> psllw m0, xm2
>> movu [r2], m0
>> add r0, mmsize/2
>> add r2, mmsize
>> dec r4d
>> - jg .loopW2
>> -
>> -.nextW2:
>> - ; process partial of 16
>> - pmovzxbw m0,[r0]
>> + jg .loopW16
>> +
>> + ; Mac OS X can't read beyond array bound, so rollback some bytes
>> + pmovzxbw m0,[r0 + r1]
>> psllw m0, xm2
>> - vpblendvb m0, m0, [r2], m3
>> - movu [r2], m0
>> + movu [r2 + r1 * 2], m0
>> RET
>> %endif
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>
>--
>Steve Borho
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150717/8eaeb552/attachment.html>
More information about the x265-devel
mailing list