[x265] [PATCH] asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug

chen chenm003 at 163.com
Fri Jul 17 05:15:58 CEST 2015


At 2015-07-17 10:58:16,"Steve Borho" <steve at borho.org> wrote:
>On 07/16, Min Chen wrote:
>> # HG changeset patch
>> # User Min Chen <chenm003 at 163.com>
>> # Date 1437100595 25200
>> # Node ID 4d30628e1c22ae717a9bf1eebfa8320b92227d44
>> # Parent  8efce8620ae267eb49c421f02e7eeb2a1d74a829
>> asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug
>
>looks good, once the smoke tests finish on my Mac I'll push this with
>your other pending main12 patches


Thanks. If the input video resolution is less than 16xN, this code will also crash.

>
>>  source/common/x86/pixel-a.asm |   24 ++++++++++--------------
>>  1 files changed, 10 insertions(+), 14 deletions(-)
>> 
>> diff -r 8efce8620ae2 -r 4d30628e1c22 source/common/x86/pixel-a.asm
>> --- a/source/common/x86/pixel-a.asm	Tue Jul 14 16:29:46 2015 -0700
>> +++ b/source/common/x86/pixel-a.asm	Thu Jul 16 19:36:35 2015 -0700
>> @@ -7394,7 +7394,7 @@
>>  ;---------------------------------------------------------------------------------------------------------------------
>>  %if ARCH_X86_64
>>  INIT_YMM avx2
>> -cglobal upShift_8, 6,7,4
>> +cglobal upShift_8, 6,7,3
>>      movd        xm2, r6m
>>      add         r3d, r3d
>>      dec         r5d
>> @@ -7420,29 +7420,25 @@
>>      jg         .loopH
>>  
>>      ; processing last row of every frame [To handle width which not a multiple of 32]
>> -    lea         r3, [pb_movemask + 16]
>> -    mov         r5d, 15
>> -    and         r5d, r4d
>> -    sub         r3, r5
>> -    pmovsxbw    m3, [r3]
>> +    mov         r1d, 15
>> +    and         r1d, r4d
>> +    sub         r1, mmsize/2
>>  
>>      ; NOTE: Width MUST BE more than or equal to 16
>> -    shr         r4d, 4
>> -.loopW2:
>> +    shr         r4d, 4          ; log2(mmsize)
>> +.loopW16:
>>      pmovzxbw    m0,[r0]
>>      psllw       m0, xm2
>>      movu        [r2], m0
>>      add         r0, mmsize/2
>>      add         r2, mmsize
>>      dec         r4d
>> -    jg         .loopW2
>> -
>> -.nextW2:
>> -    ; process partial of 16
>> -    pmovzxbw    m0,[r0]
>> +    jg         .loopW16
>> +
>> +    ; Mac OS X can't read beyond array bound, so rollback some bytes
>> +    pmovzxbw    m0,[r0 + r1]
>>      psllw       m0, xm2
>> -    vpblendvb   m0, m0, [r2], m3
>> -    movu        [r2], m0
>> +    movu        [r2 + r1 * 2], m0
>>      RET
>>  %endif
>>  
>> 
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>
>-- 
>Steve Borho
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150717/8eaeb552/attachment.html>


More information about the x265-devel mailing list