[x265] [PATCH] asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug

Steve Borho steve at borho.org
Fri Jul 17 04:58:16 CEST 2015


On 07/16, Min Chen wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1437100595 25200
> # Node ID 4d30628e1c22ae717a9bf1eebfa8320b92227d44
> # Parent  8efce8620ae267eb49c421f02e7eeb2a1d74a829
> asm: rewrite partial process code in upShift_8_avx2 to avoid Mac crash bug

Looks good. Once the smoke tests finish on my Mac, I'll push this with
your other pending main12 patches.

>  source/common/x86/pixel-a.asm |   24 ++++++++++--------------
>  1 files changed, 10 insertions(+), 14 deletions(-)
> 
> diff -r 8efce8620ae2 -r 4d30628e1c22 source/common/x86/pixel-a.asm
> --- a/source/common/x86/pixel-a.asm	Tue Jul 14 16:29:46 2015 -0700
> +++ b/source/common/x86/pixel-a.asm	Thu Jul 16 19:36:35 2015 -0700
> @@ -7394,7 +7394,7 @@
>  ;---------------------------------------------------------------------------------------------------------------------
>  %if ARCH_X86_64
>  INIT_YMM avx2
> -cglobal upShift_8, 6,7,4
> +cglobal upShift_8, 6,7,3
>      movd        xm2, r6m
>      add         r3d, r3d
>      dec         r5d
> @@ -7420,29 +7420,25 @@
>      jg         .loopH
>  
>      ; processing last row of every frame [To handle width which not a multiple of 32]
> -    lea         r3, [pb_movemask + 16]
> -    mov         r5d, 15
> -    and         r5d, r4d
> -    sub         r3, r5
> -    pmovsxbw    m3, [r3]
> +    mov         r1d, 15
> +    and         r1d, r4d
> +    sub         r1, mmsize/2
>  
>      ; NOTE: Width MUST BE more than or equal to 16
> -    shr         r4d, 4
> -.loopW2:
> +    shr         r4d, 4          ; log2(mmsize/2)
> +.loopW16:
>      pmovzxbw    m0,[r0]
>      psllw       m0, xm2
>      movu        [r2], m0
>      add         r0, mmsize/2
>      add         r2, mmsize
>      dec         r4d
> -    jg         .loopW2
> -
> -.nextW2:
> -    ; process partial of 16
> -    pmovzxbw    m0,[r0]
> +    jg         .loopW16
> +
> +    ; Mac OS X can't read beyond array bound, so rollback some bytes
> +    pmovzxbw    m0,[r0 + r1]
>      psllw       m0, xm2
> -    vpblendvb   m0, m0, [r2], m3
> -    movu        [r2], m0
> +    movu        [r2 + r1 * 2], m0
>      RET
>  %endif
>  
> 
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list