[x265] [PATCH] asm: improve sad[32x32] by 10% by unrolling the loop

Steve Borho steve at borho.org
Mon Mar 16 23:37:51 CET 2015


On 03/16, Min Chen wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1426539636 25200
> # Node ID 117fb09221983c5f50988741168a216d35e3581a
> # Parent  d33fc159951225e42889071ef3d877d23f693197
> asm: improve sad[32x32] 10% by unroll loop

queued for testing

> ---
>  source/common/x86/sad-a.asm |   25 ++++++++++++++++++-------
>  1 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff -r d33fc1599512 -r 117fb0922198 source/common/x86/sad-a.asm
> --- a/source/common/x86/sad-a.asm	Mon Mar 16 12:00:42 2015 -0700
> +++ b/source/common/x86/sad-a.asm	Mon Mar 16 14:00:36 2015 -0700
> @@ -3898,9 +3898,11 @@
>      RET
>  
>  INIT_YMM avx2
> -cglobal pixel_sad_32x32, 4,5,5
> +cglobal pixel_sad_32x32, 4,7,5
>      xorps           m0, m0
> -    mov             r4d, 16
> +    mov             r4d, 32/4
> +    lea             r5, [r1 * 3]
> +    lea             r6, [r3 * 3]
>  
>  .loop
>      movu           m1, [r0]               ; row 0 of pix0
> @@ -3913,11 +3915,21 @@
>      paddd          m0, m1
>      paddd          m0, m3
>  
> -    lea     r2,     [r2 + 2 * r3]
> -    lea     r0,     [r0 + 2 * r1]
> -
> -    dec         r4d
> -    jnz         .loop
> +    movu           m1, [r0 + 2 * r1]      ; row 2 of pix0
> +    movu           m2, [r2 + 2 * r3]      ; row 2 of pix1
> +    movu           m3, [r0 + r5]          ; row 3 of pix0
> +    movu           m4, [r2 + r6]          ; row 3 of pix1
> +
> +    psadbw         m1, m2
> +    psadbw         m3, m4
> +    paddd          m0, m1
> +    paddd          m0, m3
> +
> +    lea            r2,     [r2 + 4 * r3]
> +    lea            r0,     [r0 + 4 * r1]
> +
> +    dec            r4d
> +    jnz           .loop
>  
>      vextracti128   xm1, m0, 1
>      paddd          xm0, xm1
> @@ -3926,5 +3938,4 @@
>      movd            eax, xm0
>      RET
>  
> -
>  %endif
> 
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list