[x265] [PATCH] copy_cnt 4x4 AVX2 asm code, as per new interface

Steve Borho steve at borho.org
Fri Sep 5 13:25:55 CEST 2014


On 09/05, praveen at multicorewareinc.com wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1409910275 -19800
> # Node ID 1a5956edc730c3f45982e94ecb6694bb613f63f8
> # Parent  a2993470dd1aaca47ae1c852ae73675e5353df6d
> copy_cnt 4x4 AVX2 asm code, as per new interface

I've folded these two patches together.

> diff -r a2993470dd1a -r 1a5956edc730 source/common/x86/blockcopy8.asm
> --- a/source/common/x86/blockcopy8.asm	Fri Sep 05 11:39:20 2014 +0530
> +++ b/source/common/x86/blockcopy8.asm	Fri Sep 05 15:14:35 2014 +0530
> @@ -3991,23 +3991,23 @@
>  INIT_YMM avx2
>  cglobal copy_cnt_4, 3,3,5
>      add         r2d, r2d
> -    pxor        m4, m4
> +    xorpd       xm3, xm3
>  
>      ; row 0 & 1
>      movq        xm0, [r1]
>      movhps      xm0, [r1 + r2]
> -    pmovsxwd    m1, xm0
> -    movu        [r0 + 0 * mmsize], m1
>  
>      ; row 2 & 3
>      movq        xm1, [r1 + r2 * 2]
>      lea         r2, [r2 * 3]
>      movhps      xm1, [r1 + r2]
> -    pmovsxwd    m2, xm1
> -    movu        [r0 + 1 * mmsize], m2
> -
> -    packsswb    xm0, xm1
> -    pcmpeqb     xm0, xm4
> +
> +    vinserti128 m0, m0, xm1, 1
> +    movu    [r0], m0
> +
> +    vextractf128 xm1, m0, 1
> +    packsswb     xm0, xm1
> +    pcmpeqb      xm0, xm3
>  
>      ; get count
>      pmovmskb    eax, xm0
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list