[x265] [PATCH] removed copy_cnt_4 avx2 asm code: SSE version is equally fast

Praveen Tiwari praveen at multicorewareinc.com
Thu Sep 11 13:52:58 CEST 2014


Please ignore this patch; I need to correct the commit message.


Regards,
Praveen Tiwari

On Thu, Sep 11, 2014 at 4:41 PM, <praveen at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Praveen Tiwari
> # Date 1410433904 -19800
> # Node ID 5740ec22db67267bfca97fbba07ef9239802d2b0
> # Parent  012f315d3eda8044f5a49865e15ba2943fbab094
> removed copy_cnt_4 avx2 asm code: SSE version is equally fast
>
> diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp      Wed Sep 10 17:27:20 2014
> +0200
> +++ b/source/common/x86/asm-primitives.cpp      Thu Sep 11 16:41:44 2014
> +0530
> @@ -1730,7 +1730,6 @@
>          /* Need to update assembly code as per changed interface of the
> copy_cnt primitive, once
>           * code is updated, avx2 version will be enabled */
>
> -        // p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
>          p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
>          // p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
>          // p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
> diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/blockcopy8.asm
> --- a/source/common/x86/blockcopy8.asm  Wed Sep 10 17:27:20 2014 +0200
> +++ b/source/common/x86/blockcopy8.asm  Thu Sep 11 16:41:44 2014 +0530
> @@ -3987,35 +3987,6 @@
>  %endif
>      RET
>
> -
> -INIT_YMM avx2
> -cglobal copy_cnt_4, 3,3,3
> -    add         r2d, r2d
> -    xorpd       xm2, xm2
> -
> -    ; row 0 & 1
> -    movq        xm0, [r1]
> -    movhps      xm0, [r1 + r2]
> -
> -    ; row 2 & 3
> -    movq        xm1, [r1 + r2 * 2]
> -    lea         r2, [r2 * 3]
> -    movhps      xm1, [r1 + r2]
> -
> -    vinserti128 m0, m0, xm1, 1
> -    movu    [r0], m0
> -
> -    vextractf128 xm1, m0, 1
> -    packsswb     xm0, xm1
> -    pcmpeqb      xm0, xm2
> -
> -    ; get count
> -    pmovmskb    eax, xm0
> -    not         ax
> -    popcnt      ax, ax
> -    RET
> -
> -
>
>  ;--------------------------------------------------------------------------------------
>  ; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
>
>  ;--------------------------------------------------------------------------------------
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140911/85b1c3af/attachment.html>


More information about the x265-devel mailing list