[x265] [PATCH] removed copy_cnt_4 avx2 asm code: SSE version is equally fast
Praveen Tiwari
praveen at multicorewareinc.com
Thu Sep 11 13:52:58 CEST 2014
Please ignore this patch; I need to correct the commit message.
Regards,
Praveen Tiwari
On Thu, Sep 11, 2014 at 4:41 PM, <praveen at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1410433904 -19800
> # Node ID 5740ec22db67267bfca97fbba07ef9239802d2b0
> # Parent 012f315d3eda8044f5a49865e15ba2943fbab094
> removed copy_cnt_4 avx2 asm code: SSE version is equally fast
>
> diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Wed Sep 10 17:27:20 2014 +0200
> +++ b/source/common/x86/asm-primitives.cpp Thu Sep 11 16:41:44 2014 +0530
> @@ -1730,7 +1730,6 @@
> /* Need to update assembly code as per changed interface of the
> copy_cnt primitive, once
> * code is updated, avx2 version will be enabled */
>
> - // p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
> p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
> // p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
> // p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
> diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/blockcopy8.asm
> --- a/source/common/x86/blockcopy8.asm Wed Sep 10 17:27:20 2014 +0200
> +++ b/source/common/x86/blockcopy8.asm Thu Sep 11 16:41:44 2014 +0530
> @@ -3987,35 +3987,6 @@
> %endif
> RET
>
> -
> -INIT_YMM avx2
> -cglobal copy_cnt_4, 3,3,3
> - add r2d, r2d
> - xorpd xm2, xm2
> -
> - ; row 0 & 1
> - movq xm0, [r1]
> - movhps xm0, [r1 + r2]
> -
> - ; row 2 & 3
> - movq xm1, [r1 + r2 * 2]
> - lea r2, [r2 * 3]
> - movhps xm1, [r1 + r2]
> -
> - vinserti128 m0, m0, xm1, 1
> - movu [r0], m0
> -
> - vextractf128 xm1, m0, 1
> - packsswb xm0, xm1
> - pcmpeqb xm0, xm2
> -
> - ; get count
> - pmovmskb eax, xm0
> - not ax
> - popcnt ax, ax
> - RET
> -
> -
>
> ;--------------------------------------------------------------------------------------
> ; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
>
> ;--------------------------------------------------------------------------------------
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140911/85b1c3af/attachment.html>
More information about the x265-devel
mailing list