[x265] [PATCH] removed copy_cnt_4 avx2 asm code: SSE version is equally fast
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Thu Sep 11 13:11:53 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1410433904 -19800
# Node ID 5740ec22db67267bfca97fbba07ef9239802d2b0
# Parent 012f315d3eda8044f5a49865e15ba2943fbab094
removed copy_cnt_4 avx2 asm code: SSE version is equally fast
diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Sep 10 17:27:20 2014 +0200
+++ b/source/common/x86/asm-primitives.cpp Thu Sep 11 16:41:44 2014 +0530
@@ -1730,7 +1730,6 @@
/* Need to update assembly code as per changed interface of the copy_cnt primitive, once
* code is updated, avx2 version will be enabled */
- // p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
// p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
// p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Wed Sep 10 17:27:20 2014 +0200
+++ b/source/common/x86/blockcopy8.asm Thu Sep 11 16:41:44 2014 +0530
@@ -3987,35 +3987,6 @@
%endif
RET
-
-INIT_YMM avx2
-cglobal copy_cnt_4, 3,3,3
- add r2d, r2d
- xorpd xm2, xm2
-
- ; row 0 & 1
- movq xm0, [r1]
- movhps xm0, [r1 + r2]
-
- ; row 2 & 3
- movq xm1, [r1 + r2 * 2]
- lea r2, [r2 * 3]
- movhps xm1, [r1 + r2]
-
- vinserti128 m0, m0, xm1, 1
- movu [r0], m0
-
- vextractf128 xm1, m0, 1
- packsswb xm0, xm1
- pcmpeqb xm0, xm2
-
- ; get count
- pmovmskb eax, xm0
- not ax
- popcnt ax, ax
- RET
-
-
;--------------------------------------------------------------------------------------
; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
;--------------------------------------------------------------------------------------
More information about the x265-devel
mailing list