<div dir="ltr">Ignore it; I need to correct the commit message.<div><br></div><div><br></div><div>Regards,</div><div>Praveen Tiwari</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Sep 11, 2014 at 4:41 PM, <span dir="ltr">&lt;<a href="mailto:praveen@multicorewareinc.com" target="_blank">praveen@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Praveen Tiwari<br>
# Date 1410433904 -19800<br>
# Node ID 5740ec22db67267bfca97fbba07ef9239802d2b0<br>
# Parent 012f315d3eda8044f5a49865e15ba2943fbab094<br>
removed copy_cnt_4 avx2 asm code: SSE version is equally fast<br>
<br>
diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/asm-primitives.cpp<br>
--- a/source/common/x86/asm-primitives.cpp Wed Sep 10 17:27:20 2014 +0200<br>
+++ b/source/common/x86/asm-primitives.cpp Thu Sep 11 16:41:44 2014 +0530<br>
@@ -1730,7 +1730,6 @@<br>
/* Need to update assembly code as per changed interface of the copy_cnt primitive, once<br>
* code is updated, avx2 version will be enabled */<br>
<br>
- // p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;<br>
p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;<br>
// p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;<br>
// p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;<br>
diff -r 012f315d3eda -r 5740ec22db67 source/common/x86/blockcopy8.asm<br>
--- a/source/common/x86/blockcopy8.asm Wed Sep 10 17:27:20 2014 +0200<br>
+++ b/source/common/x86/blockcopy8.asm Thu Sep 11 16:41:44 2014 +0530<br>
@@ -3987,35 +3987,6 @@<br>
%endif<br>
RET<br>
<br>
-<br>
-INIT_YMM avx2<br>
-cglobal copy_cnt_4, 3,3,3<br>
- add r2d, r2d<br>
- xorpd xm2, xm2<br>
-<br>
- ; row 0 & 1<br>
- movq xm0, [r1]<br>
- movhps xm0, [r1 + r2]<br>
-<br>
- ; row 2 & 3<br>
- movq xm1, [r1 + r2 * 2]<br>
- lea r2, [r2 * 3]<br>
- movhps xm1, [r1 + r2]<br>
-<br>
- vinserti128 m0, m0, xm1, 1<br>
- movu [r0], m0<br>
-<br>
- vextractf128 xm1, m0, 1<br>
- packsswb xm0, xm1<br>
- pcmpeqb xm0, xm2<br>
-<br>
- ; get count<br>
- pmovmskb eax, xm0<br>
- not ax<br>
- popcnt ax, ax<br>
- RET<br>
-<br>
-<br>
;--------------------------------------------------------------------------------------<br>
; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);<br>
;--------------------------------------------------------------------------------------<br>
</blockquote></div><br></div>