[x265] [PATCH] copy_cnt_4: remove avx2 asm code (nit), the sse version gives the same speedup
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Thu Sep 11 14:05:24 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1410437024 -19800
# Node ID 9241634204a12babf8a2a90dc4f776646a9ddfb3
# Parent c7d9506fe681e29b6315bf42ac898b46adb9b6a5
copy_cnt_4: remove avx2 asm code (nit), the sse version gives the same speedup
diff -r c7d9506fe681 -r 9241634204a1 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Sep 11 17:20:05 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Sep 11 17:33:44 2014 +0530
@@ -1730,7 +1730,6 @@
/* Need to update assembly code as per changed interface of the copy_cnt primitive, once
* code is updated, avx2 version will be enabled */
- // p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
// p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
// p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
diff -r c7d9506fe681 -r 9241634204a1 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Thu Sep 11 17:20:05 2014 +0530
+++ b/source/common/x86/blockcopy8.asm Thu Sep 11 17:33:44 2014 +0530
@@ -3988,34 +3988,6 @@
RET
-INIT_YMM avx2
-cglobal copy_cnt_4, 3,3,3
- add r2d, r2d
- xorpd xm2, xm2
-
- ; row 0 & 1
- movq xm0, [r1]
- movhps xm0, [r1 + r2]
-
- ; row 2 & 3
- movq xm1, [r1 + r2 * 2]
- lea r2, [r2 * 3]
- movhps xm1, [r1 + r2]
-
- vinserti128 m0, m0, xm1, 1
- movu [r0], m0
-
- vextractf128 xm1, m0, 1
- packsswb xm0, xm1
- pcmpeqb xm0, xm2
-
- ; get count
- pmovmskb eax, xm0
- not ax
- popcnt ax, ax
- RET
-
-
;--------------------------------------------------------------------------------------
; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
;--------------------------------------------------------------------------------------
More information about the x265-devel
mailing list