[x265] [PATCH] copy_cnt 4x4 AVX2 asm code, as per new interface
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Fri Sep 5 11:44:53 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1409910275 -19800
# Node ID 1a5956edc730c3f45982e94ecb6694bb613f63f8
# Parent a2993470dd1aaca47ae1c852ae73675e5353df6d
copy_cnt 4x4 AVX2 asm code, as per new interface
diff -r a2993470dd1a -r 1a5956edc730 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Fri Sep 05 11:39:20 2014 +0530
+++ b/source/common/x86/blockcopy8.asm Fri Sep 05 15:14:35 2014 +0530
@@ -3991,23 +3991,23 @@
INIT_YMM avx2
cglobal copy_cnt_4, 3,3,5
add r2d, r2d
- pxor m4, m4
+ xorpd xm3, xm3
; row 0 & 1
movq xm0, [r1]
movhps xm0, [r1 + r2]
- pmovsxwd m1, xm0
- movu [r0 + 0 * mmsize], m1
; row 2 & 3
movq xm1, [r1 + r2 * 2]
lea r2, [r2 * 3]
movhps xm1, [r1 + r2]
- pmovsxwd m2, xm1
- movu [r0 + 1 * mmsize], m2
-
- packsswb xm0, xm1
- pcmpeqb xm0, xm4
+
+ vinserti128 m0, m0, xm1, 1
+ movu [r0], m0
+
+ vextractf128 xm1, m0, 1
+ packsswb xm0, xm1
+ pcmpeqb xm0, xm3
; get count
pmovmskb eax, xm0
More information about the x265-devel mailing list