[x265] [PATCH] copy_cnt: nits
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Fri Sep 5 08:10:17 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1409897360 -19800
# Node ID a2993470dd1aaca47ae1c852ae73675e5353df6d
# Parent 1847e02a9514b02690cc4b97bae022091bf33424
copy_cnt: nits
diff -r 1847e02a9514 -r a2993470dd1a source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Fri Sep 05 11:33:57 2014 +0530
+++ b/source/common/x86/blockcopy8.asm Fri Sep 05 11:39:20 2014 +0530
@@ -3957,35 +3957,35 @@
add r2d, r2d
pxor m2, m2
- ; row 0 & 1
- movh m0, [r1]
- movhps m0, [r1 + r2]
- mova [r0], m0
-
- ; row 2 & 3
- movh m1, [r1 + r2 * 2]
- lea r2, [r2 * 3]
- movhps m1, [r1 + r2]
- mova [r0 + 16], m1
-
- packsswb m0, m1
- pcmpeqb m0, m2
-
- ; get count
- ; CHECK_ME: Intel documents said POPCNT is SSE4.2 instruction, but just implement after Nehalem
+ ; row 0 & 1
+ movh m0, [r1]
+ movhps m0, [r1 + r2]
+ mova [r0], m0
+
+ ; row 2 & 3
+ movh m1, [r1 + r2 * 2]
+ lea r2, [r2 * 3]
+ movhps m1, [r1 + r2]
+ mova [r0 + 16], m1
+
+ packsswb m0, m1
+ pcmpeqb m0, m2
+
+ ; get count
+ ; CHECK_ME: Intel documents said POPCNT is SSE4.2 instruction, but just implement after Nehalem
%if 0
- pmovmskb eax, m0
- not ax
- popcnt ax, ax
+ pmovmskb eax, m0
+ not ax
+ popcnt ax, ax
%else
- mova m1, [pb_1]
- paddb m0, m1
- psadbw m0, m2
- pshufd m1, m0, 2
- paddw m0, m1
- movd eax, m0
- %endif
- RET
+ mova m1, [pb_1]
+ paddb m0, m1
+ psadbw m0, m2
+ pshufd m1, m0, 2
+ paddw m0, m1
+ movd eax, m0
+%endif
+ RET
INIT_YMM avx2
More information about the x265-devel mailing list