[x265] [PATCH] asm: avx2 10bit code for sign primitive(356.91 -> 242.00)
rajesh at multicorewareinc.com
rajesh at multicorewareinc.com
Thu Jun 25 10:50:15 CEST 2015
# HG changeset patch
# User Rajesh Paulraj<rajesh at multicorewareinc.com>
# Date 1435219457 -19800
# Thu Jun 25 13:34:17 2015 +0530
# Node ID 430625004ef81ba9e9e398d4cf12a68a1cd4b664
# Parent a03487d6295cf89b065eff36e5c1ec4ee4253243
asm: avx2 10bit code for sign primitive(356.91 -> 242.00)
avx2:
calSign 9.08x 242.00 2197.71
sse4:
calSign 6.16x 356.91 2197.63
diff -r a03487d6295c -r 430625004ef8 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Jun 25 13:29:58 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Jun 25 13:34:17 2015 +0530
@@ -1496,6 +1496,7 @@
p.scale1D_128to64 = PFX(scale1D_128to64_avx2);
p.scale2D_64to32 = PFX(scale2D_64to32_avx2);
p.weight_pp = PFX(weight_pp_avx2);
+ p.sign = PFX(calculateSign_avx2);
p.cu[BLOCK_16x16].calcresidual = PFX(getResidual16_avx2);
p.cu[BLOCK_32x32].calcresidual = PFX(getResidual32_avx2);
diff -r a03487d6295c -r 430625004ef8 source/common/x86/loopfilter.asm
--- a/source/common/x86/loopfilter.asm Thu Jun 25 13:29:58 2015 +0530
+++ b/source/common/x86/loopfilter.asm Thu Jun 25 13:34:17 2015 +0530
@@ -1466,3 +1466,42 @@
movu [r0 + r4], m4
RET
%endif
+
+;-----------------------------------------------------------------------------
+; void calculateSign(int8_t *dst, const pixel *src1, const pixel *src2, const int endX)
+; dst[x] = +1 / 0 / -1 as src1[x] is >, ==, < src2[x] (signed 16-bit compare), x < endX
+;-----------------------------------------------------------------------------
+%if HIGH_BIT_DEPTH
+INIT_YMM avx2
+cglobal calculateSign, 4, 6, 5
+    mova            m0, [pw_1]
+    mov             r4d, r3d            ; r4 = endX, zero-extended for addressing
+    shr             r3d, 4
+    add             r3d, 1              ; r3 = endX / 16 + 1 passes of 16 pixels each
+    mov             r5, r0              ; r5 = dst base, needed for the tail restore
+    ; The loop stores whole 16-byte groups, so it may run past dst + endX by up
+    ; to 16 bytes. Save exactly those 16 bytes here and put them back at the end.
+    movu            xm4, [r0 + r4]
+
+.loop:
+    movu            m1, [r1]            ; m1 = 16 pixels from src1
+    movu            m2, [r2]            ; m2 = 16 pixels from src2
+
+    pcmpgtw         m3, m1, m2          ; m3 = -1 where src1 > src2
+    pcmpgtw         m2, m1              ; m2 = -1 where src2 > src1
+
+    pand            m3, m0              ; m3 = +1 where src1 > src2
+    por             m3, m2              ; m3 = +1 / 0 / -1 per word
+    packsswb        m3, m3              ; words -> bytes, separately in each 128-bit lane
+    vpermq          m3, m3, q3220       ; bring both lanes' packed bytes into xm3
+    movu            [r0], xm3
+
+    add             r0, 16
+    add             r1, 32
+    add             r2, 32
+    dec             r3d
+    jnz             .loop
+
+    movu            [r5 + r4], xm4      ; restore the bytes the loop overwrote past endX
+    RET
+%endif
More information about the x265-devel
mailing list