[x265] [PATCH] asm: psyCost_pp_8x8 for HIGH_BIT_DEPTH in sse4: improve 6995c->1070c
chen
chenm003 at 163.com
Tue Dec 30 13:42:17 CET 2014
right
At 2014-12-30 19:49:13,"Divya Manivannan" <divya at multicorewareinc.com> wrote:
># HG changeset patch
># User Divya Manivannan <divya at multicorewareinc.com>
># Date 1419940084 -19800
># Tue Dec 30 17:18:04 2014 +0530
># Node ID 259bf78bae8c6bd9bfefa8e8575de8620d20971c
># Parent f15a798c41f69a053b1694399230b38eec8cb1a5
>asm: psyCost_pp_8x8 for HIGH_BIT_DEPTH in sse4: improve 6995c->1070c
>
>diff -r f15a798c41f6 -r 259bf78bae8c source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Tue Dec 30 14:35:08 2014 +0530
>+++ b/source/common/x86/asm-primitives.cpp Tue Dec 30 17:18:04 2014 +0530
>@@ -1436,6 +1436,9 @@
> INTRA_ANG_SSE4_HIGH(sse4);
>
> p.psy_cost_pp[BLOCK_4x4] = x265_psyCost_pp_4x4_sse4;
>+#if X86_64
>+ p.psy_cost_pp[BLOCK_8x8] = x265_psyCost_pp_8x8_sse4;
>+#endif
> }
> if (cpuMask & X265_CPU_XOP)
> {
>diff -r f15a798c41f6 -r 259bf78bae8c source/common/x86/pixel-a.asm
>--- a/source/common/x86/pixel-a.asm Tue Dec 30 14:35:08 2014 +0530
>+++ b/source/common/x86/pixel-a.asm Tue Dec 30 17:18:04 2014 +0530
>@@ -6748,7 +6748,83 @@
> INIT_XMM sse4
> cglobal psyCost_pp_8x8, 4, 6, 13
>
>- FIX_STRIDES r1, r3
>+%if HIGH_BIT_DEPTH
>+ FIX_STRIDES r1, r3
>+ lea r4, [3 * r1]
>+ pxor m10, m10
>+ movu m0, [r0]
>+ movu m1, [r0 + r1]
>+ movu m2, [r0 + r1 * 2]
>+ movu m3, [r0 + r4]
>+ lea r5, [r0 + r1 * 4]
>+ movu m4, [r5]
>+ movu m5, [r5 + r1]
>+ movu m6, [r5 + r1 * 2]
>+ movu m7, [r5 + r4]
>+
>+ paddw m8, m0, m1
>+ paddw m8, m2
>+ paddw m8, m3
>+ paddw m8, m4
>+ paddw m8, m5
>+ paddw m8, m6
>+ paddw m8, m7
>+ pmaddwd m8, [pw_1]
>+ movhlps m9, m8
>+ paddd m8, m9
>+ psrldq m9, m8, 4
>+ paddd m8, m9
>+ psrld m8, 2
>+
>+ HADAMARD8_2D 0, 1, 2, 3, 4, 5, 6, 7, 9, amax
>+
>+ paddd m0, m1
>+ paddd m0, m2
>+ paddd m0, m3
>+ HADDUW m0, m1
>+ paddd m0, [pd_1]
>+ psrld m0, 1
>+ psubd m10, m0, m8
>+
>+ lea r4, [3 * r3]
>+ movu m0, [r2]
>+ movu m1, [r2 + r3]
>+ movu m2, [r2 + r3 * 2]
>+ movu m3, [r2 + r4]
>+ lea r5, [r2 + r3 * 4]
>+ movu m4, [r5]
>+ movu m5, [r5 + r3]
>+ movu m6, [r5 + r3 * 2]
>+ movu m7, [r5 + r4]
>+
>+ paddw m8, m0, m1
>+ paddw m8, m2
>+ paddw m8, m3
>+ paddw m8, m4
>+ paddw m8, m5
>+ paddw m8, m6
>+ paddw m8, m7
>+ pmaddwd m8, [pw_1]
>+ movhlps m9, m8
>+ paddd m8, m9
>+ psrldq m9, m8, 4
>+ paddd m8, m9
>+ psrld m8, 2
>+
>+ HADAMARD8_2D 0, 1, 2, 3, 4, 5, 6, 7, 9, amax
>+
>+ paddd m0, m1
>+ paddd m0, m2
>+ paddd m0, m3
>+ HADDUW m0, m1
>+ paddd m0, [pd_1]
>+ psrld m0, 1
>+ psubd m0, m8
>+ psubd m10, m0
>+ pabsd m0, m10
>+ movd eax, m0
>+
>+%else ; !HIGH_BIT_DEPTH
> lea r4, [3 * r1]
> mova m8, [hmul_8p]
>
>@@ -6842,6 +6918,6 @@
> psubd m12, m0
> pabsd m0, m12
> movd eax, m0
>-
>+%endif ; HIGH_BIT_DEPTH
> RET
> %endif
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141230/78ede184/attachment.html>
More information about the x265-devel mailing list