[x265] [PATCH] asm: psyCost_pp_8x8 for HIGH_BIT_DEPTH in sse4: improve 6995c->1070c

chen chenm003 at 163.com
Tue Dec 30 13:42:17 CET 2014


This patch looks right.


At 2014-12-30 19:49:13,"Divya Manivannan" <divya at multicorewareinc.com> wrote:
># HG changeset patch
># User Divya Manivannan <divya at multicorewareinc.com>
># Date 1419940084 -19800
>#      Tue Dec 30 17:18:04 2014 +0530
># Node ID 259bf78bae8c6bd9bfefa8e8575de8620d20971c
># Parent  f15a798c41f69a053b1694399230b38eec8cb1a5
>asm: psyCost_pp_8x8 for HIGH_BIT_DEPTH in sse4: improve 6995c->1070c
>
>diff -r f15a798c41f6 -r 259bf78bae8c source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp	Tue Dec 30 14:35:08 2014 +0530
>+++ b/source/common/x86/asm-primitives.cpp	Tue Dec 30 17:18:04 2014 +0530
>@@ -1436,6 +1436,9 @@
>         INTRA_ANG_SSE4_HIGH(sse4);
> 
>         p.psy_cost_pp[BLOCK_4x4] = x265_psyCost_pp_4x4_sse4;
>+#if X86_64
>+        p.psy_cost_pp[BLOCK_8x8] = x265_psyCost_pp_8x8_sse4;
>+#endif
>     }
>     if (cpuMask & X265_CPU_XOP)
>     {
>diff -r f15a798c41f6 -r 259bf78bae8c source/common/x86/pixel-a.asm
>--- a/source/common/x86/pixel-a.asm	Tue Dec 30 14:35:08 2014 +0530
>+++ b/source/common/x86/pixel-a.asm	Tue Dec 30 17:18:04 2014 +0530
>@@ -6748,7 +6748,83 @@
> INIT_XMM sse4
> cglobal psyCost_pp_8x8, 4, 6, 13
> 
>-    FIX_STRIDES r1, r3
>+%if HIGH_BIT_DEPTH
>+    FIX_STRIDES r1, r3
>+    lea             r4, [3 * r1]
>+    pxor            m10, m10
>+    movu            m0, [r0]
>+    movu            m1, [r0 + r1]
>+    movu            m2, [r0 + r1 * 2]
>+    movu            m3, [r0 + r4]
>+    lea             r5, [r0 + r1 * 4]
>+    movu            m4, [r5]
>+    movu            m5, [r5 + r1]
>+    movu            m6, [r5 + r1 * 2]
>+    movu            m7, [r5 + r4]
>+
>+    paddw           m8, m0, m1
>+    paddw           m8, m2
>+    paddw           m8, m3
>+    paddw           m8, m4
>+    paddw           m8, m5
>+    paddw           m8, m6
>+    paddw           m8, m7
>+    pmaddwd         m8, [pw_1]
>+    movhlps         m9, m8
>+    paddd           m8, m9
>+    psrldq          m9, m8, 4
>+    paddd           m8, m9
>+    psrld           m8, 2
>+
>+    HADAMARD8_2D 0, 1, 2, 3, 4, 5, 6, 7, 9, amax
>+
>+    paddd           m0, m1
>+    paddd           m0, m2
>+    paddd           m0, m3
>+    HADDUW m0, m1
>+    paddd           m0, [pd_1]
>+    psrld           m0, 1
>+    psubd           m10, m0, m8
>+
>+    lea             r4, [3 * r3]
>+    movu            m0, [r2]
>+    movu            m1, [r2 + r3]
>+    movu            m2, [r2 + r3 * 2]
>+    movu            m3, [r2 + r4]
>+    lea             r5, [r2 + r3 * 4]
>+    movu            m4, [r5]
>+    movu            m5, [r5 + r3]
>+    movu            m6, [r5 + r3 * 2]
>+    movu            m7, [r5 + r4]
>+
>+    paddw           m8, m0, m1
>+    paddw           m8, m2
>+    paddw           m8, m3
>+    paddw           m8, m4
>+    paddw           m8, m5
>+    paddw           m8, m6
>+    paddw           m8, m7
>+    pmaddwd         m8, [pw_1]
>+    movhlps         m9, m8
>+    paddd           m8, m9
>+    psrldq          m9, m8, 4
>+    paddd           m8, m9
>+    psrld           m8, 2
>+
>+    HADAMARD8_2D 0, 1, 2, 3, 4, 5, 6, 7, 9, amax
>+
>+    paddd           m0, m1
>+    paddd           m0, m2
>+    paddd           m0, m3
>+    HADDUW m0, m1
>+    paddd           m0, [pd_1]
>+    psrld           m0, 1
>+    psubd           m0, m8
>+    psubd           m10, m0
>+    pabsd           m0, m10
>+    movd            eax, m0
>+
>+%else ; !HIGH_BIT_DEPTH
>     lea             r4, [3 * r1]
>     mova            m8, [hmul_8p]
> 
>@@ -6842,6 +6918,6 @@
>     psubd           m12, m0
>     pabsd           m0, m12
>     movd            eax, m0
>-
>+%endif ; HIGH_BIT_DEPTH
>     RET
> %endif
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141230/78ede184/attachment.html>


More information about the x265-devel mailing list