[x265] [PATCH] asm: optimize dct4

chen chenm003 at 163.com
Tue Aug 26 17:52:02 CEST 2014


The change is correct, but you forgot to upgrade the function from SSE2 to SSSE3 (pshufb is an SSSE3 instruction).
Also, we could load dct4_shuf into a register once, instead of using it as a memory operand in every pshufb, to avoid repeated memory stalls (e.g. in the first part, m3 is free).

At 2014-08-26 17:52:40,dnyaneshwar at multicorewareinc.com wrote:
># HG changeset patch
># User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
># Date 1409046621 -19800
>#      Tue Aug 26 15:20:21 2014 +0530
># Node ID bbd5b3f269b095760d21877e94d67df8bd72f479
># Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
>asm: optimize dct4
>
>diff -r 5acfb12ec5d1 -r bbd5b3f269b0 source/common/x86/dct8.asm
>--- a/source/common/x86/dct8.asm	Mon Aug 25 17:53:12 2014 +0900
>+++ b/source/common/x86/dct8.asm	Tue Aug 26 15:20:21 2014 +0530
>@@ -30,6 +30,8 @@
> 
> SECTION_RODATA 32
> 
>+dct4_shuf:      db 0, 1, 2, 3, 8, 9, 10, 11, 6, 7, 4, 5, 14, 15, 12, 13
>+
> tab_dct4:       times 4 dw 64, 64
>                 times 4 dw 83, 36
>                 times 4 dw 64, -64
>@@ -118,16 +120,14 @@
>     movh        m0, [r0 + 0 * r2]
>     movh        m1, [r0 + 1 * r2]
>     punpcklqdq  m0, m1
>-    pshufd      m0, m0, 0xD8
>-    pshufhw     m0, m0, 0xB1
>+    pshufb      m0, [dct4_shuf]
> 
>     lea         r0, [r0 + 2 * r2]
>     movh        m1, [r0]
>     movh        m2, [r0 + r2]
>     punpcklqdq  m1, m2
>-    pshufd      m1, m1, 0xD8
>-    pshufhw     m1, m1, 0xB1
> 
>+    pshufb      m1, [dct4_shuf]
>     punpcklqdq  m2, m0, m1
>     punpckhqdq  m0, m1
> 
>@@ -140,8 +140,7 @@
>     paddd       m3, m7
>     psrad       m3, DCT_SHIFT
>     packssdw    m0, m3
>-    pshufd      m0, m0, 0xD8
>-    pshufhw     m0, m0, 0xB1
>+    pshufb      m0, [dct4_shuf]
>     pmaddwd     m1, m6
>     paddd       m1, m7
>     psrad       m1, DCT_SHIFT
>@@ -149,9 +148,8 @@
>     paddd       m2, m7
>     psrad       m2, DCT_SHIFT
>     packssdw    m1, m2
>-    pshufd      m1, m1, 0xD8
>-    pshufhw     m1, m1, 0xB1
> 
>+    pshufb      m1, [dct4_shuf]
>     punpcklqdq  m2, m0, m1
>     punpckhqdq  m0, m1
> 
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140826/f5a2f0d3/attachment.html>


More information about the x265-devel mailing list