[x265] [PATCH] asm: optimize dct4

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Tue Aug 26 11:52:40 CEST 2014


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1409046621 -19800
#      Tue Aug 26 15:20:21 2014 +0530
# Node ID bbd5b3f269b095760d21877e94d67df8bd72f479
# Parent  5acfb12ec5d17cc700e313fc99248e2408e5967b
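asm: optimize dct4

Replace each pshufd + pshufhw pair in dct4 with a single pshufb that
uses the new dct4_shuf byte-shuffle table.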

diff -r 5acfb12ec5d1 -r bbd5b3f269b0 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm	Mon Aug 25 17:53:12 2014 +0900
+++ b/source/common/x86/dct8.asm	Tue Aug 26 15:20:21 2014 +0530
@@ -30,6 +30,8 @@
 
 SECTION_RODATA 32
 
+dct4_shuf:      db 0, 1, 2, 3, 8, 9, 10, 11, 6, 7, 4, 5, 14, 15, 12, 13
+
 tab_dct4:       times 4 dw 64, 64
                 times 4 dw 83, 36
                 times 4 dw 64, -64
@@ -118,16 +120,14 @@
     movh        m0, [r0 + 0 * r2]
     movh        m1, [r0 + 1 * r2]
     punpcklqdq  m0, m1
-    pshufd      m0, m0, 0xD8
-    pshufhw     m0, m0, 0xB1
+    pshufb      m0, [dct4_shuf]
 
     lea         r0, [r0 + 2 * r2]
     movh        m1, [r0]
     movh        m2, [r0 + r2]
     punpcklqdq  m1, m2
-    pshufd      m1, m1, 0xD8
-    pshufhw     m1, m1, 0xB1
 
+    pshufb      m1, [dct4_shuf]
     punpcklqdq  m2, m0, m1
     punpckhqdq  m0, m1
 
@@ -140,8 +140,7 @@
     paddd       m3, m7
     psrad       m3, DCT_SHIFT
     packssdw    m0, m3
-    pshufd      m0, m0, 0xD8
-    pshufhw     m0, m0, 0xB1
+    pshufb      m0, [dct4_shuf]
     pmaddwd     m1, m6
     paddd       m1, m7
     psrad       m1, DCT_SHIFT
@@ -149,9 +148,8 @@
     paddd       m2, m7
     psrad       m2, DCT_SHIFT
     packssdw    m1, m2
-    pshufd      m1, m1, 0xD8
-    pshufhw     m1, m1, 0xB1
 
+    pshufb      m1, [dct4_shuf]
     punpcklqdq  m2, m0, m1
     punpckhqdq  m0, m1
 


More information about the x265-devel mailing list