[x265] [PATCH] asm: Fix dct[8x8] main12 AVX2

Aasaipriya Chandran aasaipriya at multicorewareinc.com
Fri Oct 23 11:39:58 CEST 2015


# HG changeset patch
# User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
# Date 1445590185 -19800
#      Fri Oct 23 14:19:45 2015 +0530
# Node ID 285f4165db02250aba9bfe8e83c889d923a5b58d
# Parent  a7251c3e0ef810b95bb25be5371035208e36996d
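asm: Fix dct[8x8] main12 AVX2

Move ALL_LUMA_TU_S(dct, dct, avx2) out of the "#if X265_DEPTH <= 10" guard in
asm-primitives.cpp so the AVX2 dct primitives are also assigned for 12-bit
builds (idct stays inside the guard). In dct8.asm, replace the per-BIT_DEPTH
%if block that selected the pass-1 shift and rounding constant with the
DCT8_SHIFT1 / DCT8_ROUND1 definitions, and use DCT8_SHIFT2 / DCT8_ROUND2 for
pass 2 instead of DCT_SHIFT2 and the hard-coded pd_256.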

diff -r a7251c3e0ef8 -r 285f4165db02 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Oct 22 09:12:28 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Oct 23 14:19:45 2015 +0530
@@ -1583,9 +1583,9 @@
         p.cu[BLOCK_32x32].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_32_avx2);
 
 #if X265_DEPTH <= 10
-        ALL_LUMA_TU_S(dct, dct, avx2);
         ALL_LUMA_TU_S(idct, idct, avx2);
 #endif
+        ALL_LUMA_TU_S(dct, dct, avx2);
         ALL_LUMA_CU_S(transpose, transpose, avx2);
 
         ALL_LUMA_PU(luma_vpp, interp_8tap_vert_pp, avx2);
diff -r a7251c3e0ef8 -r 285f4165db02 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm	Thu Oct 22 09:12:28 2015 +0530
+++ b/source/common/x86/dct8.asm	Fri Oct 23 14:19:45 2015 +0530
@@ -2174,7 +2174,7 @@
     pmaddwd         m0,                 m%4
     phaddd          m2,                 m0
     paddd           m2,                 m5
-    psrad           m2,                 DCT_SHIFT
+    psrad           m2,                 DCT8_SHIFT1
     packssdw        m2,                 m2
     vpermq          m2,                 m2, 0x08
     mova            [r5 + %2],          xm2
@@ -2190,7 +2190,7 @@
     phaddd          m8,                 m9
     phaddd          m6,                 m8
     paddd           m6,                 m5
-    psrad           m6,                 DCT_SHIFT2
+    psrad           m6,                 DCT8_SHIFT2
 
     vbroadcasti128  m4,                 [r6 + %2]
     pmaddwd         m10,                m0, m4
@@ -2201,7 +2201,7 @@
     phaddd          m8,                 m9
     phaddd          m10,                m8
     paddd           m10,                m5
-    psrad           m10,                DCT_SHIFT2
+    psrad           m10,                DCT8_SHIFT2
 
     packssdw        m6,                 m10
     vpermq          m10,                m6, 0xD8
@@ -2210,18 +2210,7 @@
 
 INIT_YMM avx2
 cglobal dct8, 3, 7, 11, 0-8*16
-%if BIT_DEPTH == 12
-    %define         DCT_SHIFT          6
-    vbroadcasti128  m5,                [pd_16]
-%elif BIT_DEPTH == 10
-    %define         DCT_SHIFT          4
-    vbroadcasti128  m5,                [pd_8]
-%elif BIT_DEPTH == 8
-    %define         DCT_SHIFT          2
-    vbroadcasti128  m5,                [pd_2]
-%else
-    %error Unsupported BIT_DEPTH!
-%endif
+vbroadcasti128      m5,                [pd_ %+ DCT8_ROUND1]
 %define             DCT_SHIFT2         9
 
     add             r2d,               r2d
@@ -2265,7 +2254,7 @@
     DCT8_PASS_1     7 * 16,             7 * 16, 4, 1
 
     ;pass2
-    vbroadcasti128  m5,                [pd_256]
+    vbroadcasti128  m5,                [pd_ %+ DCT8_ROUND2]
 
     mova            m0,                [r5]
     mova            m1,                [r5 + 32]


More information about the x265-devel mailing list