[x265] [PATCH 3 of 3] asm: fix dct[8x8] AVX2 asm for main12

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Wed Dec 9 09:50:52 CET 2015


# HG changeset patch
# User Aasaipriya Chandran <aasaipriya at multicorewareinc.com>
# Date 1449648215 -19800
#      Wed Dec 09 13:33:35 2015 +0530
# Node ID 9e3f71d784e59527a14702e83de474bc3f12fd15
# Parent  9357c1f448a7b987cebfd3cc5542cc6c65e63fe2
asm: fix dct[8x8] AVX2 asm for main12

diff -r 9357c1f448a7 -r 9e3f71d784e5 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Dec 01 15:16:12 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Dec 09 13:33:35 2015 +0530
@@ -1573,9 +1573,8 @@
         p.cu[BLOCK_32x32].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_32_avx2);
 
         ALL_LUMA_TU_S(idct, idct, avx2);
-#if X265_DEPTH <= 10
         ALL_LUMA_TU_S(dct, dct, avx2);
-#endif
+
         ALL_LUMA_CU_S(transpose, transpose, avx2);
 
         ALL_LUMA_PU(luma_vpp, interp_8tap_vert_pp, avx2);
diff -r 9357c1f448a7 -r 9e3f71d784e5 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm	Tue Dec 01 15:16:12 2015 +0530
+++ b/source/common/x86/dct8.asm	Wed Dec 09 13:33:35 2015 +0530
@@ -2174,7 +2174,7 @@
     pmaddwd         m0,                 m%4
     phaddd          m2,                 m0
     paddd           m2,                 m5
-    psrad           m2,                 DCT_SHIFT
+    psrad           m2,                 DCT8_SHIFT1
     packssdw        m2,                 m2
     vpermq          m2,                 m2, 0x08
     mova            [r5 + %2],          xm2
@@ -2190,7 +2190,7 @@
     phaddd          m8,                 m9
     phaddd          m6,                 m8
     paddd           m6,                 m5
-    psrad           m6,                 DCT_SHIFT2
+    psrad           m6,                 DCT8_SHIFT2
 
     vbroadcasti128  m4,                 [r6 + %2]
     pmaddwd         m10,                m0, m4
@@ -2201,7 +2201,7 @@
     phaddd          m8,                 m9
     phaddd          m10,                m8
     paddd           m10,                m5
-    psrad           m10,                DCT_SHIFT2
+    psrad           m10,                DCT8_SHIFT2
 
     packssdw        m6,                 m10
     vpermq          m10,                m6, 0xD8
@@ -2210,18 +2210,7 @@
 
 INIT_YMM avx2
 cglobal dct8, 3, 7, 11, 0-8*16
-%if BIT_DEPTH == 12
-    %define         DCT_SHIFT          6
-    vbroadcasti128  m5,                [pd_16]
-%elif BIT_DEPTH == 10
-    %define         DCT_SHIFT          4
-    vbroadcasti128  m5,                [pd_8]
-%elif BIT_DEPTH == 8
-    %define         DCT_SHIFT          2
-    vbroadcasti128  m5,                [pd_2]
-%else
-    %error Unsupported BIT_DEPTH!
-%endif
+vbroadcasti128      m5,                [pd_ %+ DCT8_ROUND1]
 %define             DCT_SHIFT2         9
 
     add             r2d,               r2d
@@ -2265,7 +2254,7 @@
     DCT8_PASS_1     7 * 16,             7 * 16, 4, 1
 
     ;pass2
-    vbroadcasti128  m5,                [pd_256]
+    vbroadcasti128  m5,                [pd_ %+ DCT8_ROUND2]
 
     mova            m0,                [r5]
     mova            m1,                [r5 + 32]


More information about the x265-devel mailing list