[x265] [PATCH] asm: Fix idct4, idct8, idct16 and idct32 for main12 avx2

Ramya Sriraman <ramya at multicorewareinc.com>
Thu Oct 29 09:10:05 CET 2015


# HG changeset patch
# User Ramya Sriraman <ramya at multicorewareinc.com>
# Date 1446102850 -19800
#      Thu Oct 29 12:44:10 2015 +0530
# Node ID f4e6d123cb891215d049d1ee55e74011f5801e48
# Parent  4267f1fddd20287c4f66e38c3e2249015163dc6d
asm: Fix idct4, idct8, idct16 and idct32 for main12 avx2

diff -r 4267f1fddd20 -r f4e6d123cb89 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Oct 26 15:00:53 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu Oct 29 12:44:10 2015 +0530
@@ -1582,9 +1582,9 @@
         p.cu[BLOCK_16x16].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_16_avx2);
         p.cu[BLOCK_32x32].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_32_avx2);
 
+        ALL_LUMA_TU_S(idct, idct, avx2);
 #if X265_DEPTH <= 10
         ALL_LUMA_TU_S(dct, dct, avx2);
-        ALL_LUMA_TU_S(idct, idct, avx2);
 #endif
         ALL_LUMA_CU_S(transpose, transpose, avx2);
 
diff -r 4267f1fddd20 -r f4e6d123cb89 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm	Mon Oct 26 15:00:53 2015 +0530
+++ b/source/common/x86/dct8.asm	Thu Oct 29 12:44:10 2015 +0530
@@ -2904,7 +2904,7 @@
 cglobal idct8, 3, 7, 13, 0-8*16
 %if BIT_DEPTH == 12
     %define         IDCT_SHIFT2        8
-    vpbroadcastd    m12,                [pd_256]
+    vpbroadcastd    m12,                [pd_128]
 %elif BIT_DEPTH == 10
     %define         IDCT_SHIFT2        10
     vpbroadcastd    m12,                [pd_512]
@@ -3065,7 +3065,7 @@
 cglobal idct16, 3, 7, 16, 0-16*mmsize
 %if BIT_DEPTH == 12
     %define         IDCT_SHIFT2        8
-    vpbroadcastd    m15,                [pd_256]
+    vpbroadcastd    m15,                [pd_128]
 %elif BIT_DEPTH == 10
     %define         IDCT_SHIFT2        10
     vpbroadcastd    m15,                [pd_512]
@@ -3487,7 +3487,7 @@
 
 %if BIT_DEPTH == 12
     %define         IDCT_SHIFT2        8
-    vpbroadcastd    m15,                [pd_256]
+    vpbroadcastd    m15,                [pd_128]
 %elif BIT_DEPTH == 10
     %define         IDCT_SHIFT2        10
     vpbroadcastd    m15,                [pd_512]
@@ -3651,7 +3651,7 @@
 %define             IDCT_SHIFT1         7
 %if BIT_DEPTH == 12
     %define         IDCT_SHIFT2        8
-    vpbroadcastd    m5,                [pd_256]
+    vpbroadcastd    m5,                [pd_128]
 %elif BIT_DEPTH == 10
     %define         IDCT_SHIFT2        10
     vpbroadcastd    m5,                [pd_512]


More information about the x265-devel mailing list