[x265] [PATCH 275 of 307] x86:AVX512 intra_pred_ang32 mode 26 for high bit depth

mythreyi at multicorewareinc.com mythreyi at multicorewareinc.com
Sat Apr 7 04:34:33 CEST 2018


# HG changeset patch
# User Jayashree
# Date 1514549317 -19800
#      Fri Dec 29 17:38:37 2017 +0530
# Node ID 47fd272d3c7002b5a84067a818ca4ae1c61276c1
# Parent  74965520283a92095a542ba1997798d6b3af7281
x86:AVX512 intra_pred_ang32 mode 26 for high bit depth


Primitive           | AVX2 performance | AVX512 performance
-------------------------------------------------------------
intra_ang_32x32[26] |     2.31x       |      4.38x

diff -r 74965520283a -r 47fd272d3c70 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Dec 27 14:51:40 2017 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Dec 29 17:38:37 2017 +0530
@@ -3093,6 +3093,8 @@
         p.cu[BLOCK_32x32].intra_pred[34]     = PFX(intra_pred_ang32_2_avx512);
         p.cu[BLOCK_32x32].intra_pred[10] = PFX(intra_pred_ang32_10_avx512);
         p.cu[BLOCK_32x32].intra_pred[18]    = PFX(intra_pred_ang32_18_avx512);
+        p.cu[BLOCK_32x32].intra_pred[26]    = PFX(intra_pred_ang32_26_avx512);
+
         p.pu[LUMA_64x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x64>;
         p.pu[LUMA_64x48].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x48>;
         p.pu[LUMA_64x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_64x32>;
diff -r 74965520283a -r 47fd272d3c70 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Wed Dec 27 14:51:40 2017 +0530
+++ b/source/common/x86/intrapred16.asm	Fri Dec 29 17:38:37 2017 +0530
@@ -18594,9 +18594,52 @@
     palignr     m4,                 m2, m0, 2
     movu        [r0 + r3],          m4
     mov         rsp,                [rsp+4*(mmsize/2)]
-
-    RET
-
+    RET
+INIT_ZMM avx512                                        ; m0 below is a 64-byte ZMM register
+cglobal intra_pred_ang32_26, 3,3,2                     ; 3 args loaded, 3 GPRs (r0-r2); 2 vector regs declared, only m0 is referenced here
+    movu        m0,                 [r2 + 2]           ; load 64 bytes starting 2 bytes past r2 (presumably skips the 16-bit corner sample of the reference array - confirm against caller)
+    add         r1d,                r1d                ; r1 = 2 * r1 (presumably stride in 16-bit samples -> stride in bytes - confirm)
+    lea         r2,                 [r1 * 3]           ; r2 = 3 * stride; the source pointer is no longer needed after the load above
+    movu        [r0],               m0                 ; rows 0-3: every row receives the same 64-byte vector
+   movu        [r0 + r1],          m0
+    movu        [r0 + r1 * 2],      m0
+    movu        [r0 + r2],          m0
+    lea         r0,                 [r0 + r1 *4]       ; advance r0 by 4 rows -> rows 4-7
+    movu        [r0],               m0
+    movu        [r0 + r1],          m0
+    movu        [r0 + r1 * 2],      m0
+    movu        [r0 + r2],          m0
+    lea         r0,                 [r0 + r1 *4]       ; rows 8-11
+    movu        [r0],               m0
+    movu        [r0 + r1],          m0
+    movu        [r0 + r1 * 2],      m0
+    movu        [r0 + r2],          m0
+    lea         r0,                 [r0 + r1 *4]       ; rows 12-15
+    movu        [r0],               m0
+    movu        [r0 + r1],          m0
+    movu        [r0 + r1 * 2],      m0
+    movu        [r0 + r2],          m0
+    lea         r0,                 [r0 + r1 *4]       ; rows 16-19
+    movu        [r0],               m0
+    movu        [r0 + r1],          m0
+    movu        [r0 + r1 * 2],      m0
+    movu        [r0 + r2],          m0
+    lea         r0,                 [r0 + r1 *4]       ; rows 20-23
+    movu        [r0],               m0
+    movu        [r0 + r1],          m0
+    movu        [r0 + r1 * 2],      m0
+    movu        [r0 + r2],          m0
+    lea         r0,                 [r0 + r1 *4]       ; rows 24-27
+    movu        [r0],               m0
+    movu        [r0 + r1],          m0
+    movu        [r0 + r1 * 2],      m0
+    movu        [r0 + r2],          m0
+    lea         r0,                 [r0 + r1 *4]       ; rows 28-31 (last group: 8 groups x 4 rows = 32 rows written)
+    movu        [r0],               m0
+    movu        [r0 + r1],          m0
+    movu        [r0 + r1 * 2],      m0
+    movu        [r0 + r2],          m0
+    RET                                                ; x86inc epilogue macro
 ;-------------------------------------------------------------------------------------------------------
 ; avx512 code for intra_pred_ang32 mode 2 to 34 end
 ;-------------------------------------------------------------------------------------------------------


More information about the x265-devel mailing list