[x265] [PATCH 1 of 6] asm: avx2 code for intra_pred_ang16x16 mode 2 and 34

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Thu May 28 08:36:18 CEST 2015


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1432791818 -19800
#      Thu May 28 11:13:38 2015 +0530
# Node ID 1af2413e19a29f28872386ff97b041360745daea
# Parent  18939c0e321f08207fa0a383939bc44485773013
asm: avx2 code for intra_pred_ang16x16 mode 2 and 34

performance improvement over SSE:
intra_ang_16x16[ 2]     356c-> 192c,  46%
intra_ang_16x16[34]     351c-> 201c,  43%

diff -r 18939c0e321f -r 1af2413e19a2 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue May 26 13:17:55 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Thu May 28 11:13:38 2015 +0530
@@ -1196,6 +1196,9 @@
     }
     if (cpuMask & X265_CPU_AVX2)
     {
+        p.cu[BLOCK_16x16].intra_pred[2]     = x265_intra_pred_ang16_2_avx2;
+        p.cu[BLOCK_16x16].intra_pred[34]    = x265_intra_pred_ang16_2_avx2;
+
         p.pu[LUMA_8x4].addAvg   = x265_addAvg_8x4_avx2;
         p.pu[LUMA_8x8].addAvg   = x265_addAvg_8x8_avx2;
         p.pu[LUMA_8x16].addAvg  = x265_addAvg_8x16_avx2;
diff -r 18939c0e321f -r 1af2413e19a2 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Tue May 26 13:17:55 2015 +0530
+++ b/source/common/x86/intrapred.h	Thu May 28 11:13:38 2015 +0530
@@ -242,6 +242,7 @@
 void x265_intra_pred_ang8_21_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
 void x265_intra_pred_ang8_22_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
 void x265_intra_pred_ang8_23_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
+void x265_intra_pred_ang16_2_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter); // registered for both mode 2 and mode 34; the routine selects its source offset from dirMode
 void x265_intra_pred_ang16_3_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
 void x265_intra_pred_ang16_4_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
 void x265_intra_pred_ang16_5_avx2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int dirMode, int bFilter);
diff -r 18939c0e321f -r 1af2413e19a2 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Tue May 26 13:17:55 2015 +0530
+++ b/source/common/x86/intrapred16.asm	Thu May 28 11:13:38 2015 +0530
@@ -10182,6 +10182,63 @@
 .quit:
     RET
 
+;-------------------------------------------------------------------------------------------------------
+; avx2 code for intra_pred_ang16 mode 2 and 34 start
+;-------------------------------------------------------------------------------------------------------
+INIT_YMM avx2
+cglobal intra_pred_ang16_2, 3,5,3                   ; args (x86inc order, see intrapred.h prototype): r0 = dst, r1 = dstStride, r2 = srcPix, r3m = dirMode
+    lea         r4,                 [r2]            ; r4 = srcPix as passed in (plain register copy)
+    add         r2,                 64              ; r2 = srcPix + 64 bytes; presumably the left-neighbour half (32 pixels * 2 bytes) - confirm layout
+    cmp         r3m,                byte 34
+    cmove       r2,                 r4              ; dirMode == 34: read from the original srcPix base; any other mode keeps srcPix + 64
+    add         r1d,                 r1d            ; double dstStride; presumably pixel stride -> byte stride for 16-bit pixels
+    lea         r3,                 [r1 * 3]        ; r3 = 3 * stride, offset of the 4th row in each group of four
+    movu        m0,                 [r2 + 4]        ; m0 = source bytes 4..35
+    movu        m1,                 [r2 + 20]       ; m1 = source bytes 20..51; 16 bytes past m0, so each 128-bit lane of m1 continues the same lane of m0
+
+    movu        [r0],               m0              ; row 0: source bytes 4..35
+    palignr     m2,                 m1, m0, 2       ; palignr shifts per 128-bit lane; with m1 = m0 + 16 bytes the result is one contiguous 32-byte window
+    movu        [r0 + r1],          m2              ; row 1: window advanced by 2 bytes
+    palignr     m2,                 m1, m0, 4
+    movu        [r0 + r1 * 2],      m2              ; row 2
+    palignr     m2,                 m1, m0, 6
+    movu        [r0 + r3],          m2              ; row 3
+
+    lea         r0,                 [r0 + r1 * 4]   ; advance dst by four rows
+    palignr     m2,                 m1, m0, 8
+    movu        [r0],               m2              ; row 4
+    palignr     m2,                 m1, m0, 10
+    movu        [r0 + r1],          m2              ; row 5
+    palignr     m2,                 m1, m0, 12
+    movu        [r0 + r1 * 2],      m2              ; row 6
+    palignr     m2,                 m1, m0, 14
+    movu        [r0 + r3],          m2              ; row 7
+
+    movu        m0,                 [r2 + 36]       ; m0 = source bytes 36..67, 16 bytes past m1; m1/m0 now play the low/high roles
+    lea         r0,                 [r0 + r1 * 4]   ; advance dst by four rows
+    movu        [r0],               m1              ; row 8: a 16-byte advance from row 0 is exactly m1 (bytes 20..51)
+    palignr     m2,                 m0, m1, 2
+    movu        [r0 + r1],          m2              ; row 9
+    palignr     m2,                 m0, m1, 4
+    movu        [r0 + r1 * 2],      m2              ; row 10
+    palignr     m2,                 m0, m1, 6
+    movu        [r0 + r3],          m2              ; row 11
+
+    lea         r0,                 [r0 + r1 * 4]   ; advance dst by four rows
+    palignr     m2,                 m0, m1, 8
+    movu        [r0],               m2              ; row 12
+    palignr     m2,                 m0, m1, 10
+    movu        [r0 + r1],          m2              ; row 13
+    palignr     m2,                 m0, m1, 12
+    movu        [r0 + r1 * 2],      m2              ; row 14
+    palignr     m2,                 m0, m1, 14
+    movu        [r0 + r3],          m2              ; row 15: source bytes 34..65
+    RET
+
+;-------------------------------------------------------------------------------------------------------
+; end of avx2 code for intra_pred_ang16 mode 2 and 34
+;-------------------------------------------------------------------------------------------------------
+
 %macro MODE_2_34 0
     movu            m0, [r2 + 4]
     movu            m1, [r2 + 20]


More information about the x265-devel mailing list