[x265] [PATCH] asm: avx2 code for high_bit_depth intra_planar_16x16

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Tue May 12 08:19:20 CEST 2015


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1431322630 -19800
#      Mon May 11 11:07:10 2015 +0530
# Node ID e112801421b324350f7de388aef19a9f582b4c2b
# Parent  ac2832c459edc3e1417d6fb62f89203e23484ec8
asm: avx2 code for high_bit_depth intra_planar_16x16

AVX2:
intra_planar_16x16      19.84x   478.04          9483.19

SSE:
intra_planar_16x16      12.91x   793.82          10248.45

diff -r ac2832c459ed -r e112801421b3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue May 12 11:07:51 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon May 11 11:07:10 2015 +0530
@@ -1181,6 +1181,8 @@
     }
     if (cpuMask & X265_CPU_AVX2)
     {
+        p.cu[BLOCK_16x16].intra_pred[PLANAR_IDX] = x265_intra_pred_planar16_avx2;
+
         p.cu[BLOCK_16x16].intra_pred[DC_IDX] = x265_intra_pred_dc16_avx2;
         p.cu[BLOCK_32x32].intra_pred[DC_IDX] = x265_intra_pred_dc32_avx2;
 
diff -r ac2832c459ed -r e112801421b3 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Tue May 12 11:07:51 2015 +0530
+++ b/source/common/x86/intrapred16.asm	Mon May 11 11:07:10 2015 +0530
@@ -89,6 +89,7 @@
 cextern pw_2
 cextern pw_4
 cextern pw_8
+cextern pw_15
 cextern pw_16
 cextern pw_32
 cextern pw_1023
@@ -103,6 +104,7 @@
 cextern pw_swap
 cextern pb_unpackwq1
 cextern pb_unpackwq2
+cextern pw_planar16_mul
 
 ;-----------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
@@ -902,6 +904,71 @@
 %endrep
     RET
 
+;---------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* srcPix, int, int filter)
+;
+; 16x16 planar intra prediction on 16-bit samples, AVX2 version. Only the first
+; three arguments are loaded; the trailing int arguments are not read.
+;
+; In:    r0 = dst
+;        r1 = dstStride (doubled on entry, presumably pixels -> bytes)
+;        r2 = srcPix
+; Regs:  m0 = per-column weights from pw_planar16_mul (presumably blkSize - 1 - x)
+;        m3 = running per-row sum, stepped by m4 after each row
+;        m4 = per-row step: broadcast word at srcPix + 98 minus above[x]
+;        m1, m2 = scratch
+; Out:   16 rows of 16 words written to dst
+;---------------------------------------------------------------------------------------
+INIT_YMM avx2
+cglobal intra_pred_planar16, 3,3,5
+    add             r1, r1                      ; full-width: dstStride is intptr_t
+    vpbroadcastw    m3, [r2 + 34]               ; topRight
+    vpbroadcastw    m4, [r2 + 98]               ; presumably bottomLeft -- confirm layout
+    mova            m0, [pw_planar16_mul]
+    movu            m2, [r2 + 2]                ; above[0..15]
+
+    pmullw          m3, [multiL]                ; (x + 1) * topRight
+    pmullw          m1, m2, [pw_15]             ; (blkSize - 1 - y) * above[x] for y = 0
+    paddw           m3, [pw_16]                 ; presumably the rounding term for >> 5
+    paddw           m3, m4
+    paddw           m3, m1                      ; m3 = row 0 sum except the m0-weighted term
+    psubw           m4, m2                      ; m4 = change in m3 from one row to the next
+    add             r2, 66                      ; r2 -> first per-row sample read by the macro
+
+; Emit two rows. %1 = byte offset from r2 of the first row's sample; the second
+; row uses the next word. m3 is stepped once per row.
+%macro INTRA_PRED_PLANAR16_AVX2 1
+    vpbroadcastw    m1, [r2 + %1]
+    vpbroadcastw    m2, [r2 + %1 + 2]
+
+    pmullw          m1, m0
+    pmullw          m2, m0
+    paddw           m1, m3                      ; first row
+    paddw           m3, m4
+    psraw           m1, 5
+    paddw           m2, m3                      ; second row
+    psraw           m2, 5
+    paddw           m3, m4
+    movu            [r0], m1
+    movu            [r0 + r1], m2
+; the last pair (offset 28) has no following row, so dst is not advanced
+%if %1 <= 24
+    lea             r0, [r0 + r1 * 2]
+%endif
+%endmacro
+    INTRA_PRED_PLANAR16_AVX2 0
+    INTRA_PRED_PLANAR16_AVX2 4
+    INTRA_PRED_PLANAR16_AVX2 8
+    INTRA_PRED_PLANAR16_AVX2 12
+    INTRA_PRED_PLANAR16_AVX2 16
+    INTRA_PRED_PLANAR16_AVX2 20
+    INTRA_PRED_PLANAR16_AVX2 24
+    INTRA_PRED_PLANAR16_AVX2 28
+; NOTE(review): %undef targets single-line macros; removing a %macro
+; definition would take %unmacro -- confirm assembler support before changing.
+%undef INTRA_PRED_PLANAR16_AVX2
+    RET
+
 ;-----------------------------------------------------------------------------------------
 ; void intraPredAng4(pixel* dst, intptr_t dstStride, pixel* src, int dirMode, int bFilter)
 ;-----------------------------------------------------------------------------------------


More information about the x265-devel mailing list