[x265] [PATCH] asm: avx2 code for high_bit_depth intra_planar_16x16
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Tue May 12 08:19:20 CEST 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1431322630 -19800
# Mon May 11 11:07:10 2015 +0530
# Node ID e112801421b324350f7de388aef19a9f582b4c2b
# Parent ac2832c459edc3e1417d6fb62f89203e23484ec8
asm: avx2 code for high_bit_depth intra_planar_16x16
AVX2:
intra_planar_16x16 19.84x 478.04 9483.19
SSE:
intra_planar_16x16 12.91x 793.82 10248.45
diff -r ac2832c459ed -r e112801421b3 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue May 12 11:07:51 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon May 11 11:07:10 2015 +0530
@@ -1181,6 +1181,8 @@
}
if (cpuMask & X265_CPU_AVX2)
{
+ p.cu[BLOCK_16x16].intra_pred[PLANAR_IDX] = x265_intra_pred_planar16_avx2;
+
p.cu[BLOCK_16x16].intra_pred[DC_IDX] = x265_intra_pred_dc16_avx2;
p.cu[BLOCK_32x32].intra_pred[DC_IDX] = x265_intra_pred_dc32_avx2;
diff -r ac2832c459ed -r e112801421b3 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Tue May 12 11:07:51 2015 +0530
+++ b/source/common/x86/intrapred16.asm Mon May 11 11:07:10 2015 +0530
@@ -89,6 +89,7 @@
cextern pw_2
cextern pw_4
cextern pw_8
+cextern pw_15
cextern pw_16
cextern pw_32
cextern pw_1023
@@ -103,6 +104,7 @@
cextern pw_swap
cextern pb_unpackwq1
cextern pb_unpackwq2
+cextern pw_planar16_mul
;-----------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
@@ -902,6 +904,54 @@
%endrep
RET
+;---------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* srcPix, int, int filter)
+;
+; 16x16 planar intra prediction, AVX2, 16-bit pixels (one ymm register = one output row).
+;
+; In:    r0 = dst, r1 = dstStride (in pixels), r2 = srcPix
+;        The 4th/5th C arguments are not loaded (cglobal requests 3 args only).
+; Out:   16 rows of 16 words stored at dst, dst + stride, ...
+; Regs:  m0 = per-column weights loaded from pw_planar16_mul (defined elsewhere;
+;             presumably blkSize - 1 - x -- TODO confirm against the table)
+;        m1, m2 = scratch / the two rows currently being produced
+;        m3 = running per-column accumulator for the current row
+;        m4 = per-row increment added to m3 after every row
+; Note:  sums are kept in 16-bit lanes and shifted arithmetically, so this
+;        presumably relies on the pixel range keeping 32 * maxPixel + 16 below
+;        32768 -- NOTE(review): confirm for the supported bit depths.
+;---------------------------------------------------------------------------------------
+INIT_YMM avx2
+cglobal intra_pred_planar16, 3,3,5                      ; m0-m4 are used: 5 vector registers
+    add             r1, r1                              ; stride: pixels -> bytes; full-width add keeps the
+                                                        ; upper half of the intptr_t (r1d would zero it)
+    vpbroadcastw    m3, [r2 + 34]                       ; word 17 of srcPix (topRight)
+    vpbroadcastw    m4, [r2 + 98]                       ; word 49 of srcPix (presumably bottomLeft -- TODO confirm)
+    mova            m0, [pw_planar16_mul]
+    movu            m2, [r2 + 2]                        ; above[0..15]
+
+    ; Build the row-0 accumulator in m3 and the per-row delta in m4.
+    pmullw          m3, [multiL]                        ; (x + 1) * topRight
+    pmullw          m1, m2, [pw_15]                     ; (blkSize - 1 - y) * above[x], for y = 0
+    paddw           m3, [pw_16]                         ; rounding term for the >> 5 below
+    paddw           m3, m4                              ; + m4 (the y = 0 share of the [r2 + 98] sample)
+    paddw           m3, m1
+    psubw           m4, m2                              ; delta per row: m4 - above[x]
+    add             r2, 66                              ; r2 -> word 33 of srcPix (presumably left[0] -- TODO confirm)
+
+; Emits two consecutive output rows.
+; %1 = byte offset (from r2) of the first of the two words broadcast for these rows.
+; Clobbers m1, m2; advances m3 by two row deltas; advances r0 by two rows
+; except on the final invocation (%1 = 28), where no further rows follow.
+%macro INTRA_PRED_PLANAR16_AVX2 1
+    vpbroadcastw    m1, [r2 + %1]
+    vpbroadcastw    m2, [r2 + %1 + 2]
+
+    pmullw          m1, m0                              ; broadcast sample * column weight
+    pmullw          m2, m0
+    paddw           m1, m3
+    paddw           m3, m4                              ; step accumulator to the next row
+    psraw           m1, 5                               ; / 32
+    paddw           m2, m3
+    psraw           m2, 5
+    paddw           m3, m4
+    movu            [r0], m1
+    movu            [r0 + r1], m2
+%if %1 <= 24
+    lea             r0, [r0 + r1 * 2]
+%endif
+%endmacro
+    INTRA_PRED_PLANAR16_AVX2 0
+    INTRA_PRED_PLANAR16_AVX2 4
+    INTRA_PRED_PLANAR16_AVX2 8
+    INTRA_PRED_PLANAR16_AVX2 12
+    INTRA_PRED_PLANAR16_AVX2 16
+    INTRA_PRED_PLANAR16_AVX2 20
+    INTRA_PRED_PLANAR16_AVX2 24
+    INTRA_PRED_PLANAR16_AVX2 28
+%undef INTRA_PRED_PLANAR16_AVX2
+    RET
+
;-----------------------------------------------------------------------------------------
; void intraPredAng4(pixel* dst, intptr_t dstStride, pixel* src, int dirMode, int bFilter)
;-----------------------------------------------------------------------------------------
More information about the x265-devel mailing list