[x265] [PATCH 8 of 9] asm:intra pred planar16 sse2
dtyx265 at gmail.com
dtyx265 at gmail.com
Fri Mar 6 01:20:01 CET 2015
# HG changeset patch
# User David T Yuen <dtyx265 at gmail.com>
# Date 1425599635 28800
# Node ID 670625ad427f2cd3d8bc00fa1bcd590708caea9a
# Parent 2505da40f1a6f63266b98d5d6d81f51602c61a12
asm:intra pred planar16 sse2
This replaces the C code on processors that support SSE2 through SSSE3 but not SSE4.
The code is backported from the SSE4 version of intra_pred_planar16.
./test/TestBench --testbench intrapred | grep intra_planar_16x16
intra_planar_16x16 4.47x 2727.49 12185.39
diff -r 2505da40f1a6 -r 670625ad427f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Mar 05 15:43:35 2015 -0800
+++ b/source/common/x86/asm-primitives.cpp Thu Mar 05 15:53:55 2015 -0800
@@ -1219,6 +1219,7 @@
p.cu[BLOCK_4x4].intra_pred[PLANAR_IDX] = x265_intra_pred_planar4_sse2;
p.cu[BLOCK_8x8].intra_pred[PLANAR_IDX] = x265_intra_pred_planar8_sse2;
+ p.cu[BLOCK_16x16].intra_pred[PLANAR_IDX] = x265_intra_pred_planar16_sse2;
p.cu[BLOCK_4x4].calcresidual = x265_getResidual4_sse2;
p.cu[BLOCK_8x8].calcresidual = x265_getResidual8_sse2;
diff -r 2505da40f1a6 -r 670625ad427f source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Thu Mar 05 15:43:35 2015 -0800
+++ b/source/common/x86/intrapred.h Thu Mar 05 15:53:55 2015 -0800
@@ -37,6 +37,7 @@
void x265_intra_pred_planar4_sse2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
void x265_intra_pred_planar8_sse2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
+void x265_intra_pred_planar16_sse2(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
void x265_intra_pred_planar4_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
void x265_intra_pred_planar8_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
void x265_intra_pred_planar16_sse4(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int);
diff -r 2505da40f1a6 -r 670625ad427f source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Thu Mar 05 15:43:35 2015 -0800
+++ b/source/common/x86/intrapred8.asm Thu Mar 05 15:53:55 2015 -0800
@@ -645,6 +645,112 @@
INTRA_PRED_PLANAR_8 7
RET
+;---------------------------------------------------------------------------------------
+; void intra_pred_planar16(pixel* dst, intptr_t dstStride, const pixel* srcPix, int, int)
+;
+; 16x16 planar intra prediction for 8-bit pixels using SSE2 instructions.
+;
+; In:    r0 = dst, r1 = dstStride (added to dst as a byte offset), r2 = srcPix.
+;        The two trailing int arguments are not read.
+;        srcPix bytes used: [1..16] above, [17] topRight, [33..48] left, [49] bottomLeft.
+; Out:   16 rows of 16 bytes written at dst + y * dstStride.
+; Clobb: r0, m0-m7.
+;
+; m3 (columns 0-7) and m4 (columns 8-15) hold word accumulators that start as
+;   (x + 1) * topRight + (blkSize - 1 - y) * above[x] + bottomLeft + [pw_16]
+; and gain (bottomLeft - above[x]) on every following row. Each row adds its
+; weighted left[y] term, shifts right by 5 and packs the words to bytes.
+; The weight tables (multiL/multiH, pw_planar16_1, pw_planar8_0, pw_planar16_0)
+; are defined elsewhere in this file.
+;
+; Only r0-r2 are referenced, so cglobal requests three GPRs.
+;---------------------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal intra_pred_planar16, 3,3,8
+    pxor            m0, m0                      ; m0 = 0, used to widen bytes to words
+    movh            m2, [r2 + 1]
+    punpcklbw       m2, m0                      ; m2 = above[0..7] as words
+    movh            m7, [r2 + 9]
+    punpcklbw       m7, m0                      ; m7 = above[8..15] as words
+
+    movd            m3, [r2 + 17]               ; topRight   = above[16]
+    movd            m6, [r2 + 49]               ; bottomLeft = left[16]
+    pand            m3, [pw_00ff]               ; isolate byte [r2 + 17] in word 0 (pw_00ff presumably 0x00ff per word)
+    pand            m6, [pw_00ff]               ; same for byte [r2 + 49]
+    pshuflw         m3, m3, 0x00                ; word 0 -> words 0-3
+    pshuflw         m6, m6, 0x00
+    pshufd          m3, m3, 0x44                ; v_topRight   (low qword copied to the high qword)
+    pshufd          m6, m6, 0x44                ; v_bottomLeft
+
+    pmullw          m4, m3, [multiH]            ; (x + 1) * topRight, columns 8-15
+    pmullw          m3, [multiL]                ; (x + 1) * topRight, columns 0-7
+    pmullw          m1, m2, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x], columns 0-7
+    pmullw          m5, m7, [pw_planar16_1]     ; (blkSize - 1 - y) * above[x], columns 8-15
+    paddw           m4, [pw_16]                 ; + [pw_16] (presumably the rounding constant 16)
+    paddw           m3, [pw_16]
+    paddw           m4, m6                      ; + bottomLeft
+    paddw           m3, m6
+    paddw           m4, m5                      ; m4 = row 0 accumulator, columns 8-15
+    paddw           m3, m1                      ; m3 = row 0 accumulator, columns 0-7
+    psubw           m1, m6, m7                  ; m1 = bottomLeft - above[8..15], per-row step for m4
+    psubw           m6, m2                      ; m6 = bottomLeft - above[0..7],  per-row step for m3
+
+    movh            m2, [r2 + 33]
+    punpcklbw       m2, m0                      ; m2 = left[0..7] as words
+    movh            m7, [r2 + 41]
+    punpcklbw       m7, m0                      ; m7 = left[8..15] as words
+
+; INTRA_PRED_PLANAR_16 y
+;   Emits row y (0-15): broadcasts left[y] into m5, advances m3/m4 and r0 when
+;   y > 0, then stores one 16-byte row. Rows must be emitted in ascending order
+;   because m3, m4 and r0 carry over. Overwrites m0 (no longer needed as zero) and m5.
+%macro INTRA_PRED_PLANAR_16 1
+%if (%1 < 4)
+    pshuflw         m5, m2, 0x55 * %1           ; left[0..3]: low words of m2
+    pshufd          m5, m5, 0
+%elif (%1 < 8)
+    pshufhw         m5, m2, 0x55 * (%1 - 4)     ; left[4..7]: high words of m2
+    pshufd          m5, m5, 0xAA
+%elif (%1 < 12)
+    pshuflw         m5, m7, 0x55 * (%1 - 8)     ; left[8..11]: low words of m7
+    pshufd          m5, m5, 0
+%else
+    pshufhw         m5, m7, 0x55 * (%1 - 12)    ; left[12..15]: high words of m7
+    pshufd          m5, m5, 0xAA
+%endif
+%if (%1 > 0)
+    paddw           m3, m6                      ; step both accumulators to this row
+    paddw           m4, m1
+    lea             r0, [r0 + r1]               ; dst += dstStride
+%endif
+    pmullw          m0, m5, [pw_planar8_0]      ; weighted left[y], columns 8-15
+    pmullw          m5, [pw_planar16_0]         ; weighted left[y], columns 0-7
+    paddw           m0, m4
+    paddw           m5, m3
+    psraw           m5, 5
+    psraw           m0, 5
+    packuswb        m5, m0                      ; words -> bytes, columns 0-7 in the low half
+    movu            [r0], m5
+%endmacro
+
+    INTRA_PRED_PLANAR_16 0
+    INTRA_PRED_PLANAR_16 1
+    INTRA_PRED_PLANAR_16 2
+    INTRA_PRED_PLANAR_16 3
+    INTRA_PRED_PLANAR_16 4
+    INTRA_PRED_PLANAR_16 5
+    INTRA_PRED_PLANAR_16 6
+    INTRA_PRED_PLANAR_16 7
+    INTRA_PRED_PLANAR_16 8
+    INTRA_PRED_PLANAR_16 9
+    INTRA_PRED_PLANAR_16 10
+    INTRA_PRED_PLANAR_16 11
+    INTRA_PRED_PLANAR_16 12
+    INTRA_PRED_PLANAR_16 13
+    INTRA_PRED_PLANAR_16 14
+    INTRA_PRED_PLANAR_16 15
+    RET
+
;---------------------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
;---------------------------------------------------------------------------------------------
More information about the x265-devel
mailing list