[x265] [PATCH] 16bpp: assembly code for intra_planar8
dnyaneshwar at multicorewareinc.com
Mon Dec 9 11:50:52 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1386579848 -19800
# Mon Dec 09 14:34:08 2013 +0530
# Node ID 755e80d65d853e26e1f8ddd1fd46f924a92ba948
# Parent 942ea368858fd64d908c2b5fca5cdb23eca6a038
16bpp: assembly code for intra_planar8
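
This implements the standard HEVC planar prediction for an 8x8 block at 16bpp. As a point of reference, a minimal C sketch of the scalar computation the routine is expected to match (illustrative only, not taken from the patch; above[0..8] are the reference samples above the block, with above[8] the top-right neighbour, and left[0..8] the samples to its left, with left[8] the bottom-left neighbour; 'pixel' is the 16-bit sample type in HIGH_BIT_DEPTH builds):

    /* each output sample is a weighted blend of the left/above references
     * and the top-right/bottom-left corner samples, rounded and shifted
     * by log2(8) + 1 = 4 */
    for (int y = 0; y < 8; y++)
        for (int x = 0; x < 8; x++)
            dst[y * dstStride + x] = (pixel)(((7 - x) * left[y] + (x + 1) * above[8] +
                                              (7 - y) * above[x] + (y + 1) * left[8] + 8) >> 4);

The assembly below evaluates this row by row, keeping the per-column terms in xmm registers so each row is produced by a single 8-word store.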
diff -r 942ea368858f -r 755e80d65d85 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Dec 09 11:31:12 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Dec 09 14:34:08 2013 +0530
@@ -675,6 +675,7 @@
     if (cpuMask & X265_CPU_SSE4)
     {
         p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
+        p.intra_pred[BLOCK_8x8][0] = x265_intra_pred_planar8_sse4;
         p.intra_pred[BLOCK_4x4][1] = x265_intra_pred_dc4_sse4;
         p.intra_pred[BLOCK_8x8][1] = x265_intra_pred_dc8_sse4;
diff -r 942ea368858f -r 755e80d65d85 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Mon Dec 09 11:31:12 2013 +0530
+++ b/source/common/x86/intrapred16.asm Mon Dec 09 14:34:08 2013 +0530
@@ -29,8 +29,10 @@
SECTION .text
cextern pw_1
+cextern pw_8
cextern pd_32
cextern pw_4096
+cextern multiL
cextern multi_2Row
@@ -467,3 +469,66 @@
%undef COMP_PRED_PLANAR_2ROW
RET
+
+;-----------------------------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
+;-----------------------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_planar8, 4,4,7
+    add             r2, 2                   ; advance left pointer by one 16-bit sample
+    add             r3, 2                   ; advance above pointer by one 16-bit sample
+    add             r1, r1                  ; dstStride in bytes (16-bit pixels)
+    movu            m1, [r3]                ; v_topRow = above[0..7]
+    movu            m2, [r2]                ; v_leftColumn = left[0..7]
+
+    movd            m3, [r3 + 16]           ; topRight = above[8]
+    movd            m4, [r2 + 16]           ; bottomLeft = left[8]
+
+    pshuflw         m3, m3, 0
+    pshufd          m3, m3, 0               ; broadcast topRight
+    pshuflw         m4, m4, 0
+    pshufd          m4, m4, 0               ; broadcast bottomLeft
+
+    psubw           m4, m1                  ; v_bottomRow = bottomLeft - above[x]
+    psubw           m3, m2                  ; v_rightColumn = topRight - left[y]
+
+    psllw           m1, 3                   ; v_topRow = above[x] << 3
+    psllw           m2, 3                   ; v_leftColumn = left[y] << 3
+
+    paddw           m6, m2, [pw_8]          ; (left[y] << 3) + 8, rounding offset folded in
+
+%macro PRED_PLANAR_ROW8 1
+  %if (%1 < 4)
+    pshuflw         m5, m6, 0x55 * %1
+    pshufd          m5, m5, 0               ; broadcast (left[y] << 3) + 8 for row %1
+    pshuflw         m2, m3, 0x55 * %1
+    pshufd          m2, m2, 0               ; broadcast v_rightColumn for row %1
+  %else
+    pshufhw         m5, m6, 0x55 * (%1 - 4)
+    pshufd          m5, m5, 0xAA
+    pshufhw         m2, m3, 0x55 * (%1 - 4)
+    pshufd          m2, m2, 0xAA
+  %endif
+
+    pmullw          m2, [multiL]            ; (x + 1) * v_rightColumn
+    paddw           m5, m2
+    paddw           m1, m4                  ; accumulate v_bottomRow into v_topRow
+    paddw           m5, m1
+    psraw           m5, 4                   ; >> (log2(8) + 1)
+
+    movu            [r0], m5                ; store one 8-sample row
+    add             r0, r1
+
+%endmacro
+
+    PRED_PLANAR_ROW8 0
+    PRED_PLANAR_ROW8 1
+    PRED_PLANAR_ROW8 2
+    PRED_PLANAR_ROW8 3
+    PRED_PLANAR_ROW8 4
+    PRED_PLANAR_ROW8 5
+    PRED_PLANAR_ROW8 6
+    PRED_PLANAR_ROW8 7
+
+%undef PRED_PLANAR_ROW8
+    RET
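
For readers mapping the SSE4 code back to the scalar formula above: after the prologue, m1 holds 8 * above[x], m4 holds (bottomLeft - above[x]), m3 holds (topRight - left[y]) and m6 holds 8 * left[y] + 8. Each PRED_PLANAR_ROW8 y broadcasts word y of m6 and m3, multiplies the right-column delta by the per-column weights in multiL (assumed here to be the word constants 1..8, as used by the other intra primitives), and adds m1, which has accumulated one extra m4 per row and therefore carries the (y + 1) * (bottomLeft - above[x]) term. In scalar terms, one macro expansion computes, for each column x of row y (sketch, same naming as the reference code):

    row[x] = ((8 * left[y] + 8) + (x + 1) * (above[8] - left[y]) +
              8 * above[x] + (y + 1) * (left[8] - above[x])) >> 4;

which rearranges to the reference expression, with the final psraw by 4 being the (log2(8) + 1) normalisation.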