[x265] [PATCH] 16bpp: assembly code for intra_planar4
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Mon Dec 9 11:50:36 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1386568872 -19800
# Mon Dec 09 11:31:12 2013 +0530
# Node ID 942ea368858fd64d908c2b5fca5cdb23eca6a038
# Parent b29f2f31ec460ff186fde8c75c0949af4b3fa824
16bpp: assembly code for intra_planar4
diff -r b29f2f31ec46 -r 942ea368858f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sat Dec 07 17:31:09 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp Mon Dec 09 11:31:12 2013 +0530
@@ -674,6 +674,8 @@
}
if (cpuMask & X265_CPU_SSE4)
{
+ p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
+
p.intra_pred[BLOCK_4x4][1] = x265_intra_pred_dc4_sse4;
p.intra_pred[BLOCK_8x8][1] = x265_intra_pred_dc8_sse4;
p.intra_pred[BLOCK_16x16][1] = x265_intra_pred_dc16_sse4;
diff -r b29f2f31ec46 -r 942ea368858f source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Sat Dec 07 17:31:09 2013 -0600
+++ b/source/common/x86/intrapred16.asm Mon Dec 09 11:31:12 2013 +0530
@@ -31,6 +31,7 @@
cextern pw_1
cextern pd_32
cextern pw_4096
+cextern multi_2Row
;-------------------------------------------------------------------------------------------------------
@@ -100,6 +101,8 @@
RET
+
+
;-------------------------------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
;-------------------------------------------------------------------------------------------------------
@@ -398,3 +401,69 @@
%endrep
RET
+
+;-----------------------------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
+;
+; 4x4 planar intra prediction on 16-bit pixels. Only the first four arguments are read.
+; left and above are advanced by one pixel on entry; indices below are relative to the advanced pointers.
+;
+;   bottomRow[x] = left[4] - above[x]
+;   rightCol[y]  = above[4] - left[y]
+;   dst[y][x]    = (4 * left[y] + 4 + w[x] * rightCol[y] + 4 * above[x] + (y + 1) * bottomRow[x]) >> 3
+;
+; w[] is the external table multi_2Row (defined elsewhere; presumably 1,2,3,4 repeated for
+; the two rows processed per step -- confirm against its definition).
+;
+; Registers: r0 = dst (advanced two rows per step), r1 = stride in bytes, r2 = left, r3 = above,
+;            r4/r5 = scratch, r6 = topRight
+;            m0 = running 4 * above + k * bottomRow (row y in low qword, row y + 1 in high qword)
+;            m2 = 2 * bottomRow (increment per two-row step), m1/m3/m4 = scratch
+;-----------------------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_planar4, 4,7,5
+    add         r2, 2                       ; advance left by one pixel
+    add         r3, 2                       ; advance above by one pixel
+    add         r1, r1                      ; stride: pixels -> bytes
+    movh        m0, [r3]                    ; topRow[i] = above[i];
+    punpcklqdq  m0, m0                      ; same top row in both qwords (two output rows at once)
+
+    pxor        m1, m1
+    movd        m2, [r2 + 8]                ; bottomLeft = left[4]
+    movzx       r6d, word [r3 + 8]          ; topRight = above[4];
+    pshuflw     m2, m2, 0
+    pshufd      m2, m2, 0                   ; broadcast bottomLeft to all eight words
+
+    psubw       m2, m0                      ; bottomRow[i] = bottomLeft - topRow[i]
+    psllw       m0, 2                       ; topRow[i] * 4
+    punpcklqdq  m3, m2, m1                  ; m3 = [bottomRow | 0]
+    psubw       m0, m3                      ; pre-bias: after the first "+= 2 * bottomRow" the low qword
+                                            ; holds +1 * bottomRow (row 0), the high qword +2 * bottomRow (row 1)
+    paddw       m2, m2                      ; step for each two-row iteration = 2 * bottomRow
+
+    ; Two rows per iteration; left_off is the byte offset of the first row's left pixel.
+    ; Unrolled with %assign/%rep so that no multi-line helper macro stays defined afterwards
+    ; (%undef only removes single-line macros).
+%assign left_off 0
+%rep 2
+    ; first row of the pair: load left[y] once, derive both terms from it
+    movzx       r4d, word [r2 + left_off]
+    mov         r5d, r6d
+    sub         r5d, r4d                    ; rightColumn = topRight - left[y]
+    lea         r4d, [r4 * 4 + 4]           ; horPred = left[y] * 4 + 4
+    movd        m3, r4d
+    movd        m4, r5d
+    pshuflw     m3, m3, 0
+    pshuflw     m4, m4, 0
+
+    ; second row of the pair
+    movzx       r4d, word [r2 + left_off + 2]
+    mov         r5d, r6d
+    sub         r5d, r4d                    ; rightColumn = topRight - left[y + 1]
+    lea         r4d, [r4 * 4 + 4]           ; horPred = left[y + 1] * 4 + 4
+    movd        m1, r4d
+    pshuflw     m1, m1, 0
+    punpcklqdq  m3, m1                      ; horPred
+    movd        m1, r5d
+    pshuflw     m1, m1, 0
+    punpcklqdq  m4, m1                      ; rightColumnN
+
+    pmullw      m4, [multi_2Row]            ; per-column weights * rightColumn
+    paddw       m3, m4
+    paddw       m0, m2                      ; advance the vertical term by two rows
+    paddw       m3, m0
+    psraw       m3, 3
+
+    movh        [r0], m3                    ; row y
+    pshufd      m3, m3, 0xAE                ; move high qword (row y + 1) into the low qword
+    movh        [r0 + r1], m3               ; row y + 1
+    lea         r0, [r0 + 2 * r1]
+%assign left_off left_off + 4
+%endrep
+%undef left_off
+
+    RET
More information about the x265-devel
mailing list