[x265] [PATCH] asm: assembly code for intra_pred_planar[4x4]
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Mon Nov 25 14:49:27 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385387273 -19800
# Mon Nov 25 19:17:53 2013 +0530
# Node ID c070e25af31107c7c5a5a6cb5c5e049871c56e22
# Parent 10f605bd053009c8c981c7529322fecd1e54af7b
asm: assembly code for intra_pred_planar[4x4]
diff -r 10f605bd0530 -r c070e25af311 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Nov 22 14:59:34 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 25 19:17:53 2013 +0530
@@ -663,6 +663,8 @@
p.intra_pred_dc[BLOCK_8x8] = x265_intra_pred_dc8_sse4;
p.intra_pred_dc[BLOCK_16x16] = x265_intra_pred_dc16_sse4;
p.intra_pred_dc[BLOCK_32x32] = x265_intra_pred_dc32_sse4;
+
+ p.intra_pred_planar[BLOCK_4x4] = x265_intra_pred_planar4_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r 10f605bd0530 -r c070e25af311 source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm Fri Nov 22 14:59:34 2013 -0600
+++ b/source/common/x86/intrapred.asm Mon Nov 25 19:17:53 2013 +0530
@@ -26,7 +26,7 @@
SECTION_RODATA 32
-
+multi_2Row: dw 1, 2, 3, 4, 1, 2, 3, 4 ; per-column weights (x + 1) for x = 0..3, once per row of a two-row batch; multiplied with rightColumn in COMP_PRED_PLANAR_2ROW
SECTION .text
@@ -362,3 +362,63 @@
%endrep
RET
+
+INIT_XMM sse4 ; 4x4 planar intra prediction on byte pixels; C side declares it as x265_intra_pred_planar4_sse4(above, left, dst, dstStride)
+cglobal intra_pred_planar4, 4,7,5, above, left, dst, dstStride ; r0 = above, r1 = left, r2 = dst, r3 = dstStride; 4,7,5 matches the r0-r6 and m0-m4 used below
+; dst[y][x] = (4*left[y] + 4 + (x+1)*(topRight - left[y]) + 4*above[x] + (y+1)*(bottomLeft - above[x])) >> 3, evaluated in 16-bit lanes
+ pmovzxbw m0, [r0] ; topRow[i] = above[i], zero-extended to words (reads 8 bytes; words 0-3 are the ones kept)
+ punpcklqdq m0, m0 ; copy topRow into both 64-bit halves: low half serves the first row of a pair, high half the second
+
+ pxor m1, m1 ; m1 = 0, used as shuffle mask and as zero source
+ movd m2, [r1 + 4] ; byte 0 of m2 = bottomLeft = left[4] (reads 4 bytes)
+ movzx r6d, byte [r0 + 4] ; r6d = topRight = above[4], kept for the whole function
+ pshufb m2, m1 ; all-zero mask replicates byte 0 (bottomLeft) into every byte
+ punpcklbw m2, m1 ; widen to 8 words of bottomLeft
+ psubw m2, m0 ; bottomRow[i] = bottomLeft - topRow[i], in both halves
+ psllw m0, 2 ; topRow[i] *= 4
+ punpcklqdq m3, m2, m1 ; m3 = bottomRow in the low half, zero in the high half
+ psubw m0, m3 ; pre-bias: low half = 4*topRow - bottomRow, high half = 4*topRow
+ paddw m2, m2 ; m2 = 2*bottomRow, the step added to m0 once per two-row batch
+; COMP_PRED_PLANAR_2ROW k: predicts rows k and k+1, stores 4 bytes per row and advances r2 by two rows; overwrites r4, r5, m1, m3, m4 and updates m0
+%macro COMP_PRED_PLANAR_2ROW 1
+ movzx r4d, byte [r1 + %1] ; r4d = left[k]
+ lea r4d, [r4d * 4 + 4] ; 4*left[k] + 4 (the +4 is the rounding term for the final >> 3)
+ movd m3, r4d
+ pshuflw m3, m3, 0 ; replicate into the low 4 words
+
+ movzx r4d, byte [r1 + %1 + 1] ; r4d = left[k+1]
+ lea r4d, [r4d * 4 + 4] ; 4*left[k+1] + 4
+ movd m4, r4d
+ pshuflw m4, m4, 0 ; replicate into the low 4 words
+ punpcklqdq m3, m4 ; horPred: low half = row k, high half = row k+1
+
+ movzx r4d, byte [r1 + %1] ; reload left[k]
+ mov r5d, r6d
+ sub r5d, r4d ; r5d = topRight - left[k] (may be negative; only the low 16 bits are used)
+ movd m4, r5d
+ pshuflw m4, m4, 0 ; replicate into the low 4 words
+
+ movzx r4d, byte [r1 + %1 + 1] ; reload left[k+1]
+ mov r5d, r6d
+ sub r5d, r4d ; r5d = topRight - left[k+1]
+ movd m1, r5d ; m1 is scratch from here on; its zero value is no longer needed
+ pshuflw m1, m1, 0 ; replicate into the low 4 words
+ punpcklqdq m4, m1 ; rightColumn: low half = row k, high half = row k+1
+
+ pmullw m4, [multi_2Row] ; (x+1) * rightColumn for x = 0..3 in each half
+ paddw m3, m4 ; horizontal term complete
+ paddw m0, m2 ; vertical term: low half = 4*topRow + (k+1)*bottomRow, high half = 4*topRow + (k+2)*bottomRow
+ paddw m3, m0 ; horizontal + vertical
+ psraw m3, 3 ; divide by 8
+ packuswb m3, m3 ; saturate words to unsigned bytes: bytes 0-3 = row k, bytes 4-7 = row k+1
+
+ movd [r2], m3 ; store row k (4 bytes)
+ pshufd m3, m3, 0x55 ; replicate dword 1 (row k+1) so movd can store it
+ movd [r2 + r3], m3 ; store row k+1 at dst + dstStride
+ lea r2, [r2 + 2 * r3] ; dst += 2 * dstStride
+%endmacro
+
+ COMP_PRED_PLANAR_2ROW 0 ; rows 0 and 1
+ COMP_PRED_PLANAR_2ROW 2 ; rows 2 and 3
+
+ RET
diff -r 10f605bd0530 -r c070e25af311 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Fri Nov 22 14:59:34 2013 -0600
+++ b/source/common/x86/intrapred.h Mon Nov 25 19:17:53 2013 +0530
@@ -31,4 +31,6 @@
void x265_intra_pred_dc16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
void x265_intra_pred_dc32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
+void x265_intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride); // 4x4 planar intra prediction, implemented in intrapred.asm
+
#endif // ifndef X265_INTRAPRED_H
More information about the x265-devel
mailing list