[x265] [PATCH] asm: assembly code for intra_pred_planar[8x8]
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Tue Nov 26 12:14:37 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385464412 -19800
# Tue Nov 26 16:43:32 2013 +0530
# Node ID d143056e5535cf9d243b5f4700e203cd11abf473
# Parent 491fd3ee6fd11a52f50ba22b39b9e9596b8e7238
asm: assembly code for intra_pred_planar[8x8]
diff -r 491fd3ee6fd1 -r d143056e5535 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 25 14:00:56 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 26 16:43:32 2013 +0530
@@ -670,6 +670,7 @@
p.weight_pp = x265_weight_pp_sse4;
p.weight_sp = x265_weight_sp_sse4;
p.intra_pred_planar[BLOCK_4x4] = x265_intra_pred_planar4_sse4;
+ p.intra_pred_planar[BLOCK_8x8] = x265_intra_pred_planar8_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r 491fd3ee6fd1 -r d143056e5535 source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm Mon Nov 25 14:00:56 2013 -0600
+++ b/source/common/x86/intrapred.asm Tue Nov 26 16:43:32 2013 +0530
@@ -27,9 +27,12 @@
SECTION_RODATA 32
multi_2Row: dw 1, 2, 3, 4, 1, 2, 3, 4
+multiL: dw 1, 2, 3, 4, 5, 6, 7, 8 ; eight word weights (x + 1) for columns x = 0..7; used as the pmullw operand in intra_pred_planar8
SECTION .text
+cextern pw_8 ; word constant defined outside this file; added to left[y]*8 in intra_pred_planar8 (presumably eight words of 8 - confirm)
+
;-----------------------------------------------------------------------------
; void intra_pred_dc(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter)
;-----------------------------------------------------------------------------
@@ -363,6 +366,9 @@
RET
+;----------------------------------------------------------------------------------------
+; void intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
+;----------------------------------------------------------------------------------------
INIT_XMM sse4
cglobal intra_pred_planar4, 4,7,5, above, left, dst, dstStride
@@ -422,3 +428,65 @@
COMP_PRED_PLANAR_2ROW 2
RET
+
+;----------------------------------------------------------------------------------------
+; void intra_pred_planar8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
+;----------------------------------------------------------------------------------------
+INIT_XMM sse4 ; x86inc: map m0..mN to xmm registers and tag the symbol with the _sse4 suffix
+cglobal intra_pred_planar8, 4,4,7, above, left, dst, dstStride ; 8x8 planar intra prediction; x86inc: 4 args, 4 GPRs, 7 xmm regs; r0=above, r1=left, r2=dst, r3=dstStride
+
+ pxor m0, m0 ; m0 = 0: serves as the pshufb mask (select byte 0) and as the zero high bytes for punpcklbw
+ pmovzxbw m1, [r0] ; v_topRow: above[0..7] zero-extended from bytes to words (pixels are read as 8-bit here)
+ pmovzxbw m2, [r1] ; v_leftColumn: left[0..7] zero-extended from bytes to words
+
+ movd m3, [r0 + 8] ; topRight = above[8]; loads 4 bytes, only byte 0 is used below
+ movd m4, [r1 + 8] ; bottomLeft = left[8]; loads 4 bytes, only byte 0 is used below
+
+ pshufb m3, m0 ; all-zero mask replicates byte 0 (topRight) into every byte
+ pshufb m4, m0 ; all-zero mask replicates byte 0 (bottomLeft) into every byte
+ punpcklbw m3, m0 ; v_topRight: widen to eight words, each = topRight
+ punpcklbw m4, m0 ; v_bottomLeft: widen to eight words, each = bottomLeft
+
+ psubw m4, m1 ; v_bottomRow[x] = bottomLeft - above[x]
+ psubw m3, m2 ; v_rightColumn[y] = topRight - left[y]
+
+ psllw m1, 3 ; v_topRow[x] = above[x] * 8
+ psllw m2, 3 ; v_leftColumn[y] = left[y] * 8
+
+ paddw m6, m2, [pw_8] ; m6[y] = left[y] * 8 + pw_8 (presumably the rounding offset 8 - confirm); m2 is free for reuse after this
+
+%macro COMP_PRED_PLANAR_ROW 1 ; emit one output row; %1 = row index y (0..7). m1 accumulates across calls, so rows must be expanded in order
+ %if (%1 < 4) ; rows 0..3 live in the low four words of m6 / m3
+ pshuflw m5, m6, 0x55 * %1 ; imm = selector %1 in all four 2-bit fields: copy word y across the low four words
+ pshufd m5, m5, 0 ; replicate dword 0 -> all eight words = m6[y]
+ pshuflw m2, m3, 0x55 * %1 ; same broadcast for rightColumn[y]; m2 is scratch from here on
+ pshufd m2, m2, 0 ; all eight words = rightColumn[y]
+ %else ; rows 4..7 live in the high four words of m6 / m3
+ pshufhw m5, m6, 0x55 * (%1 - 4) ; copy word y across the high four words
+ pshufd m5, m5, 0xAA ; replicate dword 2 (first high dword) -> all eight words = m6[y]
+ pshufhw m2, m3, 0x55 * (%1 - 4) ; same broadcast for rightColumn[y]
+ pshufd m2, m2, 0xAA ; all eight words = rightColumn[y]
+ %endif
+
+ pmullw m2, [multiL] ; m2[x] = rightColumn[y] * (x + 1), using the 1..8 weight table
+ paddw m5, m2 ; horizontal term: m6[y] + (x + 1) * rightColumn[y]
+ paddw m1, m4 ; vertical term: topRow[x] += bottomRow[x], accumulated once per emitted row
+ paddw m5, m1 ; sum of horizontal and vertical terms
+ psraw m5, 4 ; shift right by 4 (divide the sum by 16)
+ packuswb m5, m5 ; pack words to bytes with unsigned saturation; low 8 bytes hold the row
+
+ movh [r2], m5 ; store the 8 predicted pixels of this row to dst
+ lea r2, [r2 + r3] ; advance dst by dstStride to the next row
+
+%endmacro ; COMP_PRED_PLANAR_ROW
+
+ COMP_PRED_PLANAR_ROW 0 ; rows are fully unrolled, y = 0..7 in order
+ COMP_PRED_PLANAR_ROW 1
+ COMP_PRED_PLANAR_ROW 2
+ COMP_PRED_PLANAR_ROW 3
+ COMP_PRED_PLANAR_ROW 4 ; first row taken from the high halves of m6 / m3
+ COMP_PRED_PLANAR_ROW 5
+ COMP_PRED_PLANAR_ROW 6
+ COMP_PRED_PLANAR_ROW 7
+
+ RET
diff -r 491fd3ee6fd1 -r d143056e5535 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h Mon Nov 25 14:00:56 2013 -0600
+++ b/source/common/x86/intrapred.h Tue Nov 26 16:43:32 2013 +0530
@@ -32,5 +32,6 @@
void x265_intra_pred_dc32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
void x265_intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
+void x265_intra_pred_planar8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
#endif // ifndef X265_INTRAPRED_H
More information about the x265-devel
mailing list