[x265] [PATCH] assembly code for intra_pred_planar_16x16 for 10 and 12-bit
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Tue Dec 10 15:48:24 CET 2013
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1386681605 -19800
# Tue Dec 10 18:50:05 2013 +0530
# Node ID 287192393599065089606f9904d046dba74a8aa7
# Parent e4c13676c4b5a4702a1b70ca91af242a74f4c1a5
assembly code for intra_pred_planar_16x16 for 10 and 12-bit
diff -r e4c13676c4b5 -r 287192393599 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Tue Dec 10 20:51:47 2013 +0550
+++ b/source/common/x86/intrapred16.asm Tue Dec 10 18:50:05 2013 +0530
@@ -45,6 +45,7 @@
cextern pd_32
cextern pw_4096
cextern multiL
+cextern multiH
cextern multi_2Row
cextern pb_unpackwq1
cextern pb_unpackwq2
@@ -546,19 +547,21 @@
; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
;-----------------------------------------------------------------------------------------------------------
INIT_XMM sse4
-%if ARCH_X86_64 == 1
+%if (BIT_DEPTH == 12)
+
+%if (ARCH_X86_64 == 1)
cglobal intra_pred_planar16, 4,7,8+3
-%define bottomRow0 m7
-%define bottomRow1 m8
-%define bottomRow2 m9
-%define bottomRow3 m10
+ %define bottomRow0 m7
+ %define bottomRow1 m8
+ %define bottomRow2 m9
+ %define bottomRow3 m10
%else
+cglobal intra_pred_planar16, 4,7,8, 0-3*mmsize
+ %define bottomRow0 [rsp + 0*mmsize]
+ %define bottomRow1 [rsp + 1*mmsize]
+ %define bottomRow2 [rsp + 2*mmsize]
+ %define bottomRow3 m7
%endif
-cglobal intra_pred_planar16, 4,7,8, 0-3*mmsize
-%define bottomRow0 [rsp + 0*mmsize]
-%define bottomRow1 [rsp + 1*mmsize]
-%define bottomRow2 [rsp + 2*mmsize]
-%define bottomRow3 m7
add r2, 2
add r3, 2
@@ -646,7 +649,81 @@
inc r6d
cmp r6d, 16
jnz .loopH
+
RET
+%else ; BIT-DEPTH == 10
+INIT_XMM sse4
+cglobal intra_pred_planar16, 4,6,7
+ add r2, 2
+ add r3, 2
+ add r1, r1
+
+ movu m1, [r3] ; topRow[0-7]
+ movu m2, [r3 + 16] ; topRow[8-15]
+
+ movd m3, [r2 + 32]
+ pshuflw m3, m3, 0
+ pshufd m3, m3, 0
+ movzx r4d, word [r3 + 32] ; topRight = above[16]
+
+ psubw m4, m3, m1 ; v_bottomRow[0]
+ psubw m3, m2 ; v_bottomRow[1]
+
+ psllw m1, 4
+ psllw m2, 4
+
+%macro PRED_PLANAR_ROW16 1
+ movzx r5d, word [r2 + %1 * 2]
+ add r5d, r5d
+ lea r5d, [r5d * 8 + 16]
+ movd m5, r5d
+ pshuflw m5, m5, 0
+ pshufd m5, m5, 0 ; horPred
+
+ movzx r5d, word [r2 + %1 * 2]
+ mov r3d, r4d
+ sub r3d, r5d
+ movd m0, r3d
+ pshuflw m0, m0, 0
+ pshufd m0, m0, 0
+
+ pmullw m6, m0, [multiL]
+ paddw m6, m5
+ paddw m1, m4
+ paddw m6, m1
+ psraw m6, 5
+
+ pmullw m0, m0, [multiH]
+ paddw m5, m0
+ paddw m2, m3
+ paddw m5, m2
+ psraw m5, 5
+
+ movu [r0], m6
+ movu [r0 + 16], m5
+ add r0, r1
+%endmacro
+
+ PRED_PLANAR_ROW16 0
+ PRED_PLANAR_ROW16 1
+ PRED_PLANAR_ROW16 2
+ PRED_PLANAR_ROW16 3
+ PRED_PLANAR_ROW16 4
+ PRED_PLANAR_ROW16 5
+ PRED_PLANAR_ROW16 6
+ PRED_PLANAR_ROW16 7
+ PRED_PLANAR_ROW16 8
+ PRED_PLANAR_ROW16 9
+ PRED_PLANAR_ROW16 10
+ PRED_PLANAR_ROW16 11
+ PRED_PLANAR_ROW16 12
+ PRED_PLANAR_ROW16 13
+ PRED_PLANAR_ROW16 14
+ PRED_PLANAR_ROW16 15
+%undef PRED_PLANAR_ROW16
+
+ RET
+%endif
;-----------------------------------------------------------------------------
More information about the x265-devel
mailing list