[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4 - mode 14, 15, 16
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Tue Dec 10 12:35:30 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386675301 -19800
# Tue Dec 10 17:05:01 2013 +0530
# Node ID ee69fed0ed3b5d79546bdbd1ac864b3cdebb4bc9
# Parent 0979012b3c8b4eef4569b5a2b6269bf2dc015374
asm: 16bpp asm code for intra_pred_ang4 - mode 14,15,16
diff -r 0979012b3c8b -r ee69fed0ed3b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Dec 10 16:38:11 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Dec 10 17:05:01 2013 +0530
@@ -738,6 +738,12 @@
SETUP_INTRA_ANG4(11, 11, sse4);
SETUP_INTRA_ANG4(12, 12, sse4);
SETUP_INTRA_ANG4(13, 13, sse4);
+ SETUP_INTRA_ANG4(14, 14, sse4);
+ SETUP_INTRA_ANG4(15, 15, sse4);
+ SETUP_INTRA_ANG4(16, 16, sse4);
+ SETUP_INTRA_ANG4(20, 16, sse4);
+ SETUP_INTRA_ANG4(21, 15, sse4);
+ SETUP_INTRA_ANG4(22, 14, sse4);
SETUP_INTRA_ANG4(23, 13, sse4);
SETUP_INTRA_ANG4(24, 12, sse4);
SETUP_INTRA_ANG4(25, 11, sse4);
diff -r 0979012b3c8b -r ee69fed0ed3b source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Tue Dec 10 16:38:11 2013 +0530
+++ b/source/common/x86/intrapred16.asm Tue Dec 10 17:05:01 2013 +0530
@@ -847,3 +847,72 @@
mova m6, [r3 - 16 * 16] ; [ 5]
mova m7, [r3 + 7 * 16] ; [28]
jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4)
+
+cglobal intra_pred_ang4_14, 4,4,8      ; 4x4 angular intra prediction (16bpp build); kernel registered for modes 14 and 22
+    cmp         r4m, byte 22            ; r4m = 5th argument; 22 selects the second mode served by this kernel
+    jnz         .load
+    xchg        r2, r3                  ; mode 22: swap the two reference pointers so one code path serves both modes
+.load:
+    movu        m5, [r2 - 2]            ; [x x 4 3 2 1 0 x]  8 words, starting one 16-bit sample before r2
+    palignr     m2, m5, 2               ; [x x x 4 3 2 1 0]
+    palignr     m0, m5, 4               ; [x x x x 4 3 2 1]
+    pinsrw      m5, [r3 + 4], 0         ; lane 0 ('x') <- word at byte offset 4 of the other reference
+    punpcklwd   m5, m2                  ; [3 2 2 1 1 0 0 x]
+    punpcklwd   m2, m0                  ; [4 3 3 2 2 1 1 0]
+    mova        m3, m2                  ; m3 duplicates m2
+    mova        m4, m5                  ; m4 duplicates m5
+    ; m2-m5 hold interleaved sample pairs; m0/m1/m6/m7 get one ang_table entry each (presumably the per-row weights used by .do_filter4x4)
+    lea         r3, [ang_table + 19 * 16] ; r3 = &ang_table[19]; entries are 16 bytes, bracketed numbers below are table indices
+    mova        m0, [r3 +  0 * 16]      ; [19]
+    mova        m1, [r3 - 13 * 16]      ; [ 6]
+    mova        m6, [r3 +  6 * 16]      ; [25]
+    mova        m7, [r3 -  7 * 16]      ; [12]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; continue in the shared tail of intra_pred_ang4_3 (not shown here)
+
+
+cglobal intra_pred_ang4_15, 4,4,8      ; 4x4 angular intra prediction (16bpp build); kernel registered for modes 15 and 21
+    cmp         r4m, byte 21            ; r4m = 5th argument; 21 selects the second mode served by this kernel
+    jnz         .load
+    xchg        r2, r3                  ; mode 21: swap the two reference pointers so one code path serves both modes
+.load:
+    movu        m3, [r2 - 2]            ; [x x 4 3 2 1 0 x]  8 words, starting one 16-bit sample before r2
+    palignr     m2, m3, 2               ; [x x x 4 3 2 1 0]
+    palignr     m0, m3, 4               ; [x x x x 4 3 2 1]
+    pinsrw      m3, [r3 + 4], 0         ; lane 0 ('x') <- word at byte offset 4 of the other reference
+    pslldq      m5, m3, 2               ; [x 4 3 2 1 0 x y]  m5 = m3 shifted up by one word; lane 0 is filled next
+    pinsrw      m5, [r3 + 8], 0         ; lane 0 ('y') <- word at byte offset 8 of the other reference
+    punpcklwd   m5, m3                  ; [2 1 1 0 0 x x y]
+    punpcklwd   m3, m2                  ; [3 2 2 1 1 0 0 x]
+    punpcklwd   m2, m0                  ; [4 3 3 2 2 1 1 0]
+    mova        m4, m3                  ; m4 duplicates m3
+    ; m2-m5 hold interleaved sample pairs; m0/m1/m6/m7 get one ang_table entry each (presumably the per-row weights used by .do_filter4x4)
+    lea         r3, [ang_table + 23 * 16] ; r3 = &ang_table[23]; entries are 16 bytes, bracketed numbers below are table indices
+    mova        m0, [r3 -  8 * 16]      ; [15]
+    mova        m1, [r3 +  7 * 16]      ; [30]
+    mova        m6, [r3 - 10 * 16]      ; [13]
+    mova        m7, [r3 +  5 * 16]      ; [28]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; continue in the shared tail of intra_pred_ang4_3 (not shown here)
+
+
+cglobal intra_pred_ang4_16, 4,4,8      ; 4x4 angular intra prediction (16bpp build); kernel registered for modes 16 and 20
+    cmp         r4m, byte 20            ; r4m = 5th argument; 20 selects the second mode served by this kernel
+    jnz         .load
+    xchg        r2, r3                  ; mode 20: swap the two reference pointers so one code path serves both modes
+.load:
+    movu        m3, [r2 - 2]            ; [x x 4 3 2 1 0 x]  8 words, starting one 16-bit sample before r2
+    palignr     m2, m3, 2               ; [x x x 4 3 2 1 0]
+    palignr     m0, m3, 4               ; [x x x x 4 3 2 1]
+    pinsrw      m3, [r3 + 4], 0         ; lane 0 ('x') <- word at byte offset 4 of the other reference
+    pslldq      m5, m3, 2               ; [x 4 3 2 1 0 x y]  m5 = m3 shifted up by one word; lane 0 is filled next
+    pinsrw      m5, [r3 + 6], 0         ; lane 0 ('y') <- word at byte offset 6 of the other reference
+    punpcklwd   m5, m3                  ; [2 1 1 0 0 x x y]
+    punpcklwd   m3, m2                  ; [3 2 2 1 1 0 0 x]
+    punpcklwd   m2, m0                  ; [4 3 3 2 2 1 1 0]
+    mova        m4, m3                  ; m4 duplicates m3
+    ; m2-m5 hold interleaved sample pairs; m0/m1/m6/m7 get one ang_table entry each (presumably the per-row weights used by .do_filter4x4)
+    lea         r3, [ang_table + 19 * 16] ; r3 = &ang_table[19]; entries are 16 bytes, bracketed numbers below are table indices
+    mova        m0, [r3 -  8 * 16]      ; [11]
+    mova        m1, [r3 +  3 * 16]      ; [22]
+    mova        m6, [r3 - 18 * 16]      ; [ 1]
+    mova        m7, [r3 -  7 * 16]      ; [12]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; continue in the shared tail of intra_pred_ang4_3 (not shown here)
More information about the x265-devel mailing list