[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4 - mode 11, 12, 13
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Tue Dec 10 12:08:34 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386673691 -19800
# Tue Dec 10 16:38:11 2013 +0530
# Node ID 0979012b3c8b4eef4569b5a2b6269bf2dc015374
# Parent 6f874e1d99c54ba751b9b397ff07da1e02b6aea6
asm: 16bpp asm code for intra_pred_ang4 - mode 11,12,13
diff -r 6f874e1d99c5 -r 0979012b3c8b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Dec 10 15:44:02 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Dec 10 16:38:11 2013 +0530
@@ -735,6 +735,12 @@
SETUP_INTRA_ANG4(8, 8, sse4);
SETUP_INTRA_ANG4(9, 9, sse4);
SETUP_INTRA_ANG4(10, 10, sse4);
+ SETUP_INTRA_ANG4(11, 11, sse4);
+ SETUP_INTRA_ANG4(12, 12, sse4);
+ SETUP_INTRA_ANG4(13, 13, sse4);
+ SETUP_INTRA_ANG4(23, 13, sse4);
+ SETUP_INTRA_ANG4(24, 12, sse4);
+ SETUP_INTRA_ANG4(25, 11, sse4);
SETUP_INTRA_ANG4(26, 26, sse4);
SETUP_INTRA_ANG4(27, 9, sse4);
SETUP_INTRA_ANG4(28, 8, sse4);
diff -r 6f874e1d99c5 -r 0979012b3c8b source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Tue Dec 10 15:44:02 2013 +0530
+++ b/source/common/x86/intrapred16.asm Tue Dec 10 16:38:11 2013 +0530
@@ -790,3 +790,60 @@
.quit:
RET
+
+cglobal intra_pred_ang4_11, 3,4,8 ; 4x4 angular intra prediction (16bpp), mode 11; SETUP_INTRA_ANG4(25, 11, sse4) routes mode 25 here too
+ cmp r4m, byte 25 ; is argument 4 equal to 25? (presumably the direction mode - confirm against the C prototype)
+ cmove r2, r3mp ; if so, read the source samples through argument 3 instead of argument 2
+ lea r3, [ang_table + 24 * 16] ; r3 = ang_table entry 24 (entries are addressed in 16-byte steps)
+ movu m2, [r2] ; [x x x 4 3 2 1 0]
+ palignr m1, m2, 2 ; [x x x x 4 3 2 1]
+ punpcklwd m2, m1 ; [4 3 3 2 2 1 1 0]
+ mova m3, m2 ; m3, m4, m5 = copies of the interleaved pairs in m2
+ mova m4, m2
+ mova m5, m2
+
+ mova m0, [r3 + 6 * 16] ; [30] = ang_table entry 24 + 6
+ mova m1, [r3 + 4 * 16] ; [28] = ang_table entry 24 + 4
+ mova m6, [r3 + 2 * 16] ; [26] = ang_table entry 24 + 2
+ mova m7, [r3 + 0 * 16] ; [24] = ang_table entry 24 + 0
+ jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump into intra_pred_ang4_3's shared 4x4 code; presumably consumes m0-m7 as set up above (label not shown here)
+
+
+cglobal intra_pred_ang4_12, 3,4,8 ; 4x4 angular intra prediction (16bpp), mode 12; SETUP_INTRA_ANG4(24, 12, sse4) routes mode 24 here too
+ cmp r4m, byte 24 ; is argument 4 equal to 24? (presumably the direction mode - confirm against the C prototype)
+ cmove r2, r3mp ; if so, read the source samples through argument 3 instead of argument 2
+ lea r3, [ang_table + 20 * 16] ; r3 = ang_table entry 20 (entries are addressed in 16-byte steps)
+ movu m2, [r2] ; [x x x 4 3 2 1 0]
+ palignr m1, m2, 2 ; [x x x x 4 3 2 1]
+ punpcklwd m2, m1 ; [4 3 3 2 2 1 1 0]
+ mova m3, m2 ; m3, m4, m5 = copies of the interleaved pairs in m2
+ mova m4, m2
+ mova m5, m2
+
+ mova m0, [r3 + 7 * 16] ; [27] = ang_table entry 20 + 7
+ mova m1, [r3 + 2 * 16] ; [22] = ang_table entry 20 + 2
+ mova m6, [r3 - 3 * 16] ; [17] = ang_table entry 20 - 3
+ mova m7, [r3 - 8 * 16] ; [12] = ang_table entry 20 - 8
+ jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump into intra_pred_ang4_3's shared 4x4 code; presumably consumes m0-m7 as set up above (label not shown here)
+
+
+cglobal intra_pred_ang4_13, 4,4,8 ; 4x4 angular intra prediction (16bpp), mode 13; SETUP_INTRA_ANG4(23, 13, sse4) routes mode 23 here too
+ cmp r4m, byte 23 ; is argument 4 equal to 23? (presumably the direction mode - confirm against the C prototype)
+ jnz .load
+ xchg r2, r3 ; yes: swap the two source pointers so the loads below read from the other array
+.load:
+ movu m5, [r2 - 2] ; [x x 4 3 2 1 0 x]
+ palignr m2, m5, 2 ; [x x x 4 3 2 1 0]
+ palignr m0, m5, 4 ; [x x x x 4 3 2 1]
+ pinsrw m5, [r3 + 8], 0 ; overwrite word 0 of m5 with the 16-bit sample at byte offset 8 of the other array (r3)
+ punpcklwd m5, m2 ; [3 2 2 1 1 0 0 x]
+ punpcklwd m2, m0 ; [4 3 3 2 2 1 1 0]
+ mova m3, m2 ; m3, m4 = copies of m2; m5 keeps the pairs that include the inserted sample
+ mova m4, m2
+
+ lea r3, [ang_table + 21 * 16] ; the r3 pointer is no longer needed after the pinsrw; reuse r3 as table base (entry 21)
+ mova m0, [r3 + 2 * 16] ; [23] = ang_table entry 21 + 2
+ mova m1, [r3 - 7 * 16] ; [14] = ang_table entry 21 - 7
+ mova m6, [r3 - 16 * 16] ; [ 5] = ang_table entry 21 - 16
+ mova m7, [r3 + 7 * 16] ; [28] = ang_table entry 21 + 7
+ jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump into intra_pred_ang4_3's shared 4x4 code; presumably consumes m0-m7 as set up above (label not shown here)
More information about the x265-devel
mailing list