[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4_8 and intra_pred_ang4_9
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Mon Dec 9 13:17:31 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386591441 -19800
# Mon Dec 09 17:47:21 2013 +0530
# Node ID 9f68d1d7b3d903d4f72aa3451a83c2363a114dd9
# Parent 9ceeba76f4714972bd93bda3d1513e8bb68a2384
asm: 16bpp asm code for intra_pred_ang4_8 and intra_pred_ang4_9
diff -r 9ceeba76f471 -r 9f68d1d7b3d9 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Dec 09 17:08:00 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Dec 09 17:47:21 2013 +0530
@@ -691,6 +691,10 @@
SETUP_INTRA_ANG4(5, 5, sse4);
SETUP_INTRA_ANG4(6, 6, sse4);
SETUP_INTRA_ANG4(7, 7, sse4);
+ SETUP_INTRA_ANG4(8, 8, sse4);
+ SETUP_INTRA_ANG4(9, 9, sse4);
+ SETUP_INTRA_ANG4(27, 9, sse4);
+ SETUP_INTRA_ANG4(28, 8, sse4);
SETUP_INTRA_ANG4(29, 7, sse4);
SETUP_INTRA_ANG4(30, 6, sse4);
SETUP_INTRA_ANG4(31, 5, sse4);
diff -r 9ceeba76f471 -r 9f68d1d7b3d9 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Mon Dec 09 17:08:00 2013 +0530
+++ b/source/common/x86/intrapred16.asm Mon Dec 09 17:47:21 2013 +0530
@@ -557,3 +557,38 @@
mova m6, [r3 + 7 * 16] ; [27]
mova m7, [r3 - 16 * 16] ; [ 4]
jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4)
+
+cglobal intra_pred_ang4_8, 3,4,8           ; sets up m0-m7, then finishes in intra_pred_ang4_3's .do_filter4x4
+    cmp         r4m, byte 28               ; is argument 4 equal to 28?
+    cmove       r2, r3mp                   ; if so, take the sample pointer from argument 3 instead
+    lea         r3, [ang_table + 13 * 16]  ; keep after the cmove - NOTE(review): r3mp may be r3 itself on x86-64, confirm in x86inc
+    movu        m0, [r2 + 2]               ; [8 7 6 5 4 3 2 1]  unaligned load starting 2 bytes past r2
+    mova        m6, [r3 +  2 * 16]         ; [15]  i.e. ang_table + 15 * 16
+    mova        m7, [r3 +  7 * 16]         ; [20]  i.e. ang_table + 20 * 16
+    palignr     m1, m0, 2                  ; [x 8 7 6 5 4 3 2]  top word comes from stale m1, ignored below
+    punpcklwd   m2, m0, m1                 ; [5 4 4 3 3 2 2 1]  only the low four words of m0/m1 are used
+
+    mova        m0, [r3 -  8 * 16]         ; [ 5]  m0 is free again once m2 has been built
+    mova        m1, [r3 -  3 * 16]         ; [10]  likewise m1
+    mova        m5, m2                     ; m3, m4 and m5 each receive a copy of the pairs in m2
+    mova        m4, m2
+    mova        m3, m2
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4)
+
+
+cglobal intra_pred_ang4_9, 3,4,8           ; sets up m0-m7, then finishes in intra_pred_ang4_3's .do_filter4x4
+    cmp         r4m, byte 27               ; is argument 4 equal to 27?
+    cmove       r2, r3mp                   ; if so, take the sample pointer from argument 3 instead
+    lea         r3, [ang_table + 4 * 16]   ; keep after the cmove - NOTE(review): r3mp may be r3 itself on x86-64, confirm in x86inc
+    movu        m0, [r2 + 2]               ; [8 7 6 5 4 3 2 1]  unaligned load starting 2 bytes past r2
+    mova        m6, [r3 +  2 * 16]         ; [ 6]  i.e. ang_table + 6 * 16
+    mova        m7, [r3 +  4 * 16]         ; [ 8]  i.e. ang_table + 8 * 16
+    palignr     m1, m0, 2                  ; [x 8 7 6 5 4 3 2]  top word comes from stale m1, ignored below
+    punpcklwd   m2, m0, m1                 ; [5 4 4 3 3 2 2 1]  only the low four words of m0/m1 are used
+
+    mova        m0, [r3 -  2 * 16]         ; [ 2]  m0 is free again once m2 has been built
+    mova        m1, [r3]                   ; [ 4]  r3 itself already points at ang_table + 4 * 16
+    mova        m5, m2                     ; m3, m4 and m5 each receive a copy of the pairs in m2
+    mova        m4, m2
+    mova        m3, m2
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4)
More information about the x265-devel
mailing list