[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4_10
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Tue Dec 10 11:00:26 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386669618 -19800
# Tue Dec 10 15:30:18 2013 +0530
# Node ID 730f6b622dbcdb4192743de304fb351bb2fb8ae3
# Parent 285a4d8c42a07d4c3a285c657da609801391c4a2
asm: 16bpp asm code for intra_pred_ang4_10
diff -r 285a4d8c42a0 -r 730f6b622dbc source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Dec 09 21:44:11 2013 +0550
+++ b/source/common/x86/asm-primitives.cpp Tue Dec 10 15:30:18 2013 +0530
@@ -734,6 +734,7 @@
SETUP_INTRA_ANG4(7, 7, sse4);
SETUP_INTRA_ANG4(8, 8, sse4);
SETUP_INTRA_ANG4(9, 9, sse4);
+ SETUP_INTRA_ANG4(10, 10, sse4);
SETUP_INTRA_ANG4(27, 9, sse4);
SETUP_INTRA_ANG4(28, 8, sse4);
SETUP_INTRA_ANG4(29, 7, sse4);
diff -r 285a4d8c42a0 -r 730f6b622dbc source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm Mon Dec 09 21:44:11 2013 +0550
+++ b/source/common/x86/const-a.asm Tue Dec 10 15:30:18 2013 +0530
@@ -41,6 +41,8 @@
const deinterleave_shufd, dd 0,4,1,5,2,6,3,7
const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
const pb_unpackbd2, times 2 db 4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7
+const pb_unpackwq1, db 0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3 ; pshufb mask: source word 0 -> result words 0-3, source word 1 -> result words 4-7
+const pb_unpackwq2, db 4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7 ; pshufb mask: source word 2 -> result words 0-3, source word 3 -> result words 4-7
const pb_01, times 8 db 0,1
const pb_0, times 16 db 0
diff -r 285a4d8c42a0 -r 730f6b622dbc source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm Mon Dec 09 21:44:11 2013 +0550
+++ b/source/common/x86/intrapred16.asm Tue Dec 10 15:30:18 2013 +0530
@@ -44,7 +44,8 @@
cextern pw_4096
cextern multiL
cextern multi_2Row
-
+cextern pb_unpackwq1
+cextern pb_unpackwq2
;-------------------------------------------------------------------------------------------------------
; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
@@ -727,3 +728,50 @@
mova m6, [r3 + 2 * 16] ; [ 6]
mova m7, [r3 + 4 * 16] ; [ 8]
jmp mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4)
+
+;-----------------------------------------------------------------------------------------------------------
+; intra_pred_ang4_10: 4x4 block of 16-bit samples in which every row is one replicated reference sample
+;   r0  = dst
+;   r1  = dst stride; doubled before use because samples are two bytes wide
+;   r2  = side reference; samples [1..4] supply rows 0..3
+;   r3m = second reference, read only when filtering (samples [0..4])
+;   r5m = filter flag; non-zero applies the gradient correction to row 0
+; NOTE(review): argument roles presumably follow intra_pred_dc (dst, dstStride, left, above, dirMode,
+;   filter) -- confirm against the C prototype.
+; Filtered row 0 is clamped to [0, 1023]; this assumes 10-bit samples in 16bpp builds -- TODO confirm.
+;-----------------------------------------------------------------------------------------------------------
+cglobal intra_pred_ang4_10, 3,3,4
+    movh        m0, [r2 + 2]                ; [4 3 2 1]
+    pshufb      m2, m0, [pb_unpackwq2]      ; [4 4 4 4 3 3 3 3]
+    pshufb      m0, [pb_unpackwq1]          ; [2 2 2 2 1 1 1 1]
+    add         r1, r1                      ; stride * 2
+    movhlps     m1, m0                      ; [2 2 2 2]
+    movhlps     m3, m2                      ; [4 4 4 4]
+    movh        [r0 + r1], m1               ; row 1
+    movh        [r0 + r1 * 2], m2           ; row 2
+    lea         r1, [r1 * 3]
+    movh        [r0 + r1], m3               ; row 3
+
+    cmp         r5m, byte 0
+    jz          .quit                       ; unfiltered: row 0 is the low half of m0 as-is
+
+    ; filter: row0[x] = clip(side[1] + ((ref[1 + x] - ref[0]) >> 1))
+    mov         r2, r3mp
+    movu        m1, [r2]                    ; [7 6 5 4 3 2 1 0]
+    pshufb      m2, m1, [pb_unpackwq1]      ; [0 0 0 0]
+    palignr     m1, m1, 2                   ; [4 3 2 1]
+    psubw       m1, m2
+    psraw       m1, 1
+    paddw       m0, m1
+
+    ; Clamp to the sample range, not to 16 bits: the sum can fall outside [0, 1023], and an
+    ; unsigned-word saturating pack would let values above 1023 through to dst.
+    pxor        m1, m1
+    pmaxsw      m0, m1                      ; lower bound 0
+    pcmpeqw     m1, m1
+    psrlw       m1, 16 - 10                 ; 0x03FF in every word
+    pminsw      m0, m1                      ; upper bound 1023
+
+.quit:
+    movh        [r0], m0                    ; row 0
+    RET
More information about the x265-devel
mailing list