[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4_10

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue Dec 10 11:00:26 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386669618 -19800
#      Tue Dec 10 15:30:18 2013 +0530
# Node ID 730f6b622dbcdb4192743de304fb351bb2fb8ae3
# Parent  285a4d8c42a07d4c3a285c657da609801391c4a2
asm: 16bpp asm code for intra_pred_ang4_10

diff -r 285a4d8c42a0 -r 730f6b622dbc source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Dec 09 21:44:11 2013 +0550
+++ b/source/common/x86/asm-primitives.cpp	Tue Dec 10 15:30:18 2013 +0530
@@ -734,6 +734,7 @@
         SETUP_INTRA_ANG4(7, 7, sse4);
         SETUP_INTRA_ANG4(8, 8, sse4);
         SETUP_INTRA_ANG4(9, 9, sse4);
+        SETUP_INTRA_ANG4(10, 10, sse4);
         SETUP_INTRA_ANG4(27, 9, sse4);
         SETUP_INTRA_ANG4(28, 8, sse4);
         SETUP_INTRA_ANG4(29, 7, sse4);
diff -r 285a4d8c42a0 -r 730f6b622dbc source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm	Mon Dec 09 21:44:11 2013 +0550
+++ b/source/common/x86/const-a.asm	Tue Dec 10 15:30:18 2013 +0530
@@ -41,6 +41,8 @@
 const deinterleave_shufd, dd 0,4,1,5,2,6,3,7
 const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
 const pb_unpackbd2, times 2 db 4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7
+const pb_unpackwq1, db 0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3  ; pshufb mask (16 bytes): word 0 x4 -> low qword, word 1 x4 -> high qword
+const pb_unpackwq2, db 4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7  ; pshufb mask (16 bytes): word 2 x4 -> low qword, word 3 x4 -> high qword
 
 const pb_01,       times  8 db 0,1
 const pb_0,        times 16 db 0
diff -r 285a4d8c42a0 -r 730f6b622dbc source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Mon Dec 09 21:44:11 2013 +0550
+++ b/source/common/x86/intrapred16.asm	Tue Dec 10 15:30:18 2013 +0530
@@ -44,7 +44,8 @@
 cextern pw_4096
 cextern multiL
 cextern multi_2Row
-
+cextern pb_unpackwq1
+cextern pb_unpackwq2
 
 ;-------------------------------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
@@ -727,3 +728,50 @@
     mova        m6, [r3 +  2 * 16]  ; [ 6]
     mova        m7, [r3 +  4 * 16]  ; [ 8]
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4)
+
+;-------------------------------------------------------------------------------------------------------
+; intra_pred_ang4_10: 4x4 block of 16-bit pixels, each row filled with one replicated reference sample.
+; Argument names are presumed to match intra_pred_dc (dst, dstStride, left, above, dirMode, filter);
+; TODO confirm against the C prototype. As used below:
+;   r0  (dst)       - destination; four 8-byte rows are stored
+;   r1  (dstStride) - row stride in pixels, doubled here to a byte stride
+;   r2  (left)      - words [1..4] are replicated into rows 0..3
+;   r3m (above)     - read only when filtering; a 16-byte load is issued, words [0..4] are used
+;   r5m (filter)    - non-zero: row 0 = clamp(left[1] + ((above[x + 1] - above[0]) >> 1)), x = 0..3
+;-------------------------------------------------------------------------------------------------------
+cglobal intra_pred_ang4_10, 3,3,4
+    movh        m0,             [r2 + 2]            ; [4 3 2 1]
+    pshufb      m2,             m0, [pb_unpackwq2]  ; [4 4 4 4 3 3 3 3]
+    pshufb      m0,             [pb_unpackwq1]      ; [2 2 2 2 1 1 1 1]
+    add         r1,             r1                  ; pixels -> bytes (2 bytes per pixel)
+    movhlps     m1,             m0                  ; [2 2 2 2]
+    movhlps     m3,             m2                  ; [4 4 4 4]
+    movh        [r0 + r1],      m1                  ; row 1
+    movh        [r0 + r1 * 2],  m2                  ; row 2
+    lea         r1,             [r1 * 3]
+    movh        [r0 + r1],      m3                  ; row 3
+
+    cmp         r5m,            byte 0
+    jz         .quit                                ; no filter: row 0 is the plain replicated sample
+
+    ; filter
+    mov         r2,             r3mp
+    movu        m1,             [r2]                ; [7 6 5 4 3 2 1 0]
+    pshufb      m2,             m1, [pb_unpackwq1]  ; [0 0 0 0]
+    palignr     m1,             m1, 2               ; [4 3 2 1]
+    psubw       m1,             m2                  ; above[x + 1] - above[0]
+    psraw       m1,             1                   ; arithmetic shift keeps the sign of the delta
+    paddw       m0,             m1                  ; signed word sums; low 4 words are row 0
+
+    ; Clamp row 0 to [0, 1023] with signed word min/max. A 16-bit saturating pack is not enough:
+    ; it only bounds the result to [0, 65535] and would let values above the pixel maximum through.
+    ; NOTE(review): assumes 10-bit samples (pixel max 0x03FF) in the 16bpp build - confirm.
+    pxor        m1,             m1                  ; lower bound: 0
+    pcmpeqw     m2,             m2                  ; 0xFFFF in every word
+    psrlw       m2,             6                   ; 0xFFFF >> 6 = 0x03FF = 1023, upper bound
+    pmaxsw      m0,             m1
+    pminsw      m0,             m2
+
+.quit:
+    movh        [r0],           m0                  ; row 0
+    RET


More information about the x265-devel mailing list