[x265] [PATCH] asm: 16bpp code for intra_pred_ang4_26

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue Dec 10 11:14:17 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386670442 -19800
#      Tue Dec 10 15:44:02 2013 +0530
# Node ID 6f874e1d99c54ba751b9b397ff07da1e02b6aea6
# Parent  730f6b622dbcdb4192743de304fb351bb2fb8ae3
asm: 16bpp code for intra_pred_ang4_26

diff -r 730f6b622dbc -r 6f874e1d99c5 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Dec 10 15:30:18 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Dec 10 15:44:02 2013 +0530
@@ -735,6 +735,7 @@
         SETUP_INTRA_ANG4(8, 8, sse4);
         SETUP_INTRA_ANG4(9, 9, sse4);
         SETUP_INTRA_ANG4(10, 10, sse4);
+        SETUP_INTRA_ANG4(26, 26, sse4);
         SETUP_INTRA_ANG4(27, 9, sse4);
         SETUP_INTRA_ANG4(28, 8, sse4);
         SETUP_INTRA_ANG4(29, 7, sse4);
diff -r 730f6b622dbc -r 6f874e1d99c5 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Tue Dec 10 15:30:18 2013 +0530
+++ b/source/common/x86/intrapred16.asm	Tue Dec 10 15:44:02 2013 +0530
@@ -758,3 +758,64 @@
 .quit:
     movh        [r0],           m0
     RET
+
+;-----------------------------------------------------------------------------
+; intra_pred_ang4_26 - 4x4 intra prediction for 16-bit samples (16bpp build)
+;
+; NOTE(review): argument roles below are read off the register usage in this
+; routine; confirm them against the C prototype.
+;   r0  = destination block
+;   r1  = destination row stride, doubled on entry to step 2-byte samples
+;   r2  = side reference row; word 0 is subtracted from words 1..4
+;   r3  = main reference row; the four predicted samples start at [r3 + 2]
+;   r5m = filter flag; 0 skips the column-0 filter
+;
+; Every row of the block receives the same four samples. When the flag is
+; non-zero, column 0 is rewritten as
+;   clip(pred[0] + ((side[y + 1] - side[0]) >> 1)),  y = 0..3
+; with the result clipped to [0, (1 << BIT_DEPTH) - 1].
+; Clobbers: r1, r3, m0-m2, flags.
+;-----------------------------------------------------------------------------
+cglobal intra_pred_ang4_26, 4,4,3
+    movh        m0,             [r3 + 2]            ; low qword = [4 3 2 1]
+    add         r1,             r1                  ; stride *= 2 (2 bytes per sample)
+
+    ; unfiltered prediction: the same four samples go to every row
+    movh        [r0],           m0
+    movh        [r0 + r1],      m0
+    movh        [r0 + r1 * 2],  m0
+    lea         r3,             [r1 * 3]            ; r3 already consumed, reuse as 3 * stride
+    movh        [r0 + r3],      m0
+
+    ; column-0 filter, only when requested
+    cmp         r5m,            byte 0
+    jz         .quit
+
+    pshufb      m0,             [pb_unpackwq1]      ; [2 2 2 2 1 1 1 1]
+    movu        m1,             [r2]                ; [7 6 5 4 3 2 1 0]
+    pshufb      m2,             m1, [pb_unpackwq1]  ; [0 0 0 0]
+    palignr     m1,             m1, 2               ; [4 3 2 1]
+    psubw       m1,             m2                  ; side[y + 1] - side[0]
+    psraw       m1,             1
+    paddw       m0,             m1                  ; signed sum, can leave the pixel range
+
+    ; clip to [0, pixel max]; a saturating pack would only bound the sum at
+    ; 65535 and let values above the pixel maximum through
+    pxor        m1,             m1
+    pmaxsw      m0,             m1                  ; lower bound: 0
+    pcmpeqw     m2,             m2                  ; all ones
+%ifdef BIT_DEPTH
+    psrlw       m2,             16 - BIT_DEPTH      ; (1 << BIT_DEPTH) - 1
+%else
+    psrlw       m2,             6                   ; 1023, 10-bit fallback
+%endif
+    pminsw      m0,             m2                  ; upper bound: pixel max
+
+    ; overwrite column 0 of each row with the filtered samples
+    pextrw      [r0],           m0, 0
+    pextrw      [r0 + r1],      m0, 1
+    pextrw      [r0 + r1 * 2],  m0, 2
+    pextrw      [r0 + r3],      m0, 3
+
+.quit:
+    RET


More information about the x265-devel mailing list