[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4 - mode 14, 15, 16

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue Dec 10 12:35:30 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386675301 -19800
#      Tue Dec 10 17:05:01 2013 +0530
# Node ID ee69fed0ed3b5d79546bdbd1ac864b3cdebb4bc9
# Parent  0979012b3c8b4eef4569b5a2b6269bf2dc015374
asm: 16bpp asm code for intra_pred_ang4 - mode 14,15,16

diff -r 0979012b3c8b -r ee69fed0ed3b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Dec 10 16:38:11 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Dec 10 17:05:01 2013 +0530
@@ -738,6 +738,12 @@
         SETUP_INTRA_ANG4(11, 11, sse4);
         SETUP_INTRA_ANG4(12, 12, sse4);
         SETUP_INTRA_ANG4(13, 13, sse4);
+        SETUP_INTRA_ANG4(14, 14, sse4);
+        SETUP_INTRA_ANG4(15, 15, sse4);
+        SETUP_INTRA_ANG4(16, 16, sse4);
+        SETUP_INTRA_ANG4(20, 16, sse4);
+        SETUP_INTRA_ANG4(21, 15, sse4);
+        SETUP_INTRA_ANG4(22, 14, sse4);
         SETUP_INTRA_ANG4(23, 13, sse4);
         SETUP_INTRA_ANG4(24, 12, sse4);
         SETUP_INTRA_ANG4(25, 11, sse4);
diff -r 0979012b3c8b -r ee69fed0ed3b source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Tue Dec 10 16:38:11 2013 +0530
+++ b/source/common/x86/intrapred16.asm	Tue Dec 10 17:05:01 2013 +0530
@@ -847,3 +847,72 @@
     mova        m6, [r3 - 16 * 16]  ; [ 5]
     mova        m7, [r3 +  7 * 16]  ; [28]
     jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4)
+
+cglobal intra_pred_ang4_14, 4,4,8       ; 16bpp intra_pred_ang4 for mode 14; also registered for mode 22
+    cmp         r4m, byte 22            ; r4m is presumably the mode argument - TODO confirm against the C prototype
+    jnz         .load                   ; not mode 22: keep r2/r3 as passed in
+    xchg        r2, r3                  ; mode 22: swap the two reference pointers, then share the same code
+.load:
+    movu        m5, [r2 - 2]    ; [x x 4 3 2 1 0 x]  (16-bit lanes, x = don't care)
+    palignr     m2, m5, 2       ; [x x x 4 3 2 1 0]
+    palignr     m0, m5, 4       ; [x x x x 4 3 2 1]
+    pinsrw      m5, [r3 + 4], 0 ; lane 0 <- word at [r3 + 4] (the low 'x' in the next line)
+    punpcklwd   m5, m2          ; [3 2 2 1 1 0 0 x]
+    punpcklwd   m2, m0          ; [4 3 3 2 2 1 1 0]
+    mova        m3, m2          ; m3 = copy of m2
+    mova        m4, m5          ; m4 = copy of m5
+
+    lea         r3, [ang_table + 19 * 16]   ; r3 -> ang_table entry 19 (entries addressed in 16-byte steps)
+    mova        m0, [r3 +  0 * 16]  ; [19]
+    mova        m1, [r3 - 13 * 16]  ; [ 6]
+    mova        m6, [r3 +  6 * 16]  ; [25]
+    mova        m7, [r3 -  7 * 16]  ; [12]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump to the shared path in intra_pred_ang4_3 (its register contract is defined there)
+
+
+cglobal intra_pred_ang4_15, 4,4,8       ; 16bpp intra_pred_ang4 for mode 15; also registered for mode 21
+    cmp         r4m, byte 21            ; r4m is presumably the mode argument - TODO confirm against the C prototype
+    jnz         .load                   ; not mode 21: keep r2/r3 as passed in
+    xchg        r2, r3                  ; mode 21: swap the two reference pointers, then share the same code
+.load:
+    movu        m3, [r2 - 2]    ; [x x 4 3 2 1 0 x]  (16-bit lanes, x = don't care)
+    palignr     m2, m3, 2       ; [x x x 4 3 2 1 0]
+    palignr     m0, m3, 4       ; [x x x x 4 3 2 1]
+    pinsrw      m3, [r3 + 4], 0 ; lane 0 <- word at [r3 + 4] (the 'x' next to 0 in the lines below)
+    pslldq      m5, m3, 2       ; [x 4 3 2 1 0 x y]  (lane 0 'y' is filled by the next pinsrw)
+    pinsrw      m5, [r3 + 8], 0 ; lane 0 <- word at [r3 + 8] ('y')
+    punpcklwd   m5, m3          ; [2 1 1 0 0 x x y]
+    punpcklwd   m3, m2          ; [3 2 2 1 1 0 0 x]
+    punpcklwd   m2, m0          ; [4 3 3 2 2 1 1 0]
+    mova        m4, m3          ; m4 = copy of m3
+
+    lea         r3, [ang_table + 23 * 16]   ; r3 -> ang_table entry 23 (entries addressed in 16-byte steps)
+    mova        m0, [r3 -  8 * 16]  ; [15]
+    mova        m1, [r3 +  7 * 16]  ; [30]
+    mova        m6, [r3 - 10 * 16]  ; [13]
+    mova        m7, [r3 +  5 * 16]  ; [28]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump to the shared path in intra_pred_ang4_3 (its register contract is defined there)
+
+
+cglobal intra_pred_ang4_16, 4,4,8       ; 16bpp intra_pred_ang4 for mode 16; also registered for mode 20
+    cmp         r4m, byte 20            ; r4m is presumably the mode argument - TODO confirm against the C prototype
+    jnz         .load                   ; not mode 20: keep r2/r3 as passed in
+    xchg        r2, r3                  ; mode 20: swap the two reference pointers, then share the same code
+.load:
+    movu        m3, [r2 - 2]    ; [x x 4 3 2 1 0 x]  (16-bit lanes, x = don't care)
+    palignr     m2, m3, 2       ; [x x x 4 3 2 1 0]
+    palignr     m0, m3, 4       ; [x x x x 4 3 2 1]
+    pinsrw      m3, [r3 + 4], 0 ; lane 0 <- word at [r3 + 4] (the 'x' next to 0 in the lines below)
+    pslldq      m5, m3, 2       ; [x 4 3 2 1 0 x y]  (lane 0 'y' is filled by the next pinsrw)
+    pinsrw      m5, [r3 + 6], 0 ; lane 0 <- word at [r3 + 6] ('y')
+    punpcklwd   m5, m3          ; [2 1 1 0 0 x x y]
+    punpcklwd   m3, m2          ; [3 2 2 1 1 0 0 x]
+    punpcklwd   m2, m0          ; [4 3 3 2 2 1 1 0]
+    mova        m4, m3          ; m4 = copy of m3
+
+    lea         r3, [ang_table + 19 * 16]   ; r3 -> ang_table entry 19 (entries addressed in 16-byte steps)
+    mova        m0, [r3 -  8 * 16]  ; [11]
+    mova        m1, [r3 +  3 * 16]  ; [22]
+    mova        m6, [r3 - 18 * 16]  ; [ 1]
+    mova        m7, [r3 -  7 * 16]  ; [12]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump to the shared path in intra_pred_ang4_3 (its register contract is defined there)


More information about the x265-devel mailing list