[x265] [PATCH] asm: 16bpp asm code for intra_pred_ang4 - mode 11, 12, 13

yuvaraj at multicorewareinc.com yuvaraj at multicorewareinc.com
Tue Dec 10 12:08:34 CET 2013


# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1386673691 -19800
#      Tue Dec 10 16:38:11 2013 +0530
# Node ID 0979012b3c8b4eef4569b5a2b6269bf2dc015374
# Parent  6f874e1d99c54ba751b9b397ff07da1e02b6aea6
asm: 16bpp asm code for intra_pred_ang4 - mode 11,12,13

diff -r 6f874e1d99c5 -r 0979012b3c8b source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Dec 10 15:44:02 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Dec 10 16:38:11 2013 +0530
@@ -735,6 +735,12 @@
         SETUP_INTRA_ANG4(8, 8, sse4);
         SETUP_INTRA_ANG4(9, 9, sse4);
         SETUP_INTRA_ANG4(10, 10, sse4);
+        SETUP_INTRA_ANG4(11, 11, sse4);
+        SETUP_INTRA_ANG4(12, 12, sse4);
+        SETUP_INTRA_ANG4(13, 13, sse4);
+        SETUP_INTRA_ANG4(23, 13, sse4);
+        SETUP_INTRA_ANG4(24, 12, sse4);
+        SETUP_INTRA_ANG4(25, 11, sse4);
         SETUP_INTRA_ANG4(26, 26, sse4);
         SETUP_INTRA_ANG4(27, 9, sse4);
         SETUP_INTRA_ANG4(28, 8, sse4);
diff -r 6f874e1d99c5 -r 0979012b3c8b source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Tue Dec 10 15:44:02 2013 +0530
+++ b/source/common/x86/intrapred16.asm	Tue Dec 10 16:38:11 2013 +0530
@@ -790,3 +790,60 @@
 
 .quit:
     RET
+
+cglobal intra_pred_ang4_11, 3,4,8   ; asm-primitives.cpp registers this routine for both 11 and 25
+    cmp         r4m, byte 25        ; is argument r4 equal to 25? (presumably the mode number -- confirm against the C prototype)
+    cmove       r2, r3mp            ; if so, take the source pointer from argument r3 instead of r2
+    lea         r3, [ang_table + 24 * 16]   ; r3 = ang_table + 24*16; bracketed numbers below are offsets from ang_table in 16-byte units
+    movu        m2, [r2]        ; [x x x 4 3 2 1 0]
+    palignr     m1, m2, 2       ; [x x x x 4 3 2 1]
+    punpcklwd   m2, m1          ; [4 3 3 2 2 1 1 0]
+    mova        m3, m2          ; m3, m4 and m5 are plain copies of the interleaved pairs in m2
+    mova        m4, m2
+    mova        m5, m2
+
+    mova        m0, [r3 +  6 * 16]  ; [30]
+    mova        m1, [r3 +  4 * 16]  ; [28]
+    mova        m6, [r3 +  2 * 16]  ; [26]
+    mova        m7, [r3 +  0 * 16]  ; [24]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump into intra_pred_ang4_3's .do_filter4x4 (not shown here), which consumes m0-m7 as set up above
+
+
+cglobal intra_pred_ang4_12, 3,4,8   ; asm-primitives.cpp registers this routine for both 12 and 24
+    cmp         r4m, byte 24        ; is argument r4 equal to 24? (presumably the mode number -- confirm against the C prototype)
+    cmove       r2, r3mp            ; if so, take the source pointer from argument r3 instead of r2
+    lea         r3, [ang_table + 20 * 16]   ; r3 = ang_table + 20*16; bracketed numbers below are offsets from ang_table in 16-byte units
+    movu        m2, [r2]        ; [x x x 4 3 2 1 0]
+    palignr     m1, m2, 2       ; [x x x x 4 3 2 1]
+    punpcklwd   m2, m1          ; [4 3 3 2 2 1 1 0]
+    mova        m3, m2          ; m3, m4 and m5 are plain copies of the interleaved pairs in m2
+    mova        m4, m2
+    mova        m5, m2
+
+    mova        m0, [r3 +  7 * 16]  ; [27]
+    mova        m1, [r3 +  2 * 16]  ; [22]
+    mova        m6, [r3 -  3 * 16]  ; [17]
+    mova        m7, [r3 -  8 * 16]  ; [12]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump into intra_pred_ang4_3's .do_filter4x4 (not shown here), which consumes m0-m7 as set up above
+
+
+cglobal intra_pred_ang4_13, 4,4,8   ; asm-primitives.cpp registers this routine for both 13 and 23
+    cmp         r4m, byte 23        ; is argument r4 equal to 23? (presumably the mode number -- confirm against the C prototype)
+    jnz         .load               ; no: keep r2 and r3 as passed in
+    xchg        r2, r3              ; yes: swap the two pointer arguments, since both are read below
+.load:
+    movu        m5, [r2 - 2]    ; [x x 4 3 2 1 0 x]
+    palignr     m2, m5, 2       ; [x x x 4 3 2 1 0]
+    palignr     m0, m5, 4       ; [x x x x 4 3 2 1]
+    pinsrw      m5, [r3 + 8], 0 ; overwrite word 0 of m5 with the word at byte offset 8 of the other pointer
+    punpcklwd   m5, m2          ; [3 2 2 1 1 0 0 x]
+    punpcklwd   m2, m0          ; [4 3 3 2 2 1 1 0]
+    mova        m3, m2          ; m3 and m4 are plain copies of m2; m5 keeps the pairs that start at the inserted word
+    mova        m4, m2
+
+    lea         r3, [ang_table + 21 * 16]   ; r3 is free again; bracketed numbers below are offsets from ang_table in 16-byte units
+    mova        m0, [r3 +  2 * 16]  ; [23]
+    mova        m1, [r3 -  7 * 16]  ; [14]
+    mova        m6, [r3 - 16 * 16]  ; [ 5]
+    mova        m7, [r3 +  7 * 16]  ; [28]
+    jmp         mangle(private_prefix %+ _ %+ intra_pred_ang4_3 %+ SUFFIX %+ .do_filter4x4) ; tail-jump into intra_pred_ang4_3's .do_filter4x4 (not shown here), which consumes m0-m7 as set up above


More information about the x265-devel mailing list