[x265] [PATCH] 16bpp: assembly code for intra_planar4

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Mon Dec 9 11:50:36 CET 2013


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1386568872 -19800
#      Mon Dec 09 11:31:12 2013 +0530
# Node ID 942ea368858fd64d908c2b5fca5cdb23eca6a038
# Parent  b29f2f31ec460ff186fde8c75c0949af4b3fa824
16bpp: assembly code for intra_planar4

diff -r b29f2f31ec46 -r 942ea368858f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sat Dec 07 17:31:09 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Mon Dec 09 11:31:12 2013 +0530
@@ -674,6 +674,8 @@
     }
     if (cpuMask & X265_CPU_SSE4)
     {
+        p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
+
         p.intra_pred[BLOCK_4x4][1] = x265_intra_pred_dc4_sse4;
         p.intra_pred[BLOCK_8x8][1] = x265_intra_pred_dc8_sse4;
         p.intra_pred[BLOCK_16x16][1] = x265_intra_pred_dc16_sse4;
diff -r b29f2f31ec46 -r 942ea368858f source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Sat Dec 07 17:31:09 2013 -0600
+++ b/source/common/x86/intrapred16.asm	Mon Dec 09 11:31:12 2013 +0530
@@ -31,6 +31,7 @@
 cextern pw_1
 cextern pd_32
 cextern pw_4096
+cextern multi_2Row
 
 
 ;-------------------------------------------------------------------------------------------------------
@@ -100,6 +101,8 @@
 
     RET
 
+
+
 ;-------------------------------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
 ;-------------------------------------------------------------------------------------------------------
@@ -398,3 +401,69 @@
 %endrep
 
     RET
+
+;-----------------------------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
+;-----------------------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_planar4, 4,7,5             ; r0 = dst, r1 = dstStride, r2 = left, r3 = above; body uses r0-r6 and m0-m4
+    add             r2,         2             ; left  += 1 sample (samples are 2 bytes: [4] is read at byte offset 8 below)
+    add             r3,         2             ; above += 1 sample
+    add             r1,         r1            ; dstStride *= 2 (presumably sample units -> bytes; confirm against caller)
+    movh            m0,         [r3]      ; topRow[i] = above[i];
+    punpcklqdq      m0,         m0            ; copy low qword to high qword: topRow held twice, one copy per output row
+
+    pxor            m1,         m1            ; m1 = 0 (only needed as zero for the punpcklqdq below; reused as scratch in the macro)
+    movd            m2,         [r2 + 8]  ; bottomLeft = left[4]
+    movzx           r6d, word   [r3 + 8]  ; topRight   = above[4];
+    pshuflw         m2,         m2, 0         ; broadcast word 0 across the low four words
+    pshufd          m2,         m2, 0         ; broadcast dword 0: bottomLeft now in all eight words
+
+    psubw           m2,         m0        ; bottomRow[i] = bottomLeft - topRow[i]
+    psllw           m0,         2             ; topRow[i] *= 4
+    punpcklqdq      m3,         m2, m1        ; m3 = bottomRow in low qword, 0 in high qword (3-operand form, presumably expanded by x86inc)
+    psubw           m0,         m3            ; m0 low = 4*topRow - bottomRow, m0 high = 4*topRow
+    paddw           m2,         m2            ; m2 = 2*bottomRow: step added to both halves of m0 per macro invocation
+
+%macro COMP_PRED_PLANAR_2ROW 1                ; %1 = byte offset into left of the first of the two rows produced
+    movzx           r4d, word   [r2 + %1]     ; r4d = left sample for the first row
+    lea             r4d,        [r4d * 4 + 4] ; r4d = 4*left + 4
+    movd            m3,         r4d
+    pshuflw         m3,         m3, 0         ; broadcast across the low four words
+
+    movzx           r4d, word   [r2 + %1 + 2] ; r4d = left sample for the second row
+    lea             r4d,        [r4d * 4 + 4] ; r4d = 4*left + 4
+    movd            m4,         r4d
+    pshuflw         m4,         m4, 0         ; broadcast across the low four words
+    punpcklqdq      m3,         m4        ; horPred
+
+    movzx           r4d, word   [r2 + %1]     ; reload left sample for the first row
+    mov             r5d,        r6d
+    sub             r5d,        r4d           ; r5d = topRight - left
+    movd            m4,         r5d
+    pshuflw         m4,         m4, 0         ; broadcast across the low four words
+
+    movzx           r4d, word   [r2 + %1 + 2] ; reload left sample for the second row
+    mov             r5d,        r6d
+    sub             r5d,        r4d           ; r5d = topRight - left
+    movd            m1,         r5d           ; m1 is overwritten here; its zero value is not read again
+    pshuflw         m1,         m1, 0         ; broadcast across the low four words
+    punpcklqdq      m4,         m1        ; rightColumnN
+
+    pmullw          m4,         [multi_2Row]  ; per-word multiply by external table (contents not visible here; presumably column weights 1..4 twice -- TODO confirm; NOTE(review): memory operand needs 16-byte alignment)
+    paddw           m3,         m4            ; horPred += weighted rightColumn
+    paddw           m0,         m2            ; advance running vertical term by 2*bottomRow (halves stay one bottomRow apart)
+    paddw           m3,         m0            ; add vertical term to horizontal term
+    psraw           m3,         3             ; arithmetic shift right by 3
+
+    movh            [r0],       m3            ; store first row (low four words)
+    pshufd          m3,         m3, 0xAE      ; bring the high qword down into the low qword
+    movh            [r0 + r1],  m3            ; store second row
+    lea             r0,         [r0 + 2 * r1] ; dst advances by two rows
+%endmacro
+
+    COMP_PRED_PLANAR_2ROW 0                   ; rows 0 and 1
+    COMP_PRED_PLANAR_2ROW 4                   ; rows 2 and 3 (byte offset 4 = third sample of left after the initial skip)
+%undef COMP_PRED_PLANAR_2ROW                  ; NOTE(review): %undef is documented for single-line macros; a %macro is normally removed with %unmacro -- confirm intended effect
+
+    RET


More information about the x265-devel mailing list