[x265] [PATCH] 16bpp: assembly code for intra_planar8

dnyaneshwar at multicorewareinc.com
Mon Dec 9 11:50:52 CET 2013


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1386579848 -19800
#      Mon Dec 09 14:34:08 2013 +0530
# Node ID 755e80d65d853e26e1f8ddd1fd46f924a92ba948
# Parent  942ea368858fd64d908c2b5fca5cdb23eca6a038
16bpp: assembly code for intra_planar8

diff -r 942ea368858f -r 755e80d65d85 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Dec 09 11:31:12 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp	Mon Dec 09 14:34:08 2013 +0530
@@ -675,6 +675,7 @@
     if (cpuMask & X265_CPU_SSE4)
     {
         p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
+        p.intra_pred[BLOCK_8x8][0] = x265_intra_pred_planar8_sse4;
 
         p.intra_pred[BLOCK_4x4][1] = x265_intra_pred_dc4_sse4;
         p.intra_pred[BLOCK_8x8][1] = x265_intra_pred_dc8_sse4;
diff -r 942ea368858f -r 755e80d65d85 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Mon Dec 09 11:31:12 2013 +0530
+++ b/source/common/x86/intrapred16.asm	Mon Dec 09 14:34:08 2013 +0530
@@ -29,8 +29,10 @@
 SECTION .text
 
 cextern pw_1
+cextern pw_8
 cextern pd_32
 cextern pw_4096
+cextern multiL
 cextern multi_2Row
 
 
@@ -467,3 +469,66 @@
 %undef COMP_PRED_PLANAR_2ROW
 
     RET
+
+;-----------------------------------------------------------------------------------------------------------
+; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
+;-----------------------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_planar8, 4,4,7
+    add             r2,     2
+    add             r3,     2
+    add             r1,     r1
+    movu            m1,     [r3]      ; v_topRow
+    movu            m2,     [r2]      ; v_leftColumn
+
+    movd            m3,     [r3 + 16] ; topRight   = above[8];
+    movd            m4,     [r2 + 16] ; bottomLeft = left[8];
+
+    pshuflw         m3,     m3, 0
+    pshufd          m3,     m3, 0
+    pshuflw         m4,     m4, 0
+    pshufd          m4,     m4, 0
+
+    psubw           m4,     m1        ; v_bottomRow
+    psubw           m3,     m2        ; v_rightColumn
+
+    psllw           m1,     3         ; v_topRow
+    psllw           m2,     3         ; v_leftColumn
+
+    paddw           m6,     m2, [pw_8]
+
+%macro PRED_PLANAR_ROW8 1
+    %if (%1 < 4)
+        pshuflw     m5,     m6, 0x55 * %1
+        pshufd      m5,     m5, 0
+        pshuflw     m2,     m3, 0x55 * %1
+        pshufd      m2,     m2, 0
+    %else
+        pshufhw     m5,     m6, 0x55 * (%1 - 4)
+        pshufd      m5,     m5, 0xAA
+        pshufhw     m2,     m3, 0x55 * (%1 - 4)
+        pshufd      m2,     m2, 0xAA
+    %endif
+
+    pmullw          m2,     [multiL]
+    paddw           m5,     m2
+    paddw           m1,     m4
+    paddw           m5,     m1
+    psraw           m5,     4
+
+    movu            [r0],   m5
+    add             r0,     r1
+
+%endmacro
+
+    PRED_PLANAR_ROW8 0
+    PRED_PLANAR_ROW8 1
+    PRED_PLANAR_ROW8 2
+    PRED_PLANAR_ROW8 3
+    PRED_PLANAR_ROW8 4
+    PRED_PLANAR_ROW8 5
+    PRED_PLANAR_ROW8 6
+    PRED_PLANAR_ROW8 7
+
+%undef PRED_PLANAR_ROW8
+    RET
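
For readers checking the arithmetic: the routine implements standard HEVC
planar prediction for an 8x8 block. The vertical term starts at topRow*8 and
gains (bottomLeft - topRow) once per row; the horizontal term for row y starts
at left[y]*8 + 8 and gains (x+1)*(topRight - left[y]) per column via the
multiL weights; the sum is shifted right by 4, i.e. log2(8) + 1. The plain-C
sketch below shows the same computation. It is an illustrative reference
only, not the C primitive from the x265 tree; the function name, the pixel
typedef, and the assumption that left/above already point past the shared
top-left sample (which is what the two "add rX, 2" instructions arrange) are
not taken from the patch.

    #include <stdint.h>

    typedef uint16_t pixel;   /* 16bpp build: one sample per uint16_t */

    /* Hypothetical reference for the SSE4 routine above:
     * dst[y][x] = ((7-x)*left[y] + (x+1)*topRight +
     *              (7-y)*above[x] + (y+1)*bottomLeft + 8) >> 4 */
    static void planar8_ref(pixel *dst, intptr_t dstStride,
                            const pixel *left, const pixel *above)
    {
        const int topRight   = above[8];
        const int bottomLeft = left[8];

        for (int y = 0; y < 8; y++)
            for (int x = 0; x < 8; x++)
                dst[y * dstStride + x] = (pixel)(((7 - x) * left[y]  + (x + 1) * topRight +
                                                  (7 - y) * above[x] + (y + 1) * bottomLeft + 8) >> 4);
    }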

