[x265] [PATCH] asm: assembly code for intra_pred_planar[8x8]

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Tue Nov 26 12:14:37 CET 2013


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385464412 -19800
#      Tue Nov 26 16:43:32 2013 +0530
# Node ID d143056e5535cf9d243b5f4700e203cd11abf473
# Parent  491fd3ee6fd11a52f50ba22b39b9e9596b8e7238
asm: assembly code for intra_pred_planar[8x8]

diff -r 491fd3ee6fd1 -r d143056e5535 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Nov 25 14:00:56 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Tue Nov 26 16:43:32 2013 +0530
@@ -670,6 +670,7 @@
         p.weight_pp = x265_weight_pp_sse4;
         p.weight_sp = x265_weight_sp_sse4;
         p.intra_pred_planar[BLOCK_4x4] = x265_intra_pred_planar4_sse4;
+        p.intra_pred_planar[BLOCK_8x8] = x265_intra_pred_planar8_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
diff -r 491fd3ee6fd1 -r d143056e5535 source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm	Mon Nov 25 14:00:56 2013 -0600
+++ b/source/common/x86/intrapred.asm	Tue Nov 26 16:43:32 2013 +0530
@@ -27,9 +27,12 @@
 SECTION_RODATA 32
 
 multi_2Row: dw 1, 2, 3, 4, 1, 2, 3, 4
+multiL:     dw 1, 2, 3, 4, 5, 6, 7, 8   ; per-column word multipliers 1..8, applied with pmullw in intra_pred_planar8
 
 SECTION .text
 
+cextern pw_8                            ; defined outside this file; added to 8 * leftColumn in intra_pred_planar8
+
 ;-----------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter)
 ;-----------------------------------------------------------------------------
@@ -363,6 +366,9 @@
 
     RET
 
+;----------------------------------------------------------------------------------------
+; void intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
+;----------------------------------------------------------------------------------------
 INIT_XMM sse4
 cglobal intra_pred_planar4, 4,7,5, above, left, dst, dstStride
 
@@ -422,3 +428,65 @@
     COMP_PRED_PLANAR_2ROW 2
 
     RET
+
+;----------------------------------------------------------------------------------------
+; void intra_pred_planar8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
+; dst[y][x] = (8*left[y] + (x+1)*(above[8]-left[y]) + 8*above[x] + (y+1)*(left[8]-above[x]) + 8) >> 4
+; Uses only m0-m5: xmm6 and up are callee-saved on Win64, so none has to be preserved here.
+;----------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_planar8, 4,4,6, above, left, dst, dstStride
+
+    pxor            m0,     m0       ; zero: pshufb control selecting byte 0, and unpack filler
+    pmovzxbw        m1,     [r0]     ; v_topRow     = above[0..7] widened to words
+    pmovzxbw        m2,     [r1]     ; v_leftColumn = left[0..7] widened to words
+
+    movd            m3,     [r0 + 8] ; topRight   = above[8];
+    movd            m4,     [r1 + 8] ; bottomLeft = left[8];
+
+    pshufb          m3,     m0       ; replicate byte 0 into every byte
+    pshufb          m4,     m0
+    punpcklbw       m3,     m0       ; v_topRight   (8 words)
+    punpcklbw       m4,     m0       ; v_bottomLeft (8 words)
+
+    psubw           m4,     m1       ; v_bottomRow   = bottomLeft - above[x]
+    psubw           m3,     m2       ; v_rightColumn = topRight   - left[y]
+
+    psllw           m1,     3        ; v_topRow     *= 8
+    psllw           m2,     3        ; v_leftColumn *= 8
+
+    paddw           m0,     m2, [pw_8] ; 8 * left[y] + 8; the zero in m0 is dead from here on
+
+%macro COMP_PRED_PLANAR_ROW 1 ; row index 0..7; updates m1 and r2, clobbers m2 and m5
+    %if (%1 < 4)
+        pshuflw     m5,     m0, 0x55 * %1
+        pshufd      m5,     m5, 0           ; m5 = 8 * left[row] + 8 in all eight words
+        pshuflw     m2,     m3, 0x55 * %1
+        pshufd      m2,     m2, 0           ; m2 = rightColumn[row] in all eight words
+    %else
+        pshufhw     m5,     m0, 0x55 * (%1 - 4)
+        pshufd      m5,     m5, 0xAA        ; same broadcast, taken from the high four words
+        pshufhw     m2,     m3, 0x55 * (%1 - 4)
+        pshufd      m2,     m2, 0xAA
+    %endif
+
+    pmullw          m2,     [multiL] ; (x + 1) * rightColumn[row]
+    paddw           m5,     m2
+    paddw           m1,     m4       ; running topRow += bottomRow, once per row
+    paddw           m5,     m1
+    psraw           m5,     4
+    packuswb        m5,     m5       ; words -> unsigned bytes with saturation
+    movh            [r2],   m5       ; store the 8 pixels of this row
+    lea             r2,     [r2 + r3] ; dst += dstStride
+%endmacro
+
+    COMP_PRED_PLANAR_ROW 0
+    COMP_PRED_PLANAR_ROW 1
+    COMP_PRED_PLANAR_ROW 2
+    COMP_PRED_PLANAR_ROW 3
+    COMP_PRED_PLANAR_ROW 4
+    COMP_PRED_PLANAR_ROW 5
+    COMP_PRED_PLANAR_ROW 6
+    COMP_PRED_PLANAR_ROW 7
+
+    RET
diff -r 491fd3ee6fd1 -r d143056e5535 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Mon Nov 25 14:00:56 2013 -0600
+++ b/source/common/x86/intrapred.h	Tue Nov 26 16:43:32 2013 +0530
@@ -32,5 +32,6 @@
 void x265_intra_pred_dc32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
 
 void x265_intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
+void x265_intra_pred_planar8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
 
 #endif // ifndef X265_INTRAPRED_H


More information about the x265-devel mailing list