[x265] [PATCH] asm: assembly code for intra_pred_planar[4x4]

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Mon Nov 25 14:49:27 CET 2013


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385387273 -19800
#      Mon Nov 25 19:17:53 2013 +0530
# Node ID c070e25af31107c7c5a5a6cb5c5e049871c56e22
# Parent  10f605bd053009c8c981c7529322fecd1e54af7b
asm: assembly code for intra_pred_planar[4x4]

diff -r 10f605bd0530 -r c070e25af311 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Nov 22 14:59:34 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Mon Nov 25 19:17:53 2013 +0530
@@ -663,6 +663,8 @@
         p.intra_pred_dc[BLOCK_8x8] = x265_intra_pred_dc8_sse4;
         p.intra_pred_dc[BLOCK_16x16] = x265_intra_pred_dc16_sse4;
         p.intra_pred_dc[BLOCK_32x32] = x265_intra_pred_dc32_sse4;
+
+        p.intra_pred_planar[BLOCK_4x4] = x265_intra_pred_planar4_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
diff -r 10f605bd0530 -r c070e25af311 source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm	Fri Nov 22 14:59:34 2013 -0600
+++ b/source/common/x86/intrapred.asm	Mon Nov 25 19:17:53 2013 +0530
@@ -26,7 +26,7 @@
 
 SECTION_RODATA 32
 
-
+multi_2Row: dw 1, 2, 3, 4, 1, 2, 3, 4   ; (x + 1) column weights, x = 0..3, repeated for two 4-pixel rows (pmullw operand in intra_pred_planar4)
 
 SECTION .text
 
@@ -362,3 +362,63 @@
 %endrep
 
     RET
+
+INIT_XMM sse4                            ; x86inc: use XMM registers, emit the function with the _sse4 suffix
+cglobal intra_pred_planar4, 4,7,5, above, left, dst, dstStride   ; 4 args (r0=above, r1=left, r2=dst, r3=dstStride), 7 GPRs, 5 XMM regs
+; dst[y][x] = ((3-x)*left[y] + (x+1)*above[4] + (3-y)*above[x] + (y+1)*left[4] + 4) >> 3, for x, y in 0..3 (8-bit samples, 16-bit math)
+    pmovzxbw        m0,         [r0]      ; topRow[i] = above[i]; (bytes zero-extended to words)
+    punpcklqdq      m0,         m0        ; duplicate the 4 topRow words into both halves (one copy per output row)
+
+    pxor            m1,         m1        ; m1 = 0: shuffle mask for the broadcast and zero filler for the unpack
+    movd            m2,         [r1 + 4]  ; bottomLeft = left[4]
+    movzx           r6d, byte   [r0 + 4]  ; topRight   = above[4];
+    pshufb          m2,         m1        ; all-zero mask -> byte 0 (left[4]) copied to every byte
+    punpcklbw       m2,         m1        ; zero-extend to 8 words of bottomLeft
+    psubw           m2,         m0        ; bottomRow[i] = bottomLeft - topRow[i]
+    psllw           m0,         2         ; topRow[i] *= 4
+    punpcklqdq      m3,         m2, m1    ; m3 = {bottomRow, 0}; 3-operand form, presumably expanded by x86inc into copy + unpack
+    psubw           m0,         m3        ; pre-bias low half by -bottomRow so the +2*bottomRow step below yields +1 (row y) / +2 (row y+1)
+    paddw           m2,         m2        ; m2 = 2 * bottomRow: vertical step per macro call (two rows at a time)
+
+%macro COMP_PRED_PLANAR_2ROW 1           ; %1 = index y of the first of two rows; clobbers r4, r5, m1, m3, m4; updates m0 and r2
+    movzx           r4d, byte   [r1 + %1] ; left[y]
+    lea             r4d,        [r4d * 4 + 4] ; left[y] * 4 + 4 (the +4 is the rounding term for the final >> 3)
+    movd            m3,         r4d
+    pshuflw         m3,         m3, 0     ; broadcast to the low 4 words
+
+    movzx           r4d, byte   [r1 + %1 + 1] ; left[y + 1]
+    lea             r4d,        [r4d * 4 + 4] ; left[y + 1] * 4 + 4
+    movd            m4,         r4d
+    pshuflw         m4,         m4, 0     ; broadcast to the low 4 words
+    punpcklqdq      m3,         m4        ; horPred: low half = row y, high half = row y + 1
+
+    movzx           r4d, byte   [r1 + %1] ; left[y] again (r4d was overwritten above)
+    mov             r5d,        r6d
+    sub             r5d,        r4d       ; topRight - left[y] (may be negative; only the low 16 bits are used)
+    movd            m4,         r5d
+    pshuflw         m4,         m4, 0     ; broadcast low word to the low 4 words
+
+    movzx           r4d, byte   [r1 + %1 + 1] ; left[y + 1]
+    mov             r5d,        r6d
+    sub             r5d,        r4d       ; topRight - left[y + 1]
+    movd            m1,         r5d       ; m1 reused as scratch; its zero value is no longer needed
+    pshuflw         m1,         m1, 0     ; broadcast low word to the low 4 words
+    punpcklqdq      m4,         m1        ; rightColumnN
+
+    pmullw          m4,         [multi_2Row] ; rightColumn * (x + 1), x = 0..3, for both rows
+    paddw           m3,         m4        ; horizontal term: horPred + (x + 1) * rightColumn
+    paddw           m0,         m2        ; advance the vertical accumulators by two rows
+    paddw           m3,         m0        ; horizontal + vertical
+    psraw           m3,         3         ; >> 3
+    packuswb        m3,         m3        ; saturate to unsigned bytes: row y in bytes 0-3, row y + 1 in bytes 4-7
+
+    movd            [r2],       m3        ; store row y (4 pixels)
+    pshufd          m3,         m3, 0x55  ; move dword 1 (row y + 1) into dword 0
+    movd            [r2 + r3],  m3        ; store row y + 1
+    lea             r2,         [r2 + 2 * r3] ; dst += 2 * dstStride
+%endmacro
+
+    COMP_PRED_PLANAR_2ROW 0               ; rows 0 and 1
+    COMP_PRED_PLANAR_2ROW 2               ; rows 2 and 3
+
+    RET
diff -r 10f605bd0530 -r c070e25af311 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Fri Nov 22 14:59:34 2013 -0600
+++ b/source/common/x86/intrapred.h	Mon Nov 25 19:17:53 2013 +0530
@@ -31,4 +31,6 @@
 void x265_intra_pred_dc16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
 void x265_intra_pred_dc32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int filter);
 
+void x265_intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
+
 #endif // ifndef X265_INTRAPRED_H


More information about the x265-devel mailing list