[x265] [PATCH] asm : assembly code for intra_pred_planar[16x16]

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Wed Nov 27 05:44:15 CET 2013


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385527396 -19800
#      Wed Nov 27 10:13:16 2013 +0530
# Node ID 96ab65a21778396f65ced5670cee46ba34cafb4d
# Parent  b09b6fa7e89a6971c7dfa57c1e539f1836f9fcf9
asm : assembly code for intra_pred_planar[16x16]

diff -r b09b6fa7e89a -r 96ab65a21778 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Nov 26 12:24:24 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Wed Nov 27 10:13:16 2013 +0530
@@ -674,6 +674,7 @@
         p.weight_sp = x265_weight_sp_sse4;
         p.intra_pred_planar[BLOCK_4x4] = x265_intra_pred_planar4_sse4;
         p.intra_pred_planar[BLOCK_8x8] = x265_intra_pred_planar8_sse4;
+        p.intra_pred_planar[BLOCK_16x16] = x265_intra_pred_planar16_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
diff -r b09b6fa7e89a -r 96ab65a21778 source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm	Tue Nov 26 12:24:24 2013 -0600
+++ b/source/common/x86/intrapred.asm	Wed Nov 27 10:13:16 2013 +0530
@@ -28,6 +28,7 @@
 
 multi_2Row: dw 1, 2, 3, 4, 1, 2, 3, 4
 multiL:     dw 1, 2, 3, 4, 5, 6, 7, 8
+multiH:     dw 9, 10, 11, 12, 13, 14, 15, 16   ; (x + 1) column weights for x = 8..15, used by the 16x16 planar rows
 
 SECTION .text
 
@@ -490,3 +491,76 @@
     COMP_PRED_PLANAR_ROW 7
 
     RET
+
+
+;----------------------------------------------------------------------------------------
+; void intra_pred_planar16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
+;
+; dst[y][x] = (16 * left[y]  + (x + 1) * (above[16] - left[y])
+;            + 16 * above[x] + (y + 1) * (left[16]  - above[x]) + 16) >> 5   for x, y in 0..15
+; Pixels are read and written as bytes (8-bit samples).
+; r0 = above (scratch after setup), r1 = left, r2 = dst, r3 = dstStride, r4 = above[16],
+; r5 = scratch; m1/m2 = running vertical term (columns 0-7 / 8-15), m4/m5 = its row step.
+;----------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_planar16, 4,6,8, above, left, dst, dstStride
+
+    pxor            m0,         m0         ; zero: pshufb broadcast mask and unpack filler
+    pmovzxbw        m1,         [r0]       ; topRow[0-7]
+    pmovzxbw        m2,         [r0 + 8]   ; topRow[8-15]
+
+    movd            m3,         [r1 + 16]
+    pshufb          m3,         m0
+    punpcklbw       m3,         m0         ; v_bottomLeft = left[16]
+    movzx           r4d, byte   [r0 + 16]  ; topRight     = above[16]
+
+    psubw           m4,         m3, m1     ; v_bottomRow[0] = bottomLeft - topRow[0-7]
+    psubw           m5,         m3, m2     ; v_bottomRow[1] = bottomLeft - topRow[8-15]
+
+    psllw           m1,         4          ; 16 * topRow[0-7]
+    psllw           m2,         4          ; 16 * topRow[8-15]
+
+; Emits one output row; %1 = row index into left[]. Named per block size so it does not
+; redefine the COMP_PRED_PLANAR_ROW macro that the preceding function already uses.
+; Clobbers r0, r5, m3, m6, m7; advances m1/m2 by one row step and r2 by dstStride.
+%macro PRED_PLANAR_ROW16 1
+    movzx           r5d, byte   [r1 + %1]  ; left[y], loaded once for both terms below
+    mov             r0d,        r4d
+    sub             r0d,        r5d        ; topRight - left[y]
+    shl             r5d,        4
+    add             r5d,        16         ; 16 * left[y] + 16 (rounding offset)
+
+    movd            m3,         r5d
+    pshuflw         m3,         m3, 0
+    pshufd          m3,         m3, 0      ; horPred
+    movd            m6,         r0d
+    pshuflw         m6,         m6, 0
+    pshufd          m6,         m6, 0      ; (topRight - left[y]) in every word
+
+    pmullw          m7,         m6, [multiL]   ; (x + 1) * (topRight - left[y]), x = 0..7
+    paddw           m7,         m3
+    paddw           m1,         m4         ; vertical term for this row, columns 0-7
+    paddw           m7,         m1
+    psraw           m7,         5
+
+    pmullw          m6,         m6, [multiH]   ; (x + 1) * (topRight - left[y]), x = 8..15
+    paddw           m3,         m6
+    paddw           m2,         m5         ; vertical term for this row, columns 8-15
+    paddw           m3,         m2
+    psraw           m3,         5
+
+    packuswb        m7,         m3         ; 16 words -> 16 bytes, unsigned saturation
+    movu            [r2],       m7
+    lea             r2,         [r2 + r3]  ; next dst row
+%endmacro
+
+; Rows must be emitted in order 0..15: m1/m2 accumulate one step per invocation.
+%assign planar16_row 0
+%rep 16
+    PRED_PLANAR_ROW16 planar16_row
+%assign planar16_row planar16_row + 1
+%endrep
+%undef planar16_row
+
+    RET
diff -r b09b6fa7e89a -r 96ab65a21778 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Tue Nov 26 12:24:24 2013 -0600
+++ b/source/common/x86/intrapred.h	Wed Nov 27 10:13:16 2013 +0530
@@ -33,5 +33,6 @@
 
 void x265_intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
 void x265_intra_pred_planar8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
+void x265_intra_pred_planar16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
 
 #endif // ifndef X265_INTRAPRED_H


More information about the x265-devel mailing list