[x265] [PATCH] asm: assembly code for intra_pred_planar[32x32]

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Thu Nov 28 09:50:01 CET 2013


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1385624240 -19800
#      Thu Nov 28 13:07:20 2013 +0530
# Node ID 3e94e05251fbf3e55f5195736bb95f0ac6de6ad6
# Parent  949f85337789c8d00f39ed1a010990efe67ebcf4
asm: assembly code for intra_pred_planar[32x32]

diff -r 949f85337789 -r 3e94e05251fb source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Nov 27 18:10:14 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Thu Nov 28 13:07:20 2013 +0530
@@ -699,6 +699,7 @@
         p.intra_pred_planar[BLOCK_4x4] = x265_intra_pred_planar4_sse4;
         p.intra_pred_planar[BLOCK_8x8] = x265_intra_pred_planar8_sse4;
         p.intra_pred_planar[BLOCK_16x16] = x265_intra_pred_planar16_sse4;
+        p.intra_pred_planar[BLOCK_32x32] = x265_intra_pred_planar32_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
diff -r 949f85337789 -r 3e94e05251fb source/common/x86/intrapred.asm
--- a/source/common/x86/intrapred.asm	Wed Nov 27 18:10:14 2013 -0600
+++ b/source/common/x86/intrapred.asm	Thu Nov 28 13:07:20 2013 +0530
@@ -29,6 +29,8 @@
 multi_2Row: dw 1, 2, 3, 4, 1, 2, 3, 4
 multiL:     dw 1, 2, 3, 4, 5, 6, 7, 8
 multiH:     dw 9, 10, 11, 12, 13, 14, 15, 16
+multiH2:    dw 17, 18, 19, 20, 21, 22, 23, 24
+multiH3:    dw 25, 26, 27, 28, 29, 30, 31, 32
 
 SECTION .text
 
@@ -564,3 +566,111 @@
     PRED_PLANAR_ROW16 15
 
     RET
+
+
+;----------------------------------------------------------------------------------------
+; void intra_pred_planar32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride)
+;----------------------------------------------------------------------------------------
+INIT_XMM sse4
+%if ARCH_X86_64 == 1
+cglobal intra_pred_planar32, 4,7,12
+  %define bottomRow0    m8
+  %define bottomRow1    m9
+  %define bottomRow2    m10
+  %define bottomRow3    m11
+%else
+cglobal intra_pred_planar32, 4,7,8,0-(4*mmsize)
+  %define bottomRow0    [rsp + 0 * mmsize]
+  %define bottomRow1    [rsp + 1 * mmsize]
+  %define bottomRow2    [rsp + 2 * mmsize]
+  %define bottomRow3    [rsp + 3 * mmsize]
+%endif
+
+    pxor            m3,         m3
+    movd            m0,         [r1 + 32]
+    pshufb          m0,         m3
+    punpcklbw       m0,         m3          ; v_bottomLeft = left[32]
+    movzx           r4d, byte   [r0 + 32]   ; topRight     = above[32]
+
+    pmovzxbw        m1,         [r0 + 0]    ; topRow[0]
+    pmovzxbw        m2,         [r0 + 8]    ; topRow[1]
+    pmovzxbw        m3,         [r0 +16]    ; topRow[2]
+    pmovzxbw        m4,         [r0 +24]    ; topRow[3]
+
+    psubw           m5,         m0, m1      ; v_bottomRow[0]
+    psubw           m6,         m0, m2      ; v_bottomRow[1]
+    psubw           m7,         m0, m3      ; v_bottomRow[2]
+    psubw           m0,         m4          ; v_bottomRow[3]
+
+    mova            bottomRow0, m5
+    mova            bottomRow1, m6
+    mova            bottomRow2, m7
+    mova            bottomRow3, m0
+
+    psllw           m1,         5
+    psllw           m2,         5
+    psllw           m3,         5
+    psllw           m4,         5
+
+%macro COMP_PRED_PLANAR_ROW 1
+    movzx           r5d,   byte [r1]
+    shl             r5d,        5
+    add             r5d,        32
+    movd            m5,         r5d
+    pshuflw         m5,         m5, 0
+    pshufd          m5,         m5, 0      ; horPred
+
+    movzx           r5d,   byte [r1]
+    mov             r6d,        r4d
+    sub             r6d,        r5d
+    movd            m6,         r6d
+    pshuflw         m6,         m6, 0
+    pshufd          m6,         m6, 0
+
+%if (%1 == 0)
+    pmullw          m7,         m6, [multiL]
+%else
+    pmullw          m7,         m6, [multiH2]
+%endif
+
+    paddw           m7,         m5
+%if (%1 == 0)
+    paddw           m1,         bottomRow0
+    paddw           m7,         m1
+%else
+    paddw           m3,         bottomRow2
+    paddw           m7,         m3
+%endif
+    psraw           m7,         6
+
+%if (%1 == 0)
+    pmullw          m6,        [multiH]
+%else
+    pmullw          m6,        [multiH3]
+%endif
+    paddw           m6,         m5
+%if (%1 == 0)
+    paddw           m2,         bottomRow1
+    paddw           m6,         m2
+%else
+    paddw           m4,         bottomRow3
+    paddw           m6,         m4
+%endif
+    psraw           m6,         6
+
+    packuswb        m7,         m6
+    movu            [r2 + %1],  m7
+%endmacro
+
+    mov r0,         32
+.loop
+    COMP_PRED_PLANAR_ROW 0
+    COMP_PRED_PLANAR_ROW 16
+    inc             r1
+    lea             r2,         [r2 + r3]
+
+    dec             r0
+    jnz .loop
+%undef COMP_PRED_PLANAR_ROW
+
+    RET
diff -r 949f85337789 -r 3e94e05251fb source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Wed Nov 27 18:10:14 2013 -0600
+++ b/source/common/x86/intrapred.h	Thu Nov 28 13:07:20 2013 +0530
@@ -34,5 +34,6 @@
 void x265_intra_pred_planar4_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
 void x265_intra_pred_planar8_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
 void x265_intra_pred_planar16_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
+void x265_intra_pred_planar32_sse4(pixel* above, pixel* left, pixel* dst, intptr_t dstStride);
 
 #endif // ifndef X265_INTRAPRED_H


More information about the x265-devel mailing list