[x265] [PATCH 13 of 29] intrapred: cleanup intra_pred_dc and intra_pred_planar older code

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Tue Jan 13 08:11:21 CET 2015


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1419400410 -19800
#      Wed Dec 24 11:23:30 2014 +0530
# Node ID 558439d703fdc46fe47ce315c33fe982f8269a79
# Parent  44a17e7764a38817869d4a58d6764b14841eb2ec
intrapred: cleanup intra_pred_dc and intra_pred_planar older code

New asm and unit test code is available for these primitives, so this older code is no longer used.

diff -r 44a17e7764a3 -r 558439d703fd source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Dec 23 19:11:31 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Dec 24 11:23:30 2014 +0530
@@ -1409,10 +1409,6 @@
         p.quant = x265_quant_sse4;
         p.nquant = x265_nquant_sse4;
         p.dequant_normal = x265_dequant_normal_sse4;
-        p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4;
-        p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4;
-        p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4;
-        p.intra_pred[0][BLOCK_32x32] = x265_intra_pred_planar32_sse4;
 
         p.intra_pred_new[0][BLOCK_4x4] = x265_intra_pred_planar4_new_sse4;
         p.intra_pred_new[0][BLOCK_8x8] = x265_intra_pred_planar8_new_sse4;
@@ -1696,10 +1692,6 @@
         p.dequant_normal = x265_dequant_normal_sse4;
         p.weight_pp = x265_weight_pp_sse4;
         p.weight_sp = x265_weight_sp_sse4;
-        p.intra_pred[0][BLOCK_4x4] = x265_intra_pred_planar4_sse4;
-        p.intra_pred[0][BLOCK_8x8] = x265_intra_pred_planar8_sse4;
-        p.intra_pred[0][BLOCK_16x16] = x265_intra_pred_planar16_sse4;
-        p.intra_pred[0][BLOCK_32x32] = x265_intra_pred_planar32_sse4;
 
         p.intra_pred_new[0][BLOCK_4x4] = x265_intra_pred_planar4_new_sse4;
         p.intra_pred_new[0][BLOCK_8x8] = x265_intra_pred_planar8_new_sse4;
diff -r 44a17e7764a3 -r 558439d703fd source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Tue Dec 23 19:11:31 2014 +0530
+++ b/source/common/x86/intrapred.h	Wed Dec 24 11:23:30 2014 +0530
@@ -26,21 +26,11 @@
 #ifndef X265_INTRAPRED_H
 #define X265_INTRAPRED_H
 
-void x265_intra_pred_dc4_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_dc8_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_dc16_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-void x265_intra_pred_dc32_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int filter);
-
 void x265_intra_pred_dc4_new_sse4 (pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter);
 void x265_intra_pred_dc8_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int filter);
 void x265_intra_pred_dc16_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int filter);
 void x265_intra_pred_dc32_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int filter);
 
-void x265_intra_pred_planar4_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
-void x265_intra_pred_planar8_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
-void x265_intra_pred_planar16_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
-void x265_intra_pred_planar32_sse4(pixel* dst, intptr_t dstStride, pixel* above, pixel* left, int, int);
-
 void x265_intra_pred_planar4_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int);
 void x265_intra_pred_planar8_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int);
 void x265_intra_pred_planar16_new_sse4(pixel* dst, intptr_t dstStride, pixel* above, int, int);
diff -r 44a17e7764a3 -r 558439d703fd source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Tue Dec 23 19:11:31 2014 +0530
+++ b/source/common/x86/intrapred16.asm	Wed Dec 24 11:23:30 2014 +0530
@@ -448,71 +448,6 @@
     jnz            .loop
     RET
 
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar4, 4,7,5
-    add             r2,         2
-    add             r3,         2
-    add             r1,         r1
-    movh            m0,         [r3]      ; topRow[i] = above[i];
-    punpcklqdq      m0,         m0
-
-    pxor            m1,         m1
-    movd            m2,         [r2 + 8]  ; bottomLeft = left[4]
-    movzx           r6d, word   [r3 + 8]  ; topRight   = above[4];
-    pshuflw         m2,         m2, 0
-    pshufd          m2,         m2, 0
-
-    psubw           m2,         m0        ; bottomRow[i] = bottomLeft - topRow[i]
-    psllw           m0,         2
-    punpcklqdq      m3,         m2, m1
-    psubw           m0,         m3
-    paddw           m2,         m2
-
-%macro COMP_PRED_PLANAR_2ROW 1
-    movzx           r4d, word   [r2 + %1]
-    lea             r4d,        [r4d * 4 + 4]
-    movd            m3,         r4d
-    pshuflw         m3,         m3, 0
-
-    movzx           r4d, word   [r2 + %1 + 2]
-    lea             r4d,        [r4d * 4 + 4]
-    movd            m4,         r4d
-    pshuflw         m4,         m4, 0
-    punpcklqdq      m3,         m4        ; horPred
-
-    movzx           r4d, word   [r2 + %1]
-    mov             r5d,        r6d
-    sub             r5d,        r4d
-    movd            m4,         r5d
-    pshuflw         m4,         m4, 0
-
-    movzx           r4d, word   [r2 + %1 + 2]
-    mov             r5d,        r6d
-    sub             r5d,        r4d
-    movd            m1,         r5d
-    pshuflw         m1,         m1, 0
-    punpcklqdq      m4,         m1        ; rightColumnN
-
-    pmullw          m4,         [multi_2Row]
-    paddw           m3,         m4
-    paddw           m0,         m2
-    paddw           m3,         m0
-    psraw           m3,         3
-
-    movh            [r0],       m3
-    pshufd          m3,         m3, 0xAE
-    movh            [r0 + r1],  m3
-    lea             r0,         [r0 + 2 * r1]
-%endmacro
-
-    COMP_PRED_PLANAR_2ROW 0
-    COMP_PRED_PLANAR_2ROW 4
-%undef COMP_PRED_PLANAR_2ROW
-    RET
-
 ;---------------------------------------------------------------------------------------
 ; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
 ;---------------------------------------------------------------------------------------
@@ -565,69 +500,6 @@
     movh            [r0 + r1], m1
     RET
 
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar8, 4,4,7
-    add             r2,     2
-    add             r3,     2
-    add             r1,     r1
-    movu            m1,     [r3]      ; v_topRow
-    movu            m2,     [r2]      ; v_leftColumn
-
-    movd            m3,     [r3 + 16] ; topRight   = above[8];
-    movd            m4,     [r2 + 16] ; bottomLeft = left[8];
-
-    pshuflw         m3,     m3, 0
-    pshufd          m3,     m3, 0
-    pshuflw         m4,     m4, 0
-    pshufd          m4,     m4, 0
-
-    psubw           m4,     m1        ; v_bottomRow
-    psubw           m3,     m2        ; v_rightColumn
-
-    psllw           m1,     3         ; v_topRow
-    psllw           m2,     3         ; v_leftColumn
-
-    paddw           m6,     m2, [pw_8]
-
-%macro PRED_PLANAR_ROW8 1
-    %if (%1 < 4)
-        pshuflw     m5,     m6, 0x55 * %1
-        pshufd      m5,     m5, 0
-        pshuflw     m2,     m3, 0x55 * %1
-        pshufd      m2,     m2, 0
-    %else
-        pshufhw     m5,     m6, 0x55 * (%1 - 4)
-        pshufd      m5,     m5, 0xAA
-        pshufhw     m2,     m3, 0x55 * (%1 - 4)
-        pshufd      m2,     m2, 0xAA
-    %endif
-
-    pmullw          m2,     [multiL]
-    paddw           m5,     m2
-    paddw           m1,     m4
-    paddw           m5,     m1
-    psraw           m5,     4
-
-    movu            [r0],   m5
-    add             r0,     r1
-
-%endmacro
-
-    PRED_PLANAR_ROW8 0
-    PRED_PLANAR_ROW8 1
-    PRED_PLANAR_ROW8 2
-    PRED_PLANAR_ROW8 3
-    PRED_PLANAR_ROW8 4
-    PRED_PLANAR_ROW8 5
-    PRED_PLANAR_ROW8 6
-    PRED_PLANAR_ROW8 7
-
-%undef PRED_PLANAR_ROW8
-    RET
-
 ;---------------------------------------------------------------------------------------
 ; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
 ;---------------------------------------------------------------------------------------
@@ -679,187 +551,6 @@
     INTRA_PRED_PLANAR8 7
     RET
 
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-%if (BIT_DEPTH == 12)
-
-%if (ARCH_X86_64 == 1)
-cglobal intra_pred_planar16, 4,7,8+3
-%define bottomRow0  m7
-%define bottomRow1  m8
-%define bottomRow2  m9
-%define bottomRow3  m10
-%else
-cglobal intra_pred_planar16, 4,7,8, 0-3*mmsize
-%define bottomRow0  [rsp + 0*mmsize]
-%define bottomRow1  [rsp + 1*mmsize]
-%define bottomRow2  [rsp + 2*mmsize]
-%define bottomRow3  m7
-%endif
-
-    add             r2, 2
-    add             r3, 2
-    add             r1, r1
-
-    pxor            m0, m0
-
-    ; bottomRow
-    movzx           r4d, word [r2 + 16*2]
-    movd            m1, r4d
-    pshufd          m1, m1, 0               ; m1 = bottomLeft
-    movu            m2, [r3]
-    pmovzxwd        m3, m2
-    punpckhwd       m2, m0
-    psubd           m4, m1, m3
-    mova            bottomRow0, m4
-    psubd           m4, m1, m2
-    mova            bottomRow1, m4
-    movu            m2, [r3 + 16]
-    pmovzxwd        m3, m2
-    punpckhwd       m2, m0
-    psubd           m4, m1, m3
-    mova            bottomRow2, m4
-    psubd           m1, m2
-    mova            bottomRow3, m1
-
-    ; topRow
-    pmovzxwd        m0, [r3 + 0*8]
-    pslld           m0, 4
-    pmovzxwd        m1, [r3 + 1*8]
-    pslld           m1, 4
-    pmovzxwd        m2, [r3 + 2*8]
-    pslld           m2, 4
-    pmovzxwd        m3, [r3 + 3*8]
-    pslld           m3, 4
-
-    xor             r6, r6
-.loopH:
-    movzx           r4d, word [r2 + r6*2]
-    movzx           r5d, word [r3 + 16*2]       ; r5 = topRight
-    sub             r5d, r4d
-    movd            m5, r5d
-    pshuflw         m5, m5, 0
-    pmullw          m5, [multiL]
-    pmovsxwd        m5, m5                      ; m5 = rightCol
-    add             r4d, r4d
-    lea             r4d, [r4d * 8 + 16]
-    movd            m4, r4d
-    pshufd          m4, m4, 0                   ; m4 = horPred
-    paddd           m4, m5
-    pshufd          m6, m5, 0xFF                ; m6 = [4 4 4 4]
-
-    ; 0-3
-    paddd           m0, bottomRow0
-    paddd           m5, m0, m4
-    psrad           m5, 5
-    packusdw        m5, m5
-    movh            [r0 + 0*8], m5
-
-    ; 4-7
-    paddd           m4, m6
-    paddd           m1, bottomRow1
-    paddd           m5, m1, m4
-    psrad           m5, 5
-    packusdw        m5, m5
-    movh            [r0 + 1*8], m5
-
-    ; 8-11
-    paddd           m4, m6
-    paddd           m2, bottomRow2
-    paddd           m5, m2, m4
-    psrad           m5, 5
-    packusdw        m5, m5
-    movh            [r0 + 2*8], m5
-
-    ; 12-15
-    paddd           m4, m6
-    paddd           m3, bottomRow3
-    paddd           m5, m3, m4
-    psrad           m5, 5
-    packusdw        m5, m5
-    movh            [r0 + 3*8], m5
-
-    add             r0, r1
-    inc             r6d
-    cmp             r6d, 16
-    jnz            .loopH
-    RET
-
-%else ; BIT_DEPTH == 10
-INIT_XMM sse4
-cglobal intra_pred_planar16, 4,6,7
-    add             r2,         2
-    add             r3,         2
-    add             r1,         r1
-
-    movu            m1,         [r3]        ; topRow[0-7]
-    movu            m2,         [r3 + 16]   ; topRow[8-15]
-
-    movd            m3,         [r2 + 32]
-    pshuflw         m3,         m3, 0
-    pshufd          m3,         m3, 0
-    movzx           r4d, word   [r3 + 32]   ; topRight = above[16]
-
-    psubw           m4,         m3, m1      ; v_bottomRow[0]
-    psubw           m3,         m2          ; v_bottomRow[1]
-
-    psllw           m1,         4
-    psllw           m2,         4
-
-%macro PRED_PLANAR_ROW16 1
-    movzx           r5d, word   [r2 + %1 * 2]
-    add             r5d,        r5d
-    lea             r5d,        [r5d * 8 + 16]
-    movd            m5,         r5d
-    pshuflw         m5,         m5, 0
-    pshufd          m5,         m5, 0       ; horPred
-
-    movzx           r5d, word   [r2 + %1 * 2]
-    mov             r3d,        r4d
-    sub             r3d,        r5d
-    movd            m0,         r3d
-    pshuflw         m0,         m0, 0
-    pshufd          m0,         m0, 0
-
-    pmullw          m6,         m0, [multiL]
-    paddw           m6,         m5
-    paddw           m1,         m4
-    paddw           m6,         m1
-    psraw           m6,         5
-
-    pmullw          m0,         m0, [multiH]
-    paddw           m5,         m0
-    paddw           m2,         m3
-    paddw           m5,         m2
-    psraw           m5,         5
-
-    movu            [r0],       m6
-    movu            [r0 + 16],  m5
-    add             r0,         r1
-%endmacro
-
-    PRED_PLANAR_ROW16 0
-    PRED_PLANAR_ROW16 1
-    PRED_PLANAR_ROW16 2
-    PRED_PLANAR_ROW16 3
-    PRED_PLANAR_ROW16 4
-    PRED_PLANAR_ROW16 5
-    PRED_PLANAR_ROW16 6
-    PRED_PLANAR_ROW16 7
-    PRED_PLANAR_ROW16 8
-    PRED_PLANAR_ROW16 9
-    PRED_PLANAR_ROW16 10
-    PRED_PLANAR_ROW16 11
-    PRED_PLANAR_ROW16 12
-    PRED_PLANAR_ROW16 13
-    PRED_PLANAR_ROW16 14
-    PRED_PLANAR_ROW16 15
-%undef PRED_PLANAR_ROW16
-    RET
-%endif
-
 ;---------------------------------------------------------------------------------------
 ; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
 ;---------------------------------------------------------------------------------------
@@ -1166,197 +857,6 @@
     mov             rsp, r6
     RET
 
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-%if (ARCH_X86_64 == 1)
-cglobal intra_pred_planar32, 4,7,8+8, 0-4*mmsize
-    %define bottomRow0  m8
-    %define bottomRow1  m9
-    %define bottomRow2  m10
-    %define bottomRow3  m11
-    %define bottomRow4  m12
-    %define bottomRow5  m13
-    %define bottomRow6  m14
-    %define bottomRow7  m15
-    %define tmp0        [rsp + 0*mmsize]
-    %define tmp1        [rsp + 1*mmsize]
-    %define tmp2        [rsp + 2*mmsize]
-    %define tmp3        [rsp + 3*mmsize]
-%else
-cglobal intra_pred_planar32, 4,7,8, 0-12*mmsize
-    %define bottomRow0  [rsp + 0*mmsize]
-    %define bottomRow1  [rsp + 1*mmsize]
-    %define bottomRow2  [rsp + 2*mmsize]
-    %define bottomRow3  [rsp + 3*mmsize]
-    %define bottomRow4  [rsp + 4*mmsize]
-    %define bottomRow5  [rsp + 5*mmsize]
-    %define bottomRow6  [rsp + 6*mmsize]
-    %define bottomRow7  [rsp + 7*mmsize]
-    %define tmp0        [rsp + 8*mmsize]
-    %define tmp1        [rsp + 9*mmsize]
-    %define tmp2        [rsp + 10*mmsize]
-    %define tmp3        [rsp + 11*mmsize]
-%endif
-
-    add             r2, 2
-    add             r3, 2
-    add             r1, r1
-
-    pxor            m0, m0
-
-    ; bottomRow
-    movzx           r4d, word [r2 + 32*2]
-    movd            m1, r4d
-    pshufd          m1, m1, 0               ; m1 = bottomLeft
-    movu            m2, [r3]
-    pmovzxwd        m3, m2
-    punpckhwd       m2, m0
-    psubd           m4, m1, m3
-    mova            bottomRow0, m4
-    psubd           m4, m1, m2
-    mova            bottomRow1, m4
-    movu            m2, [r3 + 16]
-    pmovzxwd        m3, m2
-    punpckhwd       m2, m0
-    psubd           m4, m1, m3
-    mova            bottomRow2, m4
-    psubd           m4, m1, m2
-    mova            bottomRow3, m4
-
-    movu            m2, [r3 + 32]
-    pmovzxwd        m3, m2
-    punpckhwd       m2, m0
-    psubd           m4, m1, m3
-    mova            bottomRow4, m4
-    psubd           m4, m1, m2
-    mova            bottomRow5, m4
-    movu            m2, [r3 + 48]
-    pmovzxwd        m3, m2
-    punpckhwd       m2, m0
-    psubd           m4, m1, m3
-    mova            bottomRow6, m4
-    psubd           m1, m2
-    mova            bottomRow7, m1
-
-    ; topRow
-    pmovzxwd        m0, [r3 + 0*8]
-    pslld           m0, 5
-    pmovzxwd        m1, [r3 + 1*8]
-    pslld           m1, 5
-    pmovzxwd        m2, [r3 + 2*8]
-    pslld           m2, 5
-    pmovzxwd        m3, [r3 + 3*8]
-    pslld           m3, 5
-
-    pmovzxwd        m4, [r3 + 4*8]
-    pslld           m4, 5
-    mova            tmp0, m4
-    pmovzxwd        m4, [r3 + 5*8]
-    pslld           m4, 5
-    mova            tmp1, m4
-    pmovzxwd        m4, [r3 + 6*8]
-    pslld           m4, 5
-    mova            tmp2, m4
-    pmovzxwd        m4, [r3 + 7*8]
-    pslld           m4, 5
-    mova            tmp3, m4
-
-    xor             r6, r6
-.loopH:
-    movzx           r4d, word [r2 + r6*2]
-    movzx           r5d, word [r3 + 32*2]       ; r5 = topRight
-    sub             r5d, r4d
-    movd            m5, r5d
-    pshuflw         m5, m5, 0
-    pmullw          m5, [multiL]
-    pmovsxwd        m5, m5                      ; m5 = rightCol
-    shl             r4d, 5
-    add             r4d, 32
-    movd            m4, r4d
-    pshufd          m4, m4, 0                   ; m4 = horPred
-    paddd           m4, m5
-    pshufd          m6, m5, 0xFF                ; m6 = [4 4 4 4]
-
-    ; 0-3
-    paddd           m0, bottomRow0
-    paddd           m5, m0, m4
-    psrad           m5, 6
-    packusdw        m5, m5
-    movh            [r0 + 0*8], m5
-
-    ; 4-7
-    paddd           m4, m6
-    paddd           m1, bottomRow1
-    paddd           m5, m1, m4
-    psrad           m5, 6
-    packusdw        m5, m5
-    movh            [r0 + 1*8], m5
-
-    ; 8-11
-    paddd           m4, m6
-    paddd           m2, bottomRow2
-    paddd           m5, m2, m4
-    psrad           m5, 6
-    packusdw        m5, m5
-    movh            [r0 + 2*8], m5
-
-    ; 12-15
-    paddd           m4, m6
-    paddd           m3, bottomRow3
-    paddd           m5, m3, m4
-    psrad           m5, 6
-    packusdw        m5, m5
-    movh            [r0 + 3*8], m5
-
-    ; 16-19
-    paddd           m4, m6
-    mova            m7, tmp0
-    paddd           m7, bottomRow4
-    mova            tmp0, m7
-    paddd           m7, m4
-    psrad           m7, 6
-    packusdw        m7, m7
-    movh            [r0 + 4*8], m7
-
-    ; 20-23
-    paddd           m4, m6
-    mova            m7, tmp1
-    paddd           m7, bottomRow5
-    mova            tmp1, m7
-    paddd           m7, m4
-    psrad           m7, 6
-    packusdw        m7, m7
-    movh            [r0 + 5*8], m7
-
-    ; 24-27
-    paddd           m4, m6
-    mova            m7, tmp2
-    paddd           m7, bottomRow6
-    mova            tmp2, m7
-    paddd           m7, m4
-    psrad           m7, 6
-    packusdw        m7, m7
-    movh            [r0 + 6*8], m7
-
-    ; 28-31
-    paddd           m4, m6
-    mova            m7, tmp3
-    paddd           m7, bottomRow7
-    mova            tmp3, m7
-    paddd           m7, m4
-    psrad           m7, 6
-    packusdw        m7, m7
-    movh            [r0 + 7*8], m7
-
-    add             r0, r1
-    inc             r6d
-    cmp             r6d, 32
-    jnz            .loopH
-
-    RET
-
 ;-----------------------------------------------------------------------------
 ; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
 ;-----------------------------------------------------------------------------
diff -r 44a17e7764a3 -r 558439d703fd source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Tue Dec 23 19:11:31 2014 +0530
+++ b/source/common/x86/intrapred8.asm	Wed Dec 24 11:23:30 2014 +0530
@@ -415,70 +415,6 @@
 
     RET
 
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar4, 4,7,5
-    inc             r2
-    inc             r3
-    pmovzxbw        m0,         [r3]      ; topRow[i] = above[i];
-    punpcklqdq      m0,         m0
-
-    pxor            m1,         m1
-    movd            m2,         [r2 + 4]  ; bottomLeft = left[4]
-    movzx           r6d, byte   [r3 + 4]  ; topRight   = above[4];
-    pshufb          m2,         m1
-    punpcklbw       m2,         m1
-    psubw           m2,         m0        ; bottomRow[i] = bottomLeft - topRow[i]
-    psllw           m0,         2
-    punpcklqdq      m3,         m2, m1
-    psubw           m0,         m3
-    paddw           m2,         m2
-
-%macro COMP_PRED_PLANAR_2ROW 1
-    movzx           r4d, byte   [r2 + %1]
-    lea             r4d,        [r4d * 4 + 4]
-    movd            m3,         r4d
-    pshuflw         m3,         m3, 0
-
-    movzx           r4d, byte   [r2 + %1 + 1]
-    lea             r4d,        [r4d * 4 + 4]
-    movd            m4,         r4d
-    pshuflw         m4,         m4, 0
-    punpcklqdq      m3,         m4        ; horPred
-
-    movzx           r4d, byte   [r2 + %1]
-    mov             r5d,        r6d
-    sub             r5d,        r4d
-    movd            m4,         r5d
-    pshuflw         m4,         m4, 0
-
-    movzx           r4d, byte   [r2 + %1 + 1]
-    mov             r5d,        r6d
-    sub             r5d,        r4d
-    movd            m1,         r5d
-    pshuflw         m1,         m1, 0
-    punpcklqdq      m4,         m1        ; rightColumnN
-
-    pmullw          m4,         [multi_2Row]
-    paddw           m3,         m4
-    paddw           m0,         m2
-    paddw           m3,         m0
-    psraw           m3,         3
-    packuswb        m3,         m3
-
-    movd            [r0],       m3
-    pshufd          m3,         m3, 0x55
-    movd            [r0 + r1],  m3
-    lea             r0,         [r0 + 2 * r1]
-%endmacro
-
-    COMP_PRED_PLANAR_2ROW 0
-    COMP_PRED_PLANAR_2ROW 2
-
-    RET
-
 ;---------------------------------------------------------------------------------------
 ; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
 ;---------------------------------------------------------------------------------------
@@ -533,68 +469,6 @@
     movd            [r0 + r1], m5
     RET
 
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar8, 4,4,7
-    inc             r2
-    inc             r3
-    pxor            m0,     m0
-    pmovzxbw        m1,     [r3]     ; v_topRow
-    pmovzxbw        m2,     [r2]     ; v_leftColumn
-
-    movd            m3,     [r3 + 8] ; topRight   = above[8];
-    movd            m4,     [r2 + 8] ; bottomLeft = left[8];
-
-    pshufb          m3,     m0
-    pshufb          m4,     m0
-    punpcklbw       m3,     m0       ; v_topRight
-    punpcklbw       m4,     m0       ; v_bottomLeft
-
-    psubw           m4,     m1       ; v_bottomRow
-    psubw           m3,     m2       ; v_rightColumn
-
-    psllw           m1,     3        ; v_topRow
-    psllw           m2,     3        ; v_leftColumn
-
-    paddw           m6,     m2, [pw_8]
-
-%macro PRED_PLANAR_ROW8 1
-    %if (%1 < 4)
-        pshuflw     m5,     m6, 0x55 * %1
-        pshufd      m5,     m5, 0
-        pshuflw     m2,     m3, 0x55 * %1
-        pshufd      m2,     m2, 0
-    %else
-        pshufhw     m5,     m6, 0x55 * (%1 - 4)
-        pshufd      m5,     m5, 0xAA
-        pshufhw     m2,     m3, 0x55 * (%1 - 4)
-        pshufd      m2,     m2, 0xAA
-    %endif
-
-    pmullw          m2,     [multiL]
-    paddw           m5,     m2
-    paddw           m1,     m4
-    paddw           m5,     m1
-    psraw           m5,     4
-    packuswb        m5,     m5
-
-    movh            [r0],   m5
-    lea             r0,     [r0 + r1]
-
-%endmacro
-
-    PRED_PLANAR_ROW8 0
-    PRED_PLANAR_ROW8 1
-    PRED_PLANAR_ROW8 2
-    PRED_PLANAR_ROW8 3
-    PRED_PLANAR_ROW8 4
-    PRED_PLANAR_ROW8 5
-    PRED_PLANAR_ROW8 6
-    PRED_PLANAR_ROW8 7
-    RET
-
 ;---------------------------------------------------------------------------------------
 ; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
 ;---------------------------------------------------------------------------------------
@@ -647,79 +521,6 @@
     INTRA_PRED_PLANAR8 7
     RET
 
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_planar16, 4,6,8
-    inc             r2
-    inc             r3
-    pxor            m0,         m0
-    pmovzxbw        m1,         [r3]       ; topRow[0-7]
-    pmovzxbw        m2,         [r3 + 8]   ; topRow[8-15]
-
-    movd            m3,         [r2 + 16]
-    pshufb          m3,         m0
-    punpcklbw       m3,         m0         ; v_bottomLeft = left[16]
-    movzx           r4d, byte   [r3 + 16]  ; topRight     = above[16]
-
-    psubw           m4,         m3, m1     ; v_bottomRow[0]
-    psubw           m5,         m3, m2     ; v_bottomRow[1]
-
-    psllw           m1,         4
-    psllw           m2,         4
-
-%macro PRED_PLANAR_ROW16 1
-    movzx           r5d, byte   [r2 + %1]
-    add             r5d,        r5d
-    lea             r5d,        [r5d * 8 + 16]
-    movd            m3,         r5d
-    pshuflw         m3,         m3, 0
-    pshufd          m3,         m3, 0      ; horPred
-
-    movzx           r5d, byte   [r2 + %1]
-    mov             r3d,        r4d
-    sub             r3d,        r5d
-    movd            m6,         r3d
-    pshuflw         m6,         m6, 0
-    pshufd          m6,         m6, 0
-
-    pmullw          m7,         m6, [multiL]
-    paddw           m7,         m3
-    paddw           m1,         m4
-    paddw           m7,         m1
-    psraw           m7,         5
-
-    pmullw          m6,         m6, [multiH]
-    paddw           m3,         m6
-    paddw           m2,         m5
-    paddw           m3,         m2
-    psraw           m3,         5
-
-    packuswb        m7,         m3
-    movu            [r0],       m7
-    lea             r0,         [r0 + r1]
-%endmacro
-
-    PRED_PLANAR_ROW16 0
-    PRED_PLANAR_ROW16 1
-    PRED_PLANAR_ROW16 2
-    PRED_PLANAR_ROW16 3
-    PRED_PLANAR_ROW16 4
-    PRED_PLANAR_ROW16 5
-    PRED_PLANAR_ROW16 6
-    PRED_PLANAR_ROW16 7
-    PRED_PLANAR_ROW16 8
-    PRED_PLANAR_ROW16 9
-    PRED_PLANAR_ROW16 10
-    PRED_PLANAR_ROW16 11
-    PRED_PLANAR_ROW16 12
-    PRED_PLANAR_ROW16 13
-    PRED_PLANAR_ROW16 14
-    PRED_PLANAR_ROW16 15
-
-    RET
-
 ;---------------------------------------------------------------------------------------
 ; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel*srcPix, int, int filter)
 ;---------------------------------------------------------------------------------------
@@ -909,114 +710,6 @@
     jnz             .loop
     RET
 
-;-----------------------------------------------------------------------------------------------------------
-; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-%if ARCH_X86_64 == 1
-cglobal intra_pred_planar32, 4,7,12
-  %define bottomRow0    m8
-  %define bottomRow1    m9
-  %define bottomRow2    m10
-  %define bottomRow3    m11
-%else
-cglobal intra_pred_planar32, 4,7,8,0-(4*mmsize)
-  %define bottomRow0    [rsp + 0 * mmsize]
-  %define bottomRow1    [rsp + 1 * mmsize]
-  %define bottomRow2    [rsp + 2 * mmsize]
-  %define bottomRow3    [rsp + 3 * mmsize]
-%endif
-    inc             r2
-    inc             r3
-    pxor            m3,         m3
-    movd            m0,         [r2 + 32]
-    pshufb          m0,         m3
-    punpcklbw       m0,         m3          ; v_bottomLeft = left[32]
-    movzx           r4d, byte   [r3 + 32]   ; topRight     = above[32]
-
-    pmovzxbw        m1,         [r3 + 0]    ; topRow[0]
-    pmovzxbw        m2,         [r3 + 8]    ; topRow[1]
-    pmovzxbw        m3,         [r3 +16]    ; topRow[2]
-    pmovzxbw        m4,         [r3 +24]    ; topRow[3]
-
-    psubw           m5,         m0, m1      ; v_bottomRow[0]
-    psubw           m6,         m0, m2      ; v_bottomRow[1]
-    psubw           m7,         m0, m3      ; v_bottomRow[2]
-    psubw           m0,         m4          ; v_bottomRow[3]
-
-    mova            bottomRow0, m5
-    mova            bottomRow1, m6
-    mova            bottomRow2, m7
-    mova            bottomRow3, m0
-
-    psllw           m1,         5
-    psllw           m2,         5
-    psllw           m3,         5
-    psllw           m4,         5
-
-%macro COMP_PRED_PLANAR_ROW 1
-    movzx           r5d,   byte [r2]
-    shl             r5d,        5
-    add             r5d,        32
-    movd            m5,         r5d
-    pshuflw         m5,         m5, 0
-    pshufd          m5,         m5, 0      ; horPred
-
-    movzx           r5d,   byte [r2]
-    mov             r6d,        r4d
-    sub             r6d,        r5d
-    movd            m6,         r6d
-    pshuflw         m6,         m6, 0
-    pshufd          m6,         m6, 0
-
-%if (%1 == 0)
-    pmullw          m7,         m6, [multiL]
-%else
-    pmullw          m7,         m6, [multiH2]
-%endif
-
-    paddw           m7,         m5
-%if (%1 == 0)
-    paddw           m1,         bottomRow0
-    paddw           m7,         m1
-%else
-    paddw           m3,         bottomRow2
-    paddw           m7,         m3
-%endif
-    psraw           m7,         6
-
-%if (%1 == 0)
-    pmullw          m6,        [multiH]
-%else
-    pmullw          m6,        [multiH3]
-%endif
-    paddw           m6,         m5
-%if (%1 == 0)
-    paddw           m2,         bottomRow1
-    paddw           m6,         m2
-%else
-    paddw           m4,         bottomRow3
-    paddw           m6,         m4
-%endif
-    psraw           m6,         6
-
-    packuswb        m7,         m6
-    movu            [r0 + %1],  m7
-%endmacro
-
-    mov r3,         32
-.loop:
-    COMP_PRED_PLANAR_ROW 0
-    COMP_PRED_PLANAR_ROW 16
-    inc             r2
-    lea             r0,         [r0 + r1]
-
-    dec             r3
-    jnz .loop
-%undef COMP_PRED_PLANAR_ROW
-
-    RET
-
 ;-----------------------------------------------------------------------------
 ; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
 ;-----------------------------------------------------------------------------
diff -r 44a17e7764a3 -r 558439d703fd source/test/intrapredharness.cpp
--- a/source/test/intrapredharness.cpp	Tue Dec 23 19:11:31 2014 +0530
+++ b/source/test/intrapredharness.cpp	Wed Dec 24 11:23:30 2014 +0530
@@ -33,44 +33,6 @@
         pixel_buff[i] = rand() % PIXEL_MAX;
 }
 
-bool IntraPredHarness::check_dc_primitive(intra_pred_t ref, intra_pred_t opt, int width)
-{
-    int j = Predict::ADI_BUF_STRIDE;
-    intptr_t stride = FENC_STRIDE;
-
-#if _DEBUG
-    memset(pixel_out_vec, 0xCD, OUTPUT_SIZE);
-    memset(pixel_out_c, 0xCD, OUTPUT_SIZE);
-#endif
-
-    for (int i = 0; i <= 100; i++)
-    {
-        int rand_filter = rand() & 1;
-        if (width > 16)
-            rand_filter = 0;
-
-        pixel left[MAX_CU_SIZE * 2 + 1];
-        for (int k = 0; k < width * 2 + 1; k++)
-        {
-            left[k] = pixel_buff[j - 1 + k * Predict::ADI_BUF_STRIDE];
-        }
-
-        ref(pixel_out_c, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, left + 1, 0, rand_filter);
-        checked(opt, pixel_out_vec, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, left + 1, 0, rand_filter);
-
-        for (int k = 0; k < width; k++)
-        {
-            if (memcmp(pixel_out_vec + k * FENC_STRIDE, pixel_out_c + k * FENC_STRIDE, width * sizeof(pixel)))
-                return false;
-        }
-
-        reportfail();
-        j += FENC_STRIDE;
-    }
-
-    return true;
-}
-
 bool IntraPredHarness::check_dc_primitive(intra_pred_new_t ref, intra_pred_new_t opt, int width)
 {
     int j = Predict::ADI_BUF_STRIDE;
@@ -88,41 +50,7 @@
             rand_filter = 0;
 
         ref(pixel_out_c, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, 0, rand_filter);
-        opt(pixel_out_vec, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, 0, rand_filter);
-
-        for (int k = 0; k < width; k++)
-        {
-            if (memcmp(pixel_out_vec + k * FENC_STRIDE, pixel_out_c + k * FENC_STRIDE, width * sizeof(pixel)))
-                return false;
-        }
-
-        reportfail();
-        j += FENC_STRIDE;
-    }
-
-    return true;
-}
-
-bool IntraPredHarness::check_planar_primitive(intra_pred_t ref, intra_pred_t opt, int width)
-{
-    int j = Predict::ADI_BUF_STRIDE;
-    intptr_t stride = FENC_STRIDE;
-
-#if _DEBUG
-    memset(pixel_out_vec, 0xCD, OUTPUT_SIZE);
-    memset(pixel_out_c, 0xCD, OUTPUT_SIZE);
-#endif
-
-    for (int i = 0; i <= 100; i++)
-    {
-        pixel left[MAX_CU_SIZE * 2 + 1];
-        for (int k = 0; k < width * 2 + 1; k++)
-        {
-            left[k] = pixel_buff[j - 1 + k * Predict::ADI_BUF_STRIDE];
-        }
-
-        ref(pixel_out_c, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, left + 1, 0, 0);
-        checked(opt, pixel_out_vec, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, left + 1, 0, 0);
+        checked(opt, pixel_out_vec, stride, pixel_buff + j - Predict::ADI_BUF_STRIDE, 0, rand_filter);
 
         for (int k = 0; k < width; k++)
         {
@@ -264,15 +192,6 @@
 {
     for (int i = BLOCK_4x4; i <= BLOCK_32x32; i++)
     {
-        if (opt.intra_pred[0][i])
-        {
-            const int size = (1 << (i + 2));
-            if (!check_planar_primitive(ref.intra_pred[0][i], opt.intra_pred[0][i], size))
-            {
-                printf("intra_planar %dx%d failed\n", size, size);
-                return false;
-            }
-        }
         if (opt.intra_pred_new[0][i])
         {
             const int size = (1 << (i + 2));
@@ -326,12 +245,6 @@
             REPORT_SPEEDUP(opt.intra_pred_new[0][i], ref.intra_pred_new[0][i],
                            pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, 0, 0);
         }
-        if (opt.intra_pred[0][i])
-        {
-            printf("intra_planar %2dx%d", size, size);
-            REPORT_SPEEDUP(opt.intra_pred[0][i], ref.intra_pred[0][i],
-                           pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, pixel_buff, 0, 0);
-        }
         if (opt.intra_pred_allangs[i])
         {
             bool bFilter = (size <= 16);
diff -r 44a17e7764a3 -r 558439d703fd source/test/intrapredharness.h
--- a/source/test/intrapredharness.h	Tue Dec 23 19:11:31 2014 +0530
+++ b/source/test/intrapredharness.h	Wed Dec 24 11:23:30 2014 +0530
@@ -41,9 +41,7 @@
     pixel pixel_out_33_c[OUTPUT_SIZE_33];
     pixel pixel_out_33_vec[OUTPUT_SIZE_33];
 
-    bool check_dc_primitive(intra_pred_t ref, intra_pred_t opt, int width);
     bool check_dc_primitive(intra_pred_new_t ref, intra_pred_new_t opt, int width);
-    bool check_planar_primitive(intra_pred_t ref, intra_pred_t opt, int width);
     bool check_planar_primitive(intra_pred_new_t ref, intra_pred_new_t opt, int width);
     bool check_angular_primitive(const intra_pred_t ref[][NUM_TR_SIZE], const intra_pred_t opt[][NUM_TR_SIZE]);
     bool check_allangs_primitive(const intra_allangs_t ref[], const intra_allangs_t opt[]);



More information about the x265-devel mailing list