[x265] [PATCH 09 of 29] intra_pred_dc: cleanup, remove unused asm code

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Tue Jan 13 08:11:17 CET 2015


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1418728444 -19800
#      Tue Dec 16 16:44:04 2014 +0530
# Node ID 723d7d1622b8de39d5221f0b8ff82a2a922e7d8b
# Parent  c238ea33fa51521639404aeb65864a7e4492dcef
intra_pred_dc: cleanup, remove unused asm code

diff -r c238ea33fa51 -r 723d7d1622b8 source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Mon Jan 12 12:48:45 2015 +0530
+++ b/source/common/x86/intrapred16.asm	Tue Dec 16 16:44:04 2014 +0530
@@ -71,73 +71,6 @@
 cextern pb_unpackwq1
 cextern pb_unpackwq2
 
-;-------------------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc4, 4,6,2
-    mov         r4d,            r5m
-    add         r2,             2
-    add         r3,             2
-
-    movh        m0,             [r3]           ; sumAbove
-    movh        m1,             [r2]           ; sumLeft
-
-    paddw       m0,             m1
-    pshufd      m1,             m0, 1
-    paddw       m0,             m1
-    phaddw      m0,             m0             ; m0 = sum
-
-    test        r4d,            r4d
-
-    pmulhrsw    m0,             [pw_4096]      ; m0 = (sum + 4) / 8
-    movd        r4d,            m0             ; r4d = dc_val
-    movzx       r4d,            r4w
-    pshuflw     m0,             m0, 0          ; m0 = word [dc_val ...]
-
-    ; store DC 4x4
-    movh        [r0],           m0
-    movh        [r0 + r1 * 2],  m0
-    movh        [r0 + r1 * 4],  m0
-    lea         r5,             [r0 + r1 * 4]
-    movh        [r5 + r1 * 2],  m0
-
-    ; do DC filter
-    jz          .end
-    lea         r5d,            [r4d * 2 + 2]  ; r5d = DC * 2 + 2
-    add         r4d,            r5d            ; r4d = DC * 3 + 2
-    movd        m0,             r4d
-    pshuflw     m0,             m0, 0          ; m0 = pixDCx3
-
-    ; filter top
-    movu        m1,             [r3]
-    paddw       m1,             m0
-    psraw       m1,             2
-    movh        [r0],           m1             ; overwrite top-left pixel, we will update it later
-
-    ; filter top-left
-    movzx       r3d, word       [r3]
-    add         r5d,            r3d
-    movzx       r3d, word       [r2]
-    add         r3d,            r5d
-    shr         r3d,            2
-    mov         [r0],           r3w
-
-    ; filter left
-    lea         r0,             [r0 + r1 * 2]
-    movu        m1,             [r2 + 2]
-    paddw       m1,             m0
-    psraw       m1,             2
-    movd        r3d,            m1
-    mov         [r0],           r3w
-    shr         r3d,            16
-    mov         [r0 + r1 * 2],  r3w
-    pextrw      [r0 + r1 * 4],  m1, 2
-
-.end:
-
-    RET
-
 ;-----------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
 ;-----------------------------------------------------------------------------------
@@ -202,87 +135,6 @@
 .end:
     RET
 
-;-------------------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc8, 4, 7, 2
-    mov             r4d,           r5m
-    add             r2,            2
-    add             r3,            2
-    add             r1,            r1
-    movu            m0,            [r3]
-    movu            m1,            [r2]
-
-    paddw           m0,            m1
-    movhlps         m1,            m0
-    paddw           m0,            m1
-    phaddw          m0,            m0
-    pmaddwd         m0,            [pw_1]
-
-    movd            r5d,           m0
-    add             r5d,           8
-    shr             r5d,           4              ; sum = sum / 16
-    movd            m1,            r5d
-    pshuflw         m1,            m1, 0          ; m1 = word [dc_val ...]
-    pshufd          m1,            m1, 0
-
-    test            r4d,           r4d
-
-    ; store DC 8x8
-    mov             r6,            r0
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + r1 * 2], m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0 + r1],     m1
-    movu            [r0 + r1 * 2], m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0 + r1],     m1
-    movu            [r0 + r1 * 2], m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0 + r1],     m1
-
-    ; Do DC Filter
-    jz              .end
-    lea             r4d,           [r5d * 2 + 2]  ; r4d = DC * 2 + 2
-    add             r5d,           r4d            ; r5d = DC * 3 + 2
-    movd            m1,            r5d
-    pshuflw         m1,            m1, 0          ; m1 = pixDCx3
-    pshufd          m1,            m1, 0
-
-    ; filter top
-    movu            m0,            [r3]
-    paddw           m0,            m1
-    psraw           m0,            2
-    movu            [r6],          m0
-
-    ; filter top-left
-    movzx           r3d, word      [r3]
-    add             r4d,           r3d
-    movzx           r3d, word      [r2]
-    add             r3d,           r4d
-    shr             r3d,           2
-    mov             [r6],          r3w
-
-    ; filter left
-    add             r6,            r1
-    movu            m0,            [r2 + 2]
-    paddw           m0,            m1
-    psraw           m0,            2
-    pextrw          [r6],          m0, 0
-    pextrw          [r6 + r1],     m0, 1
-    pextrw          [r6 + r1 * 2], m0, 2
-    lea             r6,            [r6 + r1 * 2]
-    pextrw          [r6 + r1],     m0, 3
-    pextrw          [r6 + r1 * 2], m0, 4
-    lea             r6,            [r6 + r1 * 2]
-    pextrw          [r6 + r1],     m0, 5
-    pextrw          [r6 + r1 * 2], m0, 6
-
-.end:
-    RET
-
 ;-----------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* above, int, int filter)
 ;-----------------------------------------------------------------------------------
@@ -366,10 +218,9 @@
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
 ;-------------------------------------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal intra_pred_dc16, 4, 7, 4
-    mov             r4d,                 r5m
+cglobal intra_pred_dc16_new, 5, 7, 4
+    lea             r3,                  [r2 + 66]
     add             r2,                  2
-    add             r3,                  2
     add             r1,                  r1
     movu            m0,                  [r3]
     movu            m1,                  [r3 + 16]
@@ -444,26 +295,26 @@
     pshufd          m1,                  m1, 0
 
     ; filter top
-    movu            m2,                  [r3]
+    movu            m2,                  [r2]
     paddw           m2,                  m1
     psraw           m2,                  2
     movu            [r6],                m2
-    movu            m3,                  [r3 + 16]
+    movu            m3,                  [r2 + 16]
     paddw           m3,                  m1
     psraw           m3,                  2
     movu            [r6 + 16],           m3
 
     ; filter top-left
-    movzx           r3d, word            [r3]
-    add             r4d,                 r3d
-    movzx           r3d, word            [r2]
-    add             r3d,                 r4d
-    shr             r3d,                 2
-    mov             [r6],                r3w
+    movzx           r5d, word            [r3]
+    add             r4d,                 r5d
+    movzx           r5d, word            [r2]
+    add             r5d,                 r4d
+    shr             r5d,                 2
+    mov             [r6],                r5w
 
     ; filter left
     add             r6,                  r1
-    movu            m2,                  [r2 + 2]
+    movu            m2,                  [r3 + 2]
     paddw           m2,                  m1
     psraw           m2,                  2
 
@@ -480,141 +331,6 @@
     pextrw          [r6 + r1],           m2, 7
 
     lea             r6,                  [r6 + r1 * 2]
-    movu            m3,                  [r2 + 18]
-    paddw           m3,                  m1
-    psraw           m3,                  2
-
-    pextrw          [r6],                m3, 0
-    pextrw          [r6 + r1],           m3, 1
-    lea             r6,                  [r6 + r1 * 2]
-    pextrw          [r6],                m3, 2
-    pextrw          [r6 + r1],           m3, 3
-    lea             r6,                  [r6 + r1 * 2]
-    pextrw          [r6],                m3, 4
-    pextrw          [r6 + r1],           m3, 5
-    lea             r6,                  [r6 + r1 * 2]
-    pextrw          [r6],                m3, 6
-
-.end:
-    RET
-
-;-------------------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc16_new, 5, 7, 4
-    lea             r3,                  [r2 + 66]
-    add             r2,                  2
-    add             r1,                  r1
-    movu            m0,                  [r3]
-    movu            m1,                  [r3 + 16]
-    movu            m2,                  [r2]
-    movu            m3,                  [r2 + 16]
-
-    paddw           m0,                  m1
-    paddw           m2,                  m3
-    paddw           m0,                  m2
-    movhlps         m1,                  m0
-    paddw           m0,                  m1
-    phaddw          m0,                  m0
-    pmaddwd         m0,                  [pw_1]
-
-    movd            r5d,                 m0
-    add             r5d,                 16
-    shr             r5d,                 5     ; sum = sum / 16
-    movd            m1,                  r5d
-    pshuflw         m1,                  m1, 0 ; m1 = word [dc_val ...]
-    pshufd          m1,                  m1, 0
-
-    test            r4d,                 r4d
-
-    ; store DC 16x16
-    mov             r6,                  r0
-    movu            [r0],                m1
-    movu            [r0 + 16],           m1
-    movu            [r0 + r1],           m1
-    movu            [r0 + 16 + r1],      m1
-    lea             r0,                  [r0 + r1 * 2]
-    movu            [r0],                m1
-    movu            [r0 + 16],           m1
-    movu            [r0 + r1],           m1
-    movu            [r0 + 16 + r1],      m1
-    lea             r0,                  [r0 + r1 * 2]
-    movu            [r0],                m1
-    movu            [r0 + 16],           m1
-    movu            [r0 + r1],           m1
-    movu            [r0 + 16 + r1],      m1
-    lea             r0,                  [r0 + r1 * 2]
-    movu            [r0],                m1
-    movu            [r0 + 16],           m1
-    movu            [r0 + r1],           m1
-    movu            [r0 + 16 + r1],      m1
-    lea             r0,                  [r0 + r1 * 2]
-    movu            [r0],                m1
-    movu            [r0 + 16],           m1
-    movu            [r0 + r1],           m1
-    movu            [r0 + 16 + r1],      m1
-    lea             r0,                  [r0 + r1 * 2]
-    movu            [r0],                m1
-    movu            [r0 + 16],           m1
-    movu            [r0 + r1],           m1
-    movu            [r0 + 16 + r1],      m1
-    lea             r0,                  [r0 + r1 * 2]
-    movu            [r0],                m1
-    movu            [r0 + 16],           m1
-    movu            [r0 + r1],           m1
-    movu            [r0 + 16 + r1],      m1
-    lea             r0,                  [r0 + r1 * 2]
-    movu            [r0],                m1
-    movu            [r0 + 16],           m1
-    movu            [r0 + r1],           m1
-    movu            [r0 + 16 + r1],      m1
-
-    ; Do DC Filter
-    jz              .end
-    lea             r4d,                 [r5d * 2 + 2]  ; r4d = DC * 2 + 2
-    add             r5d,                 r4d            ; r5d = DC * 3 + 2
-    movd            m1,                  r5d
-    pshuflw         m1,                  m1, 0          ; m1 = pixDCx3
-    pshufd          m1,                  m1, 0
-
-    ; filter top
-    movu            m2,                  [r2]
-    paddw           m2,                  m1
-    psraw           m2,                  2
-    movu            [r6],                m2
-    movu            m3,                  [r2 + 16]
-    paddw           m3,                  m1
-    psraw           m3,                  2
-    movu            [r6 + 16],           m3
-
-    ; filter top-left
-    movzx           r5d, word            [r3]
-    add             r4d,                 r5d
-    movzx           r5d, word            [r2]
-    add             r5d,                 r4d
-    shr             r5d,                 2
-    mov             [r6],                r5w
-
-    ; filter left
-    add             r6,                  r1
-    movu            m2,                  [r3 + 2]
-    paddw           m2,                  m1
-    psraw           m2,                  2
-
-    pextrw          [r6],                m2, 0
-    pextrw          [r6 + r1],           m2, 1
-    lea             r6,                  [r6 + r1 * 2]
-    pextrw          [r6],                m2, 2
-    pextrw          [r6 + r1],           m2, 3
-    lea             r6,                  [r6 + r1 * 2]
-    pextrw          [r6],                m2, 4
-    pextrw          [r6 + r1],           m2, 5
-    lea             r6,                  [r6 + r1 * 2]
-    pextrw          [r6],                m2, 6
-    pextrw          [r6 + r1],           m2, 7
-
-    lea             r6,                  [r6 + r1 * 2]
     movu            m3,                  [r3 + 18]
     paddw           m3,                  m1
     psraw           m3,                  2
@@ -707,82 +423,6 @@
     jnz            .loop
     RET
 
-;-------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc32, 4, 5, 6
-    mov             r4d,                 r5m
-    add             r2,                  2
-    add             r3,                  2
-    add             r1,                  r1
-    movu            m0,                  [r3]
-    movu            m1,                  [r3 + 16]
-    movu            m2,                  [r3 + 32]
-    movu            m3,                  [r3 + 48]
-    paddw           m0,                  m1
-    paddw           m2,                  m3
-    paddw           m0,                  m2
-    movu            m1,                  [r2]
-    movu            m3,                  [r2 + 16]
-    movu            m4,                  [r2 + 32]
-    movu            m5,                  [r2 + 48]
-    paddw           m1,                  m3
-    paddw           m4,                  m5
-    paddw           m1,                  m4
-    paddw           m0,                  m1
-    movhlps         m1,                  m0
-    paddw           m0,                  m1
-    phaddw          m0,                  m0
-    pmaddwd         m0,                  [pw_1]
-
-    paddd           m0,                  [pd_32]     ; sum = sum + 32
-    psrld           m0,                  6           ; sum = sum / 64
-    pshuflw         m0,                  m0, 0
-    pshufd          m0,                  m0, 0
-
-    lea             r2,                 [r1 * 3]
-    mov             r3d,                4
-.loop:
-    ; store DC 32x32
-    movu            [r0 +  0],          m0
-    movu            [r0 + 16],          m0
-    movu            [r0 + 32],          m0
-    movu            [r0 + 48],          m0
-    movu            [r0 + r1 +  0],     m0
-    movu            [r0 + r1 + 16],     m0
-    movu            [r0 + r1 + 32],     m0
-    movu            [r0 + r1 + 48],     m0
-    movu            [r0 + r1 * 2 +  0], m0
-    movu            [r0 + r1 * 2 + 16], m0
-    movu            [r0 + r1 * 2 + 32], m0
-    movu            [r0 + r1 * 2 + 48], m0
-    movu            [r0 + r2 +  0],     m0
-    movu            [r0 + r2 + 16],     m0
-    movu            [r0 + r2 + 32],     m0
-    movu            [r0 + r2 + 48],     m0
-    lea             r0, [r0 + r1 * 4]
-    movu            [r0 +  0],          m0
-    movu            [r0 + 16],          m0
-    movu            [r0 + 32],          m0
-    movu            [r0 + 48],          m0
-    movu            [r0 + r1 +  0],     m0
-    movu            [r0 + r1 + 16],     m0
-    movu            [r0 + r1 + 32],     m0
-    movu            [r0 + r1 + 48],     m0
-    movu            [r0 + r1 * 2 +  0], m0
-    movu            [r0 + r1 * 2 + 16], m0
-    movu            [r0 + r1 * 2 + 32], m0
-    movu            [r0 + r1 * 2 + 48], m0
-    movu            [r0 + r2 +  0],     m0
-    movu            [r0 + r2 + 16],     m0
-    movu            [r0 + r2 + 32],     m0
-    movu            [r0 + r2 + 48],     m0
-    lea             r0, [r0 + r1 * 4]
-    dec             r3d
-    jnz            .loop
-    RET
-
 ;-----------------------------------------------------------------------------------------------------------
 ; void intra_pred_planar(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
 ;-----------------------------------------------------------------------------------------------------------
diff -r c238ea33fa51 -r 723d7d1622b8 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Mon Jan 12 12:48:45 2015 +0530
+++ b/source/common/x86/intrapred8.asm	Tue Dec 16 16:44:04 2014 +0530
@@ -72,70 +72,6 @@
 cextern multiH3
 cextern multi_2Row
 
-;-----------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-----------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc4, 4,6,3
-    mov         r4d, r5m
-    inc         r2
-    inc         r3
-    pxor        m0, m0
-    movd        m1, [r2]
-    movd        m2, [r3]
-    punpckldq   m1, m2
-    psadbw      m1, m0              ; m1 = sum
-
-    test        r4d, r4d
-
-    mov         r4d, 4096
-    movd        m2, r4d
-    pmulhrsw    m1, m2              ; m1 = (sum + 4) / 8
-    movd        r4d, m1             ; r4d = dc_val
-    pshufb      m1, m0              ; m1 = byte [dc_val ...]
-
-    ; store DC 4x4
-    lea         r5, [r1 * 3]
-    movd        [r0], m1
-    movd        [r0 + r1], m1
-    movd        [r0 + r1 * 2], m1
-    movd        [r0 + r5], m1
-
-    ; do DC filter
-    jz         .end
-    lea         r5d, [r4d * 2 + 2]  ; r5d = DC * 2 + 2
-    add         r4d, r5d            ; r4d = DC * 3 + 2
-    movd        m1, r4d
-    pshuflw     m1, m1, 0           ; m1 = pixDCx3
-
-    ; filter top
-    pmovzxbw    m2, [r3]
-    paddw       m2, m1
-    psraw       m2, 2
-    packuswb    m2, m2
-    movd        [r0], m2            ; overwrite top-left pixel, we will update it later
-
-    ; filter top-left
-    movzx       r3d, byte [r3]
-    add         r5d, r3d
-    movzx       r3d, byte [r2]
-    add         r3d, r5d
-    shr         r3d, 2
-    mov         [r0], r3b
-
-    ; filter left
-    add         r0, r1
-    pmovzxbw    m2, [r2 + 1]
-    paddw       m2, m1
-    psraw       m2, 2
-    packuswb    m2, m2
-    pextrb      [r0], m2, 0
-    pextrb      [r0 + r1], m2, 1
-    pextrb      [r0 + r1 * 2], m2, 2
-
-.end:
-    RET
-
 ;---------------------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
 ;---------------------------------------------------------------------------------------------
@@ -198,86 +134,6 @@
 .end:
     RET
 
-;-------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc8, 4, 7, 3
-    mov             r4d,           r5m
-    inc             r2
-    inc             r3
-    pxor            m0,            m0
-    movh            m1,            [r2]
-    movh            m2,            [r3]
-    punpcklqdq      m1,            m2
-    psadbw          m1,            m0
-    pshufd          m2,            m1, 2
-    paddw           m1,            m2
-
-    movd            r5d,           m1
-    add             r5d,           8
-    shr             r5d,           4     ; sum = sum / 16
-    movd            m1,            r5d
-    pshufb          m1,            m0    ; m1 = byte [dc_val ...]
-
-    test            r4d,           r4d
-
-    ; store DC 8x8
-    mov             r6,            r0
-    movh            [r0],          m1
-    movh            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movh            [r0],          m1
-    movh            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movh            [r0],          m1
-    movh            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movh            [r0],          m1
-    movh            [r0 + r1],     m1
-
-    ; Do DC Filter
-    jz              .end
-    lea             r4d,           [r5d * 2 + 2]  ; r4d = DC * 2 + 2
-    add             r5d,           r4d            ; r5d = DC * 3 + 2
-    movd            m1,            r5d
-    pshuflw         m1,            m1, 0          ; m1 = pixDCx3
-    pshufd          m1,            m1, 0
-
-    ; filter top
-    pmovzxbw        m2,            [r3]
-    paddw           m2,            m1
-    psraw           m2,            2
-    packuswb        m2,            m2
-    movh            [r6],          m2
-
-    ; filter top-left
-    movzx           r3d, byte      [r3]
-    add             r4d,           r3d
-    movzx           r3d, byte      [r2]
-    add             r3d,           r4d
-    shr             r3d,           2
-    mov             [r6],          r3b
-
-    ; filter left
-    add             r6,            r1
-    pmovzxbw        m2,            [r2 + 1]
-    paddw           m2,            m1
-    psraw           m2,            2
-    packuswb        m2,            m2
-    pextrb          [r6],          m2, 0
-    pextrb          [r6 + r1],     m2, 1
-    pextrb          [r6 + 2 * r1], m2, 2
-    lea             r6,            [r6 + r1 * 2]
-    pextrb          [r6 + r1],     m2, 3
-    pextrb          [r6 + r1 * 2], m2, 4
-    pextrb          [r6 + r1 * 4], m2, 6
-    lea             r1,            [r1 * 3]
-    pextrb          [r6 + r1],     m2, 5
-
-.end:
-    RET
-
 ;---------------------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
 ;---------------------------------------------------------------------------------------------
@@ -357,14 +213,13 @@
 .end:
     RET
 
-;-------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc16, 5, 7, 4
-    mov             r4d,           r5m
+;--------------------------------------------------------------------------------------------
+; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
+;--------------------------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal intra_pred_dc16_new, 5, 7, 4
+    lea             r3, [r2 + 33]
     inc             r2
-    inc             r3
     pxor            m0,            m0
     movu            m1,            [r2]
     movu            m2,            [r3]
@@ -417,120 +272,6 @@
     pshufd          m1,            m1, 0
 
     ; filter top
-    pmovzxbw        m2,            [r3]
-    paddw           m2,            m1
-    psraw           m2,            2
-    packuswb        m2,            m2
-    movh            [r6],          m2
-    pmovzxbw        m3,            [r3 + 8]
-    paddw           m3,            m1
-    psraw           m3,            2
-    packuswb        m3,            m3
-    movh            [r6 + 8],      m3
-
-    ; filter top-left
-    movzx           r3d, byte      [r3]
-    add             r4d,           r3d
-    movzx           r3d, byte      [r2]
-    add             r3d,           r4d
-    shr             r3d,           2
-    mov             [r6],          r3b
-
-    ; filter left
-    add             r6,            r1
-    pmovzxbw        m2,            [r2 + 1]
-    paddw           m2,            m1
-    psraw           m2,            2
-    packuswb        m2,            m2
-    pextrb          [r6],          m2, 0
-    pextrb          [r6 + r1],     m2, 1
-    pextrb          [r6 + r1 * 2], m2, 2
-    lea             r6,            [r6 + r1 * 2]
-    pextrb          [r6 + r1],     m2, 3
-    pextrb          [r6 + r1 * 2], m2, 4
-    lea             r6,            [r6 + r1 * 2]
-    pextrb          [r6 + r1],     m2, 5
-    pextrb          [r6 + r1 * 2], m2, 6
-    lea             r6,            [r6 + r1 * 2]
-    pextrb          [r6 + r1],     m2, 7
-
-    pmovzxbw        m3,            [r2 + 9]
-    paddw           m3,            m1
-    psraw           m3,            2
-    packuswb        m3,            m3
-    pextrb          [r6 + r1 * 2], m3, 0
-    lea             r6,            [r6 + r1 * 2]
-    pextrb          [r6 + r1],     m3, 1
-    pextrb          [r6 + r1 * 2], m3, 2
-    lea             r6,            [r6 + r1 * 2]
-    pextrb          [r6 + r1],     m3, 3
-    pextrb          [r6 + r1 * 2], m3, 4
-    lea             r6,            [r6 + r1 * 2]
-    pextrb          [r6 + r1],     m3, 5
-    pextrb          [r6 + r1 * 2], m3, 6
-
-.end:
-    RET
-
-;--------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
-;--------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc16_new, 5, 7, 4
-    lea             r3, [r2 + 33]
-    inc             r2
-    pxor            m0,            m0
-    movu            m1,            [r2]
-    movu            m2,            [r3]
-    psadbw          m1,            m0
-    psadbw          m2,            m0
-    paddw           m1,            m2
-    pshufd          m2,            m1, 2
-    paddw           m1,            m2
-
-    movd            r5d,           m1
-    add             r5d,           16
-    shr             r5d,           5     ; sum = sum / 32
-    movd            m1,            r5d
-    pshufb          m1,            m0    ; m1 = byte [dc_val ...]
-
-    test            r4d,           r4d
-
-    ; store DC 16x16
-    mov             r6,            r0
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    lea             r0,            [r0 + r1 * 2]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-
-    ; Do DC Filter
-    jz              .end
-    lea             r4d,           [r5d * 2 + 2]  ; r4d = DC * 2 + 2
-    add             r5d,           r4d            ; r5d = DC * 3 + 2
-    movd            m1,            r5d
-    pshuflw         m1,            m1, 0          ; m1 = pixDCx3
-    pshufd          m1,            m1, 0
-
-    ; filter top
     pmovzxbw        m2,            [r2]
     paddw           m2,            m1
     psraw           m2,            2
@@ -586,80 +327,6 @@
 .end:
     RET
 
-;-------------------------------------------------------------------------------------------
-; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel* left, pixel* above, int dirMode, int filter)
-;-------------------------------------------------------------------------------------------
-INIT_XMM sse4
-cglobal intra_pred_dc32, 4, 5, 5
-    inc             r2
-    inc             r3
-    pxor            m0,            m0
-    movu            m1,            [r2]
-    movu            m2,            [r2 + 16]
-    movu            m3,            [r3]
-    movu            m4,            [r3 + 16]
-    psadbw          m1,            m0
-    psadbw          m2,            m0
-    psadbw          m3,            m0
-    psadbw          m4,            m0
-    paddw           m1,            m2
-    paddw           m3,            m4
-    paddw           m1,            m3
-    pshufd          m2,            m1, 2
-    paddw           m1,            m2
-
-    movd            r4d,           m1
-    add             r4d,           32
-    shr             r4d,           6     ; sum = sum / 64
-    movd            m1,            r4d
-    pshufb          m1,            m0    ; m1 = byte [dc_val ...]
-
-%rep 2
-    ; store DC 16x16
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + 16],     m1
-    movu            [r0 + r1 + 16],m1
-    lea             r0,            [r0 + 2 * r1]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + 16],     m1
-    movu            [r0 + r1 + 16],m1
-    lea             r0,            [r0 + 2 * r1]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + 16],     m1
-    movu            [r0 + r1 + 16],m1
-    lea             r0,            [r0 + 2 * r1]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + 16],     m1
-    movu            [r0 + r1 + 16],m1
-    lea             r0,            [r0 + 2 * r1]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + 16],     m1
-    movu            [r0 + r1 + 16],m1
-    lea             r0,            [r0 + 2 * r1]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + 16],     m1
-    movu            [r0 + r1 + 16],m1
-    lea             r0,            [r0 + 2 * r1]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + 16],     m1
-    movu            [r0 + r1 + 16],m1
-    lea             r0,            [r0 + 2 * r1]
-    movu            [r0],          m1
-    movu            [r0 + r1],     m1
-    movu            [r0 + 16],     m1
-    movu            [r0 + r1 + 16],m1
-    lea             r0,            [r0 + 2 * r1]
-%endrep
-
-    RET
-
 ;---------------------------------------------------------------------------------------------
 ; void intra_pred_dc(pixel* dst, intptr_t dstStride, pixel *srcPix, int dirMode, int bFilter)
 ;---------------------------------------------------------------------------------------------


More information about the x265-devel mailing list