[x265] [PATCH] cleanup: removed unused code in pixel-a.asm
yuvaraj at multicorewareinc.com
yuvaraj at multicorewareinc.com
Mon Nov 25 07:10:37 CET 2013
# HG changeset patch
# User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
# Date 1385359751 -19800
# Mon Nov 25 11:39:11 2013 +0530
# Node ID 90a80def0f1aabdf29e1f08dd0f2263d8e6af805
# Parent c0c862dc71fbd021efd3922de99da4f2f93e81f4
cleanup: removed unused code in pixel-a.asm
diff -r c0c862dc71fb -r 90a80def0f1a source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Sun Nov 24 17:34:12 2013 +0800
+++ b/source/common/x86/pixel-a.asm Mon Nov 25 11:39:11 2013 +0530
@@ -7157,173 +7157,6 @@
%endif ; !ARCH_X86_64
%endmacro ; SA8D
-;=============================================================================
-; SA8D_SATD
-;=============================================================================
-
-; %1: vertical/horizontal mode
-; %2-%5: sa8d output regs (m0,m1,m2,m3,m4,m5,m8,m9)
-; m10: satd result
-; m6, m11-15: tmp regs
-%macro SA8D_SATD_8x4 5
-%if %1
- LOAD_DIFF_8x4P %2, %3, %4, %5, 6, 11, 7, r0, r2, 1
- HADAMARD 0, sumsub, %2, %3, 6
- HADAMARD 0, sumsub, %4, %5, 6
- SBUTTERFLY wd, %2, %3, 6
- SBUTTERFLY wd, %4, %5, 6
- HADAMARD2_2D %2, %4, %3, %5, 6, dq
-
- mova m12, m%2
- mova m13, m%3
- mova m14, m%4
- mova m15, m%5
- HADAMARD 0, sumsub, %2, %3, 6
- HADAMARD 0, sumsub, %4, %5, 6
- SBUTTERFLY qdq, 12, 13, 6
- HADAMARD 0, amax, 12, 13, 6
- SBUTTERFLY qdq, 14, 15, 6
- paddw m10, m12
- HADAMARD 0, amax, 14, 15, 6
- paddw m10, m14
-%else
- LOAD_SUMSUB_8x4P %2, %3, %4, %5, 6, 11, 7, r0, r2, 1
- HADAMARD4_V %2, %3, %4, %5, 6
-
- pabsw m12, m%2 ; doing the abs first is a slight advantage
- pabsw m14, m%4
- pabsw m13, m%3
- pabsw m15, m%5
- HADAMARD 1, max, 12, 14, 6, 11
- paddw m10, m12
- HADAMARD 1, max, 13, 15, 6, 11
- paddw m10, m13
-%endif
-%endmacro ; SA8D_SATD_8x4
-
-; %1: add spilled regs?
-; %2: spill regs?
-%macro SA8D_SATD_ACCUM 2
-%if HIGH_BIT_DEPTH
- pmaddwd m10, [pw_1]
- HADDUWD m0, m1
-%if %1
- paddd m10, temp1
- paddd m0, temp0
-%endif
-%if %2
- mova temp1, m10
- pxor m10, m10
-%endif
-%elif %1
- paddw m0, temp0
-%endif
-%if %2
- mova temp0, m0
-%endif
-%endmacro
-
-%macro SA8D_SATD 0
-%define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH)
-cglobal pixel_sa8d_satd_8x8_internal
- SA8D_SATD_8x4 vertical, 0, 1, 2, 3
- SA8D_SATD_8x4 vertical, 4, 5, 8, 9
-
-%if vertical ; sse2-style
- HADAMARD2_2D 0, 4, 2, 8, 6, qdq, amax
- HADAMARD2_2D 1, 5, 3, 9, 6, qdq, amax
-%else ; complete sa8d
- SUMSUB_BADC w, 0, 4, 1, 5, 12
- HADAMARD 2, sumsub, 0, 4, 12, 11
- HADAMARD 2, sumsub, 1, 5, 12, 11
- SUMSUB_BADC w, 2, 8, 3, 9, 12
- HADAMARD 2, sumsub, 2, 8, 12, 11
- HADAMARD 2, sumsub, 3, 9, 12, 11
- HADAMARD 1, amax, 0, 4, 12, 11
- HADAMARD 1, amax, 1, 5, 12, 4
- HADAMARD 1, amax, 2, 8, 12, 4
- HADAMARD 1, amax, 3, 9, 12, 4
-%endif
-
- ; create sa8d sub results
- paddw m1, m2
- paddw m0, m3
- paddw m0, m1
-
- SAVE_MM_PERMUTATION
- ret
-
-;-------------------------------------------------------------------------------
-; uint64_t pixel_sa8d_satd_16x16( pixel *, intptr_t, pixel *, intptr_t )
-;-------------------------------------------------------------------------------
-cglobal pixel_sa8d_satd_16x16, 4,8-(mmsize/32),16,SIZEOF_PIXEL*mmsize
- %define temp0 [rsp+0*mmsize]
- %define temp1 [rsp+1*mmsize]
- FIX_STRIDES r1, r3
-%if vertical==0
- mova m7, [hmul_8p]
-%endif
- lea r4, [3*r1]
- lea r5, [3*r3]
- pxor m10, m10
-
-%if mmsize==32
- call pixel_sa8d_satd_8x8_internal
- SA8D_SATD_ACCUM 0, 1
- call pixel_sa8d_satd_8x8_internal
- SA8D_SATD_ACCUM 1, 0
- vextracti128 xm1, m0, 1
- vextracti128 xm2, m10, 1
- paddw xm0, xm1
- paddw xm10, xm2
-%else
- lea r6, [r2+8*SIZEOF_PIXEL]
- lea r7, [r0+8*SIZEOF_PIXEL]
-
- call pixel_sa8d_satd_8x8_internal
- SA8D_SATD_ACCUM 0, 1
- call pixel_sa8d_satd_8x8_internal
- SA8D_SATD_ACCUM 1, 1
-
- mov r0, r7
- mov r2, r6
-
- call pixel_sa8d_satd_8x8_internal
- SA8D_SATD_ACCUM 1, 1
- call pixel_sa8d_satd_8x8_internal
- SA8D_SATD_ACCUM 1, 0
-%endif
-
-; xop already has fast horizontal sums
-%if cpuflag(sse4) && notcpuflag(xop) && HIGH_BIT_DEPTH==0
- pmaddwd xm10, [pw_1]
- HADDUWD xm0, xm1
- phaddd xm0, xm10 ; sa8d1 sa8d2 satd1 satd2
- pshufd xm1, xm0, q2301 ; sa8d2 sa8d1 satd2 satd1
- paddd xm0, xm1 ; sa8d sa8d satd satd
- movd r0d, xm0
- pextrd eax, xm0, 2
-%else
-%if HIGH_BIT_DEPTH
- HADDD xm0, xm1
- HADDD xm10, xm2
-%else
- HADDUW xm0, xm1
- HADDW xm10, xm2
-%endif
- movd r0d, xm0
- movd eax, xm10
-%endif
- add r0d, 1
- shl rax, 32
- shr r0d, 1
- or rax, r0
- RET
-%endmacro ; SA8D_SATD
-
-;=============================================================================
-; INTRA SATD
-;=============================================================================
%define TRANS TRANS_SSE2
%define DIFFOP DIFF_UNPACK_SSE2
%define LOAD_SUMSUB_8x4P LOAD_DIFF_8x4P
@@ -7334,17 +7167,11 @@
INIT_XMM sse2
SA8D
SATDS_SSE2
-%if ARCH_X86_64
-SA8D_SATD
-%endif
%if HIGH_BIT_DEPTH == 0
INIT_XMM ssse3,atom
SATDS_SSE2
SA8D
-%if ARCH_X86_64
-SA8D_SATD
-%endif
%endif
%define DIFFOP DIFF_SUMSUB_SSSE3
@@ -7356,9 +7183,6 @@
INIT_XMM ssse3
SATDS_SSE2
SA8D
-%if ARCH_X86_64
-SA8D_SATD
-%endif
%undef movdqa ; nehalem doesn't like movaps
%undef movdqu ; movups
%undef punpcklqdq ; or movlhps
@@ -7368,9 +7192,6 @@
INIT_XMM sse4
SATDS_SSE2
SA8D
-%if ARCH_X86_64
-SA8D_SATD
-%endif
; Sandy/Ivy Bridge and Bulldozer do movddup in the load unit, so
; it's effectively free.
@@ -7378,26 +7199,17 @@
INIT_XMM avx
SATDS_SSE2
SA8D
-%if ARCH_X86_64
-SA8D_SATD
-%endif
%define TRANS TRANS_XOP
INIT_XMM xop
SATDS_SSE2
SA8D
-%if ARCH_X86_64
-SA8D_SATD
-%endif
%if HIGH_BIT_DEPTH == 0
%define LOAD_SUMSUB_8x4P LOAD_SUMSUB8_16x4P_AVX2
%define LOAD_DUP_4x8P LOAD_DUP_4x16P_AVX2
%define TRANS TRANS_SSE4
-%if ARCH_X86_64
-SA8D_SATD
-%endif
%macro LOAD_SUMSUB_8x8P_AVX2 7 ; 4*dst, 2*tmp, mul]
movq xm%1, [r0]
More information about the x265-devel mailing list