[x265] [PATCH] cleanup: removed unused code in pixel-a.asm
Yuvaraj Venkatesh
yuvaraj at multicorewareinc.com
Mon Nov 25 12:00:02 CET 2013
Please ignore the patch; I'm re-sending it.
On Mon, Nov 25, 2013 at 4:14 PM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:
> Does not apply at the tip.
>
>
> On Mon, Nov 25, 2013 at 11:40 AM, <yuvaraj at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
>> # Date 1385359751 -19800
>> # Mon Nov 25 11:39:11 2013 +0530
>> # Node ID 90a80def0f1aabdf29e1f08dd0f2263d8e6af805
>> # Parent c0c862dc71fbd021efd3922de99da4f2f93e81f4
>> cleanup: removed unused code in pixel-a.asm
>>
>> diff -r c0c862dc71fb -r 90a80def0f1a source/common/x86/pixel-a.asm
>> --- a/source/common/x86/pixel-a.asm Sun Nov 24 17:34:12 2013 +0800
>> +++ b/source/common/x86/pixel-a.asm Mon Nov 25 11:39:11 2013 +0530
>> @@ -7157,173 +7157,6 @@
>> %endif ; !ARCH_X86_64
>> %endmacro ; SA8D
>>
>>
>> -;=============================================================================
>> -; SA8D_SATD
>>
>> -;=============================================================================
>> -
>> -; %1: vertical/horizontal mode
>> -; %2-%5: sa8d output regs (m0,m1,m2,m3,m4,m5,m8,m9)
>> -; m10: satd result
>> -; m6, m11-15: tmp regs
>> -%macro SA8D_SATD_8x4 5
>> -%if %1
>> - LOAD_DIFF_8x4P %2, %3, %4, %5, 6, 11, 7, r0, r2, 1
>> - HADAMARD 0, sumsub, %2, %3, 6
>> - HADAMARD 0, sumsub, %4, %5, 6
>> - SBUTTERFLY wd, %2, %3, 6
>> - SBUTTERFLY wd, %4, %5, 6
>> - HADAMARD2_2D %2, %4, %3, %5, 6, dq
>> -
>> - mova m12, m%2
>> - mova m13, m%3
>> - mova m14, m%4
>> - mova m15, m%5
>> - HADAMARD 0, sumsub, %2, %3, 6
>> - HADAMARD 0, sumsub, %4, %5, 6
>> - SBUTTERFLY qdq, 12, 13, 6
>> - HADAMARD 0, amax, 12, 13, 6
>> - SBUTTERFLY qdq, 14, 15, 6
>> - paddw m10, m12
>> - HADAMARD 0, amax, 14, 15, 6
>> - paddw m10, m14
>> -%else
>> - LOAD_SUMSUB_8x4P %2, %3, %4, %5, 6, 11, 7, r0, r2, 1
>> - HADAMARD4_V %2, %3, %4, %5, 6
>> -
>> - pabsw m12, m%2 ; doing the abs first is a slight advantage
>> - pabsw m14, m%4
>> - pabsw m13, m%3
>> - pabsw m15, m%5
>> - HADAMARD 1, max, 12, 14, 6, 11
>> - paddw m10, m12
>> - HADAMARD 1, max, 13, 15, 6, 11
>> - paddw m10, m13
>> -%endif
>> -%endmacro ; SA8D_SATD_8x4
>> -
>> -; %1: add spilled regs?
>> -; %2: spill regs?
>> -%macro SA8D_SATD_ACCUM 2
>> -%if HIGH_BIT_DEPTH
>> - pmaddwd m10, [pw_1]
>> - HADDUWD m0, m1
>> -%if %1
>> - paddd m10, temp1
>> - paddd m0, temp0
>> -%endif
>> -%if %2
>> - mova temp1, m10
>> - pxor m10, m10
>> -%endif
>> -%elif %1
>> - paddw m0, temp0
>> -%endif
>> -%if %2
>> - mova temp0, m0
>> -%endif
>> -%endmacro
>> -
>> -%macro SA8D_SATD 0
>> -%define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH)
>> -cglobal pixel_sa8d_satd_8x8_internal
>> - SA8D_SATD_8x4 vertical, 0, 1, 2, 3
>> - SA8D_SATD_8x4 vertical, 4, 5, 8, 9
>> -
>> -%if vertical ; sse2-style
>> - HADAMARD2_2D 0, 4, 2, 8, 6, qdq, amax
>> - HADAMARD2_2D 1, 5, 3, 9, 6, qdq, amax
>> -%else ; complete sa8d
>> - SUMSUB_BADC w, 0, 4, 1, 5, 12
>> - HADAMARD 2, sumsub, 0, 4, 12, 11
>> - HADAMARD 2, sumsub, 1, 5, 12, 11
>> - SUMSUB_BADC w, 2, 8, 3, 9, 12
>> - HADAMARD 2, sumsub, 2, 8, 12, 11
>> - HADAMARD 2, sumsub, 3, 9, 12, 11
>> - HADAMARD 1, amax, 0, 4, 12, 11
>> - HADAMARD 1, amax, 1, 5, 12, 4
>> - HADAMARD 1, amax, 2, 8, 12, 4
>> - HADAMARD 1, amax, 3, 9, 12, 4
>> -%endif
>> -
>> - ; create sa8d sub results
>> - paddw m1, m2
>> - paddw m0, m3
>> - paddw m0, m1
>> -
>> - SAVE_MM_PERMUTATION
>> - ret
>> -
>>
>> -;-------------------------------------------------------------------------------
>> -; uint64_t pixel_sa8d_satd_16x16( pixel *, intptr_t, pixel *, intptr_t )
>>
>> -;-------------------------------------------------------------------------------
>> -cglobal pixel_sa8d_satd_16x16, 4,8-(mmsize/32),16,SIZEOF_PIXEL*mmsize
>> - %define temp0 [rsp+0*mmsize]
>> - %define temp1 [rsp+1*mmsize]
>> - FIX_STRIDES r1, r3
>> -%if vertical==0
>> - mova m7, [hmul_8p]
>> -%endif
>> - lea r4, [3*r1]
>> - lea r5, [3*r3]
>> - pxor m10, m10
>> -
>> -%if mmsize==32
>> - call pixel_sa8d_satd_8x8_internal
>> - SA8D_SATD_ACCUM 0, 1
>> - call pixel_sa8d_satd_8x8_internal
>> - SA8D_SATD_ACCUM 1, 0
>> - vextracti128 xm1, m0, 1
>> - vextracti128 xm2, m10, 1
>> - paddw xm0, xm1
>> - paddw xm10, xm2
>> -%else
>> - lea r6, [r2+8*SIZEOF_PIXEL]
>> - lea r7, [r0+8*SIZEOF_PIXEL]
>> -
>> - call pixel_sa8d_satd_8x8_internal
>> - SA8D_SATD_ACCUM 0, 1
>> - call pixel_sa8d_satd_8x8_internal
>> - SA8D_SATD_ACCUM 1, 1
>> -
>> - mov r0, r7
>> - mov r2, r6
>> -
>> - call pixel_sa8d_satd_8x8_internal
>> - SA8D_SATD_ACCUM 1, 1
>> - call pixel_sa8d_satd_8x8_internal
>> - SA8D_SATD_ACCUM 1, 0
>> -%endif
>> -
>> -; xop already has fast horizontal sums
>> -%if cpuflag(sse4) && notcpuflag(xop) && HIGH_BIT_DEPTH==0
>> - pmaddwd xm10, [pw_1]
>> - HADDUWD xm0, xm1
>> - phaddd xm0, xm10 ; sa8d1 sa8d2 satd1 satd2
>> - pshufd xm1, xm0, q2301 ; sa8d2 sa8d1 satd2 satd1
>> - paddd xm0, xm1 ; sa8d sa8d satd satd
>> - movd r0d, xm0
>> - pextrd eax, xm0, 2
>> -%else
>> -%if HIGH_BIT_DEPTH
>> - HADDD xm0, xm1
>> - HADDD xm10, xm2
>> -%else
>> - HADDUW xm0, xm1
>> - HADDW xm10, xm2
>> -%endif
>> - movd r0d, xm0
>> - movd eax, xm10
>> -%endif
>> - add r0d, 1
>> - shl rax, 32
>> - shr r0d, 1
>> - or rax, r0
>> - RET
>> -%endmacro ; SA8D_SATD
>> -
>>
>> -;=============================================================================
>> -; INTRA SATD
>>
>> -;=============================================================================
>> %define TRANS TRANS_SSE2
>> %define DIFFOP DIFF_UNPACK_SSE2
>> %define LOAD_SUMSUB_8x4P LOAD_DIFF_8x4P
>> @@ -7334,17 +7167,11 @@
>> INIT_XMM sse2
>> SA8D
>> SATDS_SSE2
>> -%if ARCH_X86_64
>> -SA8D_SATD
>> -%endif
>>
>> %if HIGH_BIT_DEPTH == 0
>> INIT_XMM ssse3,atom
>> SATDS_SSE2
>> SA8D
>> -%if ARCH_X86_64
>> -SA8D_SATD
>> -%endif
>> %endif
>>
>> %define DIFFOP DIFF_SUMSUB_SSSE3
>> @@ -7356,9 +7183,6 @@
>> INIT_XMM ssse3
>> SATDS_SSE2
>> SA8D
>> -%if ARCH_X86_64
>> -SA8D_SATD
>> -%endif
>> %undef movdqa ; nehalem doesn't like movaps
>> %undef movdqu ; movups
>> %undef punpcklqdq ; or movlhps
>> @@ -7368,9 +7192,6 @@
>> INIT_XMM sse4
>> SATDS_SSE2
>> SA8D
>> -%if ARCH_X86_64
>> -SA8D_SATD
>> -%endif
>>
>> ; Sandy/Ivy Bridge and Bulldozer do movddup in the load unit, so
>> ; it's effectively free.
>> @@ -7378,26 +7199,17 @@
>> INIT_XMM avx
>> SATDS_SSE2
>> SA8D
>> -%if ARCH_X86_64
>> -SA8D_SATD
>> -%endif
>>
>> %define TRANS TRANS_XOP
>> INIT_XMM xop
>> SATDS_SSE2
>> SA8D
>> -%if ARCH_X86_64
>> -SA8D_SATD
>> -%endif
>>
>>
>> %if HIGH_BIT_DEPTH == 0
>> %define LOAD_SUMSUB_8x4P LOAD_SUMSUB8_16x4P_AVX2
>> %define LOAD_DUP_4x8P LOAD_DUP_4x16P_AVX2
>> %define TRANS TRANS_SSE4
>> -%if ARCH_X86_64
>> -SA8D_SATD
>> -%endif
>>
>> %macro LOAD_SUMSUB_8x8P_AVX2 7 ; 4*dst, 2*tmp, mul]
>> movq xm%1, [r0]
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131125/662d4a46/attachment.html>
More information about the x265-devel
mailing list