[x265] [PATCH] asm: assembly code for pixel_sad_16x32
Steve Borho
steve at borho.org
Mon Oct 28 19:28:13 CET 2013
On Mon, Oct 28, 2013 at 5:42 AM, <dnyaneshwar at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
> # Date 1382956880 -19800
> # Mon Oct 28 16:11:20 2013 +0530
> # Node ID b9a9f725d4892ced1176288fe1e20069fa188132
> # Parent ef2428fd32feddd60168f3430c50f4d7e6f02741
> asm: assembly code for pixel_sad_16x32
>
This causes encoder crashes when applied.
> diff -r ef2428fd32fe -r b9a9f725d489 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Mon Oct 28 00:08:06 2013 -0500
> +++ b/source/common/x86/asm-primitives.cpp Mon Oct 28 16:11:20 2013 +0530
> @@ -235,6 +235,8 @@
> //p.pixelavg_pp[LUMA_4x4] = x265_pixel_avg_4x4_mmx2;
> //PIXEL_AVE(sse2);
>
> + p.sad[LUMA_16x32] = x265_pixel_sad_16x32_sse2;
> +
> ASSGN_SSE(sse2);
> INIT2(sad, _sse2);
> INIT2(sad_x3, _sse2);
> diff -r ef2428fd32fe -r b9a9f725d489 source/common/x86/sad-a.asm
> --- a/source/common/x86/sad-a.asm Mon Oct 28 00:08:06 2013 -0500
> +++ b/source/common/x86/sad-a.asm Mon Oct 28 16:11:20 2013 +0530
> @@ -119,6 +119,29 @@
> RET
> %endmacro
>
> +%macro PROCESS_SAD_4x16 0 ; add the SAD of four 16-byte rows (16 wide x 4 high, despite the name) to m0; advances r0 and r2 by 4 rows
> + movu m1, [r2] ; unaligned load: mova faults unless the address is 16-byte aligned
> + movu m2, [r2 + r3]
> + psadbw m1, [r0] ; memory operand of psadbw must be 16-byte aligned -- assumes r0/r1 guarantee that, TODO confirm
> + psadbw m2, [r0 + r1]
> + paddd m1, m2 ; psadbw yields one sum per 64-bit half with the upper bits zero, so dword adds cannot wrap
> + paddd m0, m1
> + ; rows 0-1 done: step both pointers down two rows (r1 = stride for r0, r3 = stride for r2)
> + lea r2, [r2 + 2 * r3]
> + lea r0, [r0 + 2 * r1]
> + ; rows 2-3: same sequence as above
> + movu m1, [r2]
> + movu m2, [r2 + r3]
> +
> + psadbw m1, [r0]
> + psadbw m2, [r0 + r1]
> + paddd m1, m2
> + paddd m0, m1
> + ; leave r0/r2 pointing at the next group of four rows for the following invocation
> + lea r2, [r2 + 2 * r3]
> + lea r0, [r0 + 2 * r1]
> +%endmacro
> +
> %macro SAD_W16 0
> ;-----------------------------------------------------------------------------
> ; int pixel_sad_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
> @@ -223,6 +246,27 @@
> paddw m0, m1
> paddw m0, m3
> SAD_END_SSE2
> +
> +;-----------------------------------------------------------------------------
> +; int pixel_sad_16x32( uint8_t *, intptr_t, uint8_t *, intptr_t )
> +;-----------------------------------------------------------------------------
> +cglobal pixel_sad_16x32, 4,4,3 ; x86inc: 4 arguments, 4 GPRs (r0-r3), 3 vector registers (m0-m2)
> + pxor m0, m0 ; m0 = running SAD, one partial sum in each 64-bit half
> + ; 8 invocations x 4 rows each = 32 rows; every invocation advances r0 and r2 itself
> + PROCESS_SAD_4x16
> + PROCESS_SAD_4x16
> + PROCESS_SAD_4x16
> + PROCESS_SAD_4x16
> + PROCESS_SAD_4x16
> + PROCESS_SAD_4x16
> + PROCESS_SAD_4x16
> + PROCESS_SAD_4x16
> + ; fold the halves: each can reach 32*8*255 = 65280, so the total (up to 130560) needs a dword add, not paddw
> + movhlps m1, m0
> + paddd m0, m1
> + movd eax, m0 ; return value = low dword of m0
> + RET
> +
> %endmacro
>
> INIT_XMM sse2
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131028/255e5be8/attachment.html>
More information about the x265-devel
mailing list