[x265] [PATCH] asm: assembly code for pixel_sad_16x32

Steve Borho steve at borho.org
Mon Oct 28 19:28:13 CET 2013


On Mon, Oct 28, 2013 at 5:42 AM, <dnyaneshwar at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
> # Date 1382956880 -19800
> #      Mon Oct 28 16:11:20 2013 +0530
> # Node ID b9a9f725d4892ced1176288fe1e20069fa188132
> # Parent  ef2428fd32feddd60168f3430c50f4d7e6f02741
> asm: assembly code for pixel_sad_16x32
>

this causes encoder crashes when applied


> diff -r ef2428fd32fe -r b9a9f725d489 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp      Mon Oct 28 00:08:06 2013
> -0500
> +++ b/source/common/x86/asm-primitives.cpp      Mon Oct 28 16:11:20 2013
> +0530
> @@ -235,6 +235,8 @@
>          //p.pixelavg_pp[LUMA_4x4]  = x265_pixel_avg_4x4_mmx2;
>          //PIXEL_AVE(sse2);
>
> +        p.sad[LUMA_16x32]  = x265_pixel_sad_16x32_sse2;
> +
>          ASSGN_SSE(sse2);
>          INIT2(sad, _sse2);
>          INIT2(sad_x3, _sse2);
> diff -r ef2428fd32fe -r b9a9f725d489 source/common/x86/sad-a.asm
> --- a/source/common/x86/sad-a.asm       Mon Oct 28 00:08:06 2013 -0500
> +++ b/source/common/x86/sad-a.asm       Mon Oct 28 16:11:20 2013 +0530
> @@ -119,6 +119,29 @@
>      RET
>  %endmacro
>
> +%macro PROCESS_SAD_4x16 0
> +    mova    m1, [r2]
> +    movu    m2, [r2 + r3]
> +    psadbw  m1, [r0]
> +    psadbw  m2, [r0 + r1]
> +    paddw   m1, m2
> +    paddw   m0, m1
> +
> +    lea     r2, [r2 + 2 * r3]
> +    lea     r0, [r0 + 2 * r1]
> +
> +    mova    m1, [r2]
> +    movu    m2, [r2 + r3]
> +
> +    psadbw  m1, [r0]
> +    psadbw  m2, [r0 + r1]
> +    paddw   m1, m2
> +    paddw   m0, m1
> +
> +    lea     r2, [r2 + 2 * r3]
> +    lea     r0, [r0 + 2 * r1]
> +%endmacro
> +
>  %macro SAD_W16 0
>
>  ;-----------------------------------------------------------------------------
>  ; int pixel_sad_16x16( uint8_t *, intptr_t, uint8_t *, intptr_t )
> @@ -223,6 +246,27 @@
>      paddw   m0, m1
>      paddw   m0, m3
>      SAD_END_SSE2
> +
>
> +;-----------------------------------------------------------------------------
> +; int pixel_sad_WxH( uint8_t *, intptr_t, uint8_t *, intptr_t )
>
> +;-----------------------------------------------------------------------------
> +cglobal pixel_sad_16x32, 4,4,3
> +    pxor m0, m0
> +
> +    PROCESS_SAD_4x16
> +    PROCESS_SAD_4x16
> +    PROCESS_SAD_4x16
> +    PROCESS_SAD_4x16
> +    PROCESS_SAD_4x16
> +    PROCESS_SAD_4x16
> +    PROCESS_SAD_4x16
> +    PROCESS_SAD_4x16
> +
> +    movhlps m1, m0
> +    paddw   m0, m1
> +    movd   eax, m0
> +    RET
> +
>  %endmacro
>
>  INIT_XMM sse2
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131028/255e5be8/attachment.html>


More information about the x265-devel mailing list