[x265] [PATCH] asm : remove chroma_p2s_i444, can be replaced by luma_p2s

Steve Borho steve at borho.org
Mon Mar 3 21:22:09 CET 2014


On Mon, Mar 3, 2014 at 6:15 AM,  <nabajit at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Nabajit Deka
> # Date 1393848896 -19800
> #      Mon Mar 03 17:44:56 2014 +0530
> # Node ID 5e0879e805a24c1c376eee1dbc160f597b7909cd
> # Parent  5e6e06b8ec118904ad28a2d703dc9ad7956b4d44
> asm : remove chroma_p2s_i444, can be replaced by luma_p2s

Nicely done.

> diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp      Mon Mar 03 17:27:42 2014 +0530
> +++ b/source/common/x86/asm-primitives.cpp      Mon Mar 03 17:44:56 2014 +0530
> @@ -1231,7 +1231,7 @@
>          p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
>          p.luma_p2s = x265_luma_p2s_ssse3;
>          p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;
> -        p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_i444_ssse3; // full width dststride
> +        p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for i444 , chroma_p2s can be replaced by luma_p2s
>
>          p.dct[DST_4x4] = x265_dst4_ssse3;
>      }
> diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/ipfilter8.asm
> --- a/source/common/x86/ipfilter8.asm   Mon Mar 03 17:27:42 2014 +0530
> +++ b/source/common/x86/ipfilter8.asm   Mon Mar 03 17:44:56 2014 +0530
> @@ -3680,64 +3680,6 @@
>
>      RET
>
> -INIT_XMM ssse3
> -cglobal chroma_p2s_i444, 3, 7, 4
> -
> -    ; load width and height
> -    mov         r3d, r3m
> -    mov         r4d, r4m
> -
> -    ; load constant
> -    mova        m2, [tab_c_128]
> -    mova        m3, [tab_c_64_n64]
> -
> -.loopH:
> -
> -    xor         r5d, r5d
> -.loopW:
> -    lea         r6, [r0 + r5]
> -
> -    movh        m0, [r6]
> -    punpcklbw   m0, m2
> -    pmaddubsw   m0, m3
> -
> -    movh        m1, [r6 + r1]
> -    punpcklbw   m1, m2
> -    pmaddubsw   m1, m3
> -
> -    add         r5d, 8
> -    cmp         r5d, r3d
> -    lea         r6, [r2 + r5 * 2]
> -    jg          .width4
> -    movu        [r6 + FENC_STRIDE * 0 - 16], m0
> -    movu        [r6 + FENC_STRIDE * 2 - 16], m1
> -    je          .nextH
> -    jmp         .loopW
> -
> -.width4:
> -    test        r3d, 4
> -    jz          .width2
> -    test        r3d, 2
> -    movh        [r6 + FENC_STRIDE * 0 - 16], m0
> -    movh        [r6 + FENC_STRIDE * 2 - 16], m1
> -    lea         r6, [r6 + 8]
> -    pshufd      m0, m0, 2
> -    pshufd      m1, m1, 2
> -    jz          .nextH
> -
> -.width2:
> -    movd        [r6 + FENC_STRIDE * 0 - 16], m0
> -    movd        [r6 + FENC_STRIDE * 2 - 16], m1
> -
> -.nextH:
> -    lea         r0, [r0 + r1 * 2]
> -    add         r2, FENC_STRIDE * 4
> -
> -    sub         r4d, 2
> -    jnz         .loopH
> -
> -    RET
> -
>  %macro PROCESS_CHROMA_SP_W4_4R 0
>      movq       m0, [r0]
>      movq       m1, [r0 + r1]
> diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/ipfilter8.h
> --- a/source/common/x86/ipfilter8.h     Mon Mar 03 17:27:42 2014 +0530
> +++ b/source/common/x86/ipfilter8.h     Mon Mar 03 17:44:56 2014 +0530
> @@ -300,7 +300,6 @@
>  CHROMA_SS_FILTERS_SSE4(_sse4);
>
>  void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
> -void x265_chroma_p2s_i444_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
>
>  #undef SETUP_CHROMA_FUNC_DEF
>  #undef SETUP_CHROMA_SP_FUNC_DEF
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



-- 
Steve Borho


More information about the x265-devel mailing list