[x265] [PATCH] asm : remove chroma_p2s_i444, can be replaced by luma_p2s
Steve Borho
steve at borho.org
Mon Mar 3 21:22:09 CET 2014
On Mon, Mar 3, 2014 at 6:15 AM, <nabajit at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Nabajit Deka
> # Date 1393848896 -19800
> # Mon Mar 03 17:44:56 2014 +0530
> # Node ID 5e0879e805a24c1c376eee1dbc160f597b7909cd
> # Parent 5e6e06b8ec118904ad28a2d703dc9ad7956b4d44
> asm : remove chroma_p2s_i444, can be replaced by luma_p2s
Nicely done.
> diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Mon Mar 03 17:27:42 2014 +0530
> +++ b/source/common/x86/asm-primitives.cpp Mon Mar 03 17:44:56 2014 +0530
> @@ -1231,7 +1231,7 @@
> p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
> p.luma_p2s = x265_luma_p2s_ssse3;
> p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;
> - p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_i444_ssse3; // full width dststride
> + p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for i444 , chroma_p2s can be replaced by luma_p2s
>
> p.dct[DST_4x4] = x265_dst4_ssse3;
> }
> diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/ipfilter8.asm
> --- a/source/common/x86/ipfilter8.asm Mon Mar 03 17:27:42 2014 +0530
> +++ b/source/common/x86/ipfilter8.asm Mon Mar 03 17:44:56 2014 +0530
> @@ -3680,64 +3680,6 @@
>
> RET
>
> -INIT_XMM ssse3
> -cglobal chroma_p2s_i444, 3, 7, 4
> -
> - ; load width and height
> - mov r3d, r3m
> - mov r4d, r4m
> -
> - ; load constant
> - mova m2, [tab_c_128]
> - mova m3, [tab_c_64_n64]
> -
> -.loopH:
> -
> - xor r5d, r5d
> -.loopW:
> - lea r6, [r0 + r5]
> -
> - movh m0, [r6]
> - punpcklbw m0, m2
> - pmaddubsw m0, m3
> -
> - movh m1, [r6 + r1]
> - punpcklbw m1, m2
> - pmaddubsw m1, m3
> -
> - add r5d, 8
> - cmp r5d, r3d
> - lea r6, [r2 + r5 * 2]
> - jg .width4
> - movu [r6 + FENC_STRIDE * 0 - 16], m0
> - movu [r6 + FENC_STRIDE * 2 - 16], m1
> - je .nextH
> - jmp .loopW
> -
> -.width4:
> - test r3d, 4
> - jz .width2
> - test r3d, 2
> - movh [r6 + FENC_STRIDE * 0 - 16], m0
> - movh [r6 + FENC_STRIDE * 2 - 16], m1
> - lea r6, [r6 + 8]
> - pshufd m0, m0, 2
> - pshufd m1, m1, 2
> - jz .nextH
> -
> -.width2:
> - movd [r6 + FENC_STRIDE * 0 - 16], m0
> - movd [r6 + FENC_STRIDE * 2 - 16], m1
> -
> -.nextH:
> - lea r0, [r0 + r1 * 2]
> - add r2, FENC_STRIDE * 4
> -
> - sub r4d, 2
> - jnz .loopH
> -
> - RET
> -
> %macro PROCESS_CHROMA_SP_W4_4R 0
> movq m0, [r0]
> movq m1, [r0 + r1]
> diff -r 5e6e06b8ec11 -r 5e0879e805a2 source/common/x86/ipfilter8.h
> --- a/source/common/x86/ipfilter8.h Mon Mar 03 17:27:42 2014 +0530
> +++ b/source/common/x86/ipfilter8.h Mon Mar 03 17:44:56 2014 +0530
> @@ -300,7 +300,6 @@
> CHROMA_SS_FILTERS_SSE4(_sse4);
>
> void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
> -void x265_chroma_p2s_i444_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, int width, int height);
>
> #undef SETUP_CHROMA_FUNC_DEF
> #undef SETUP_CHROMA_SP_FUNC_DEF
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list