[x265] [PATCH] asm : asm routine for chroma_p2s for 4:4:4 color space format

Steve Borho steve at borho.org
Mon Feb 17 20:09:28 CET 2014


On Mon, Feb 17, 2014 at 6:44 AM, <nabajit at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Nabajit Deka
> # Date 1392641037 -19800
> #      Mon Feb 17 18:13:57 2014 +0530
> # Node ID f5275ca8f2985bb0daf563738e6071b81967c2cd
> # Parent  ce96cdb390fe26aee6effa731e51303c1d9056b0
> asm : asm routine for chroma_p2s for 4:4:4 color space format
>

Queued.  There needs to be a comment somewhere explaining how the 4:4:4
chroma_p2s primitive differs from the other chroma_p2s primitives.


>
> diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp      Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/x86/asm-primitives.cpp      Mon Feb 17 18:13:57 2014 +0530
> @@ -1119,8 +1119,8 @@
>
>          p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3;
>          p.luma_p2s = x265_luma_p2s_ssse3;
> -        p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_ssse3;
>          p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3;
> +        p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_i444_ssse3;
>
>          CHROMA_SP_FILTERS_420(_ssse3);
>          CHROMA_SP_FILTERS_444(_ssse3);
> diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/ipfilter8.asm
> --- a/source/common/x86/ipfilter8.asm   Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/x86/ipfilter8.asm   Mon Feb 17 18:13:57 2014 +0530
> @@ -3680,6 +3680,64 @@
>
>      RET
>
> +INIT_XMM ssse3
> +cglobal chroma_p2s_i444, 3, 7, 4
> +
> +    ; load width and height
> +    mov         r3d, r3m
> +    mov         r4d, r4m
> +
> +    ; load constant
> +    mova        m2, [tab_c_128]
> +    mova        m3, [tab_c_64_n64]
> +
> +.loopH:
> +
> +    xor         r5d, r5d
> +.loopW:
> +    lea         r6, [r0 + r5]
> +
> +    movh        m0, [r6]
> +    punpcklbw   m0, m2
> +    pmaddubsw   m0, m3
> +
> +    movh        m1, [r6 + r1]
> +    punpcklbw   m1, m2
> +    pmaddubsw   m1, m3
> +
> +    add         r5d, 8
> +    cmp         r5d, r3d
> +    lea         r6, [r2 + r5 * 2]
> +    jg          .width4
> +    movu        [r6 + FENC_STRIDE * 0 - 16], m0
> +    movu        [r6 + FENC_STRIDE * 2 - 16], m1
> +    je          .nextH
> +    jmp         .loopW
> +
> +.width4:
> +    test        r3d, 4
> +    jz          .width2
> +    test        r3d, 2
> +    movh        [r6 + FENC_STRIDE * 0 - 16], m0
> +    movh        [r6 + FENC_STRIDE * 2 - 16], m1
> +    lea         r6, [r6 + 8]
> +    pshufd      m0, m0, 2
> +    pshufd      m1, m1, 2
> +    jz          .nextH
> +
> +.width2:
> +    movd        [r6 + FENC_STRIDE * 0 - 16], m0
> +    movd        [r6 + FENC_STRIDE * 2 - 16], m1
> +
> +.nextH:
> +    lea         r0, [r0 + r1 * 2]
> +    add         r2, FENC_STRIDE * 4
> +
> +    sub         r4d, 2
> +    jnz         .loopH
> +
> +    RET
> +
>  %macro PROCESS_CHROMA_SP_W4_4R 0
>      movq       m0, [r0]
>      movq       m1, [r0 + r1]
> diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/ipfilter8.h
> --- a/source/common/x86/ipfilter8.h     Sun Feb 16 22:47:32 2014 -0600
> +++ b/source/common/x86/ipfilter8.h     Mon Feb 17 18:13:57 2014 +0530
> @@ -214,6 +214,7 @@
>  void x265_interp_8tap_hv_pp_8x8_ssse3(pixel * src, intptr_t srcStride,
> pixel * dst, intptr_t dstStride, int idxX, int idxY);
>  void x265_luma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst,
> int width, int height);
>  void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst,
> int width, int height);
> +void x265_chroma_p2s_i444_ssse3(pixel *src, intptr_t srcStride, int16_t
> *dst, int width, int height);
>  void x265_interp_4tap_vert_sp_2x4_sse4(int16_t * src, intptr_t srcStride,
> pixel * dst, intptr_t dstStride, int coeffIdx);
>  void x265_interp_4tap_vert_sp_2x8_sse4(int16_t * src, intptr_t srcStride,
> pixel * dst, intptr_t dstStride, int coeffIdx);
>  void x265_interp_4tap_vert_sp_6x8_sse4(int16_t * src, intptr_t srcStride,
> pixel * dst, intptr_t dstStride, int coeffIdx);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140217/60dc43cb/attachment.html>


More information about the x265-devel mailing list