[x265] [PATCH 2 of 2] asm: assembly code for x265_pixel_avg_12x16

Deepthi Nandakumar deepthi at multicorewareinc.com
Tue Nov 12 11:50:56 CET 2013


This patch failed to apply because I pushed the other sizes first. Could you
please pull and merge?


On Tue, Nov 12, 2013 at 1:47 PM, Min Chen <chenm003 at 163.com> wrote:

> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1384244066 -28800
> # Node ID 9dda3a715f9fe089ee7b1e4db2ffeff28cd477c3
> # Parent  8c731f8c71ff6f42718a80934433a154417caeec
> asm: assembly code for x265_pixel_avg_12x16
>
> diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp      Tue Nov 12 16:14:09 2013 +0800
> +++ b/source/common/x86/asm-primitives.cpp      Tue Nov 12 16:14:26 2013 +0800
> @@ -125,6 +125,7 @@
>      p.pixelavg_pp[LUMA_16x12]  = x265_pixel_avg_16x12_ ## cpu; \
>      p.pixelavg_pp[LUMA_16x8]  = x265_pixel_avg_16x8_ ## cpu; \
>      p.pixelavg_pp[LUMA_16x4]  = x265_pixel_avg_16x4_ ## cpu; \
> +    p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \
>      p.pixelavg_pp[LUMA_8x16]  = x265_pixel_avg_8x16_ ## cpu; \
>      p.pixelavg_pp[LUMA_8x8]   = x265_pixel_avg_8x8_ ## cpu; \
>      p.pixelavg_pp[LUMA_8x4]   = x265_pixel_avg_8x4_ ## cpu;
> diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/mc-a.asm
> --- a/source/common/x86/mc-a.asm        Tue Nov 12 16:14:09 2013 +0800
> +++ b/source/common/x86/mc-a.asm        Tue Nov 12 16:14:26 2013 +0800
> @@ -190,7 +190,13 @@
>      SWAP 0, 6
>      BIWEIGHT [%2+mmsize/2], [%3+mmsize/2]
>      packuswb   m6, m0
> -    mova     [%1], m6
> +%if %4 != 12
> +    mova    [%1], m6
> +%else ; !w12
> +    movh    [%1], m6
> +    movhlps m6, m6
> +    movd    [%1+mmsize/2], m6
> +%endif ; w12
>  %endif
>  %endmacro
>
> @@ -222,8 +228,12 @@
>  %else
>  %assign x 0
>  %rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize
> -    BIWEIGHT_ROW   t0+x,                   t2+x,                   t4+x,                   %1
> -    BIWEIGHT_ROW   t0+x+SIZEOF_PIXEL*t1,   t2+x+SIZEOF_PIXEL*t3,   t4+x+SIZEOF_PIXEL*t5,   %1
> +%assign y mmsize
> +%if (%1 == 12) && (%1*SIZEOF_PIXEL-x < mmsize)
> +%assign y (%1*SIZEOF_PIXEL-x)
> +%endif
> +    BIWEIGHT_ROW   t0+x,                   t2+x,                   t4+x,                   y
> +    BIWEIGHT_ROW   t0+x+SIZEOF_PIXEL*t1,   t2+x+SIZEOF_PIXEL*t3,   t4+x+SIZEOF_PIXEL*t5,   y
>  %assign x x+mmsize
>  %endrep
>  %endif
> @@ -235,6 +245,7 @@
>  INIT_MMX mmx2
>  AVG_WEIGHT 4
>  AVG_WEIGHT 8
> +AVG_WEIGHT 12
>  AVG_WEIGHT 16
>  AVG_WEIGHT 32
>  %if HIGH_BIT_DEPTH
> @@ -245,6 +256,7 @@
>  %else ;!HIGH_BIT_DEPTH
>  INIT_XMM sse2
>  AVG_WEIGHT 8,  7
> +AVG_WEIGHT 12, 7
>  AVG_WEIGHT 16, 7
>  AVG_WEIGHT 32, 7
>  %define BIWEIGHT BIWEIGHT_SSSE3
> @@ -253,6 +265,7 @@
>  AVG_WEIGHT 4
>  INIT_XMM ssse3
>  AVG_WEIGHT 8,  7
> +AVG_WEIGHT 12, 7
>  AVG_WEIGHT 16, 7
>  AVG_WEIGHT 32, 7
>
> @@ -648,7 +661,7 @@
>  ;                    pixel *src2, intptr_t src2_stride, int height, int weight );
>
>  ;-----------------------------------------------------------------------------
>
> -%macro AVG_FUNC 3
> +%macro AVG_FUNC 3-4
>  cglobal pixel_avg_w%1
>      AVG_START
>  .height_loop:
> @@ -663,8 +676,13 @@
>      pavgb  m0, [t4+x]
>      pavgb  m1, [t4+x+SIZEOF_PIXEL*t5]
>  %endif
> +%if (%1 == 12) && (%1-x/SIZEOF_PIXEL < mmsize)
> +    %4     [t0+x], m0
> +    %4     [t0+x+SIZEOF_PIXEL*t1], m1
> +%else
>      %3     [t0+x], m0
>      %3     [t0+x+SIZEOF_PIXEL*t1], m1
> +%endif
>  %assign x x+mmsize
>  %endrep
>      AVG_END
> @@ -718,6 +736,9 @@
>  AVGH 8,  8
>  AVGH 8,  4
>
> +AVG_FUNC 12, movq, movq, movd
> +AVGH 12, 16
> +
>  AVG_FUNC 16, movq, movq
>  AVGH 16, 64
>  AVGH 16, 32
> @@ -739,6 +760,8 @@
>  AVGH 16, 12
>  AVGH 16, 8
>  AVGH 16, 4
> +AVG_FUNC 12, movdqu, movdqa, movq
> +AVGH 12, 16
>  AVGH  8, 16
>  AVGH  8,  8
>  AVGH  8,  4
> @@ -750,6 +773,7 @@
>  AVGH 16, 12
>  AVGH 16, 8
>  AVGH 16, 4
> +AVGH 12, 16
>  AVGH  8, 16
>  AVGH  8,  8
>  AVGH  8,  4
> diff -r 8c731f8c71ff -r 9dda3a715f9f source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h Tue Nov 12 16:14:09 2013 +0800
> +++ b/source/common/x86/pixel.h Tue Nov 12 16:14:26 2013 +0800
> @@ -245,6 +245,7 @@
>  DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
>  DECL_SUF(x265_pixel_avg_16x8,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
>  DECL_SUF(x265_pixel_avg_16x4,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
>  DECL_SUF(x265_pixel_avg_8x16,  (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
>  DECL_SUF(x265_pixel_avg_8x8,   (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
>  DECL_SUF(x265_pixel_avg_8x4,   (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131112/20e1224b/attachment.html>


More information about the x265-devel mailing list