[x265] [PATCH 2 of 2] asm: assembly code for x265_pixel_avg_12x16
Steve Borho
steve at borho.org
Wed Nov 13 00:04:02 CET 2013
On Tue, Nov 12, 2013 at 5:27 AM, Min Chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1384255626 -28800
> # Node ID d0fb42b3fa396e413dc510bd9cf7eb2a8da22f31
> # Parent 0d8ed55eb94d7cf43afb687edbad7f3db349b84c
> asm: assembly code for x265_pixel_avg_12x16
>
Oh, I'm taking this one instead of the one I fixed up.
>
> diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Tue Nov 12 19:26:48 2013 +0800
> +++ b/source/common/x86/asm-primitives.cpp Tue Nov 12 19:27:06 2013 +0800
> @@ -134,6 +134,7 @@
> p.pixelavg_pp[LUMA_16x12] = x265_pixel_avg_16x12_ ## cpu; \
> p.pixelavg_pp[LUMA_16x8] = x265_pixel_avg_16x8_ ## cpu; \
> p.pixelavg_pp[LUMA_16x4] = x265_pixel_avg_16x4_ ## cpu; \
> + p.pixelavg_pp[LUMA_12x16] = x265_pixel_avg_12x16_ ## cpu; \
> p.pixelavg_pp[LUMA_8x32] = x265_pixel_avg_8x32_ ## cpu; \
> p.pixelavg_pp[LUMA_8x16] = x265_pixel_avg_8x16_ ## cpu; \
> p.pixelavg_pp[LUMA_8x8] = x265_pixel_avg_8x8_ ## cpu; \
> diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/mc-a.asm
> --- a/source/common/x86/mc-a.asm Tue Nov 12 19:26:48 2013 +0800
> +++ b/source/common/x86/mc-a.asm Tue Nov 12 19:27:06 2013 +0800
> @@ -190,7 +190,13 @@
> SWAP 0, 6
> BIWEIGHT [%2+mmsize/2], [%3+mmsize/2]
> packuswb m6, m0
> - mova [%1], m6
> +%if %4 != 12
> + mova [%1], m6
> +%else ; !w12
> + movh [%1], m6
> + movhlps m6, m6
> + movd [%1+mmsize/2], m6
> +%endif ; w12
> %endif
> %endmacro
>
> @@ -222,8 +228,12 @@
> %else
> %assign x 0
> %rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize
> - BIWEIGHT_ROW t0+x, t2+x, t4+x, %1
> - BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, t4+x+SIZEOF_PIXEL*t5, %1
> +%assign y mmsize
> +%if (%1 == 12) && (%1*SIZEOF_PIXEL-x < mmsize)
> +%assign y (%1*SIZEOF_PIXEL-x)
> +%endif
> + BIWEIGHT_ROW t0+x, t2+x, t4+x, y
> + BIWEIGHT_ROW t0+x+SIZEOF_PIXEL*t1, t2+x+SIZEOF_PIXEL*t3, t4+x+SIZEOF_PIXEL*t5, y
> %assign x x+mmsize
> %endrep
> %endif
> @@ -235,6 +245,7 @@
> INIT_MMX mmx2
> AVG_WEIGHT 4
> AVG_WEIGHT 8
> +AVG_WEIGHT 12
> AVG_WEIGHT 16
> AVG_WEIGHT 32
> AVG_WEIGHT 64
> @@ -248,6 +259,7 @@
> %else ;!HIGH_BIT_DEPTH
> INIT_XMM sse2
> AVG_WEIGHT 8, 7
> +AVG_WEIGHT 12, 7
> AVG_WEIGHT 16, 7
> AVG_WEIGHT 32, 7
> AVG_WEIGHT 64, 7
> @@ -259,6 +271,7 @@
> AVG_WEIGHT 4
> INIT_XMM ssse3
> AVG_WEIGHT 8, 7
> +AVG_WEIGHT 12, 7
> AVG_WEIGHT 16, 7
> AVG_WEIGHT 32, 7
> AVG_WEIGHT 64, 7
> @@ -657,7 +670,7 @@
> ; pixel *src2, intptr_t src2_stride, int height, int weight );
>
> ;-----------------------------------------------------------------------------
>
> -%macro AVG_FUNC 3
> +%macro AVG_FUNC 3-4
> cglobal pixel_avg_w%1
> AVG_START
> .height_loop:
> @@ -672,8 +685,13 @@
> pavgb m0, [t4+x]
> pavgb m1, [t4+x+SIZEOF_PIXEL*t5]
> %endif
> +%if (%1 == 12) && (%1-x/SIZEOF_PIXEL < mmsize)
> + %4 [t0+x], m0
> + %4 [t0+x+SIZEOF_PIXEL*t1], m1
> +%else
> %3 [t0+x], m0
> %3 [t0+x+SIZEOF_PIXEL*t1], m1
> +%endif
> %assign x x+mmsize
> %endrep
> AVG_END
> @@ -728,6 +746,9 @@
> AVGH 8, 8
> AVGH 8, 4
>
> +AVG_FUNC 12, movq, movq, movd
> +AVGH 12, 16
> +
> AVG_FUNC 16, movq, movq
> AVGH 16, 64
> AVGH 16, 32
> @@ -780,6 +801,9 @@
> AVG_FUNC 48, movdqu, movdqa
> AVGH 48, 64
>
> +AVG_FUNC 12, movdqu, movdqa, movq
> +AVGH 12, 16
> +
> AVGH 8, 32
> AVGH 8, 16
> AVGH 8, 8
> @@ -806,6 +830,8 @@
>
> AVGH 48, 64
>
> +AVGH 12, 16
> +
> AVGH 8, 32
> AVGH 8, 16
> AVGH 8, 8
> diff -r 0d8ed55eb94d -r d0fb42b3fa39 source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h Tue Nov 12 19:26:48 2013 +0800
> +++ b/source/common/x86/pixel.h Tue Nov 12 19:27:06 2013 +0800
> @@ -254,6 +254,7 @@
> DECL_SUF(x265_pixel_avg_16x12, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> DECL_SUF(x265_pixel_avg_16x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> DECL_SUF(x265_pixel_avg_16x4, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> +DECL_SUF(x265_pixel_avg_12x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> DECL_SUF(x265_pixel_avg_8x32, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> DECL_SUF(x265_pixel_avg_8x16, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
> DECL_SUF(x265_pixel_avg_8x8, (pixel *, intptr_t, pixel *, intptr_t, pixel *, intptr_t, int))
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131112/bdc25e38/attachment.html>
More information about the x265-devel mailing list