[x265] [PATCH] asm code for blockfil_s, 4x4
Steve Borho
steve at borho.org
Thu Nov 7 19:48:00 CET 2013
On Thu, Nov 7, 2013 at 6:56 AM, <praveen at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1383828996 -19800
> # Node ID f2af7af43dfcb08135a08e755f654314a89efae7
> # Parent d71f86b1c58b4fc9f8a3ffeaaef45c60f8bcc468
> asm code for blockfil_s, 4x4
>
>
blockfill has two l's
diff -r d71f86b1c58b -r f2af7af43dfc source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Thu Nov 07 18:16:22 2013
> +0530
> +++ b/source/common/x86/asm-primitives.cpp Thu Nov 07 18:26:36 2013
> +0530
> @@ -361,6 +361,8 @@
> p.luma_copy_sp[LUMA_64x32] = x265_blockcopy_sp_64x32_sse2;
> p.luma_copy_sp[LUMA_64x48] = x265_blockcopy_sp_64x48_sse2;
> p.luma_copy_sp[LUMA_64x64] = x265_blockcopy_sp_64x64_sse2;
> +
> + p.blockfill_s[BLOCK_4x4] = x265_blockfil_s_4x4_sse2;
> #if X86_64
> p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
> p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
> diff -r d71f86b1c58b -r f2af7af43dfc source/common/x86/blockcopy8.asm
> --- a/source/common/x86/blockcopy8.asm Thu Nov 07 18:16:22 2013 +0530
> +++ b/source/common/x86/blockcopy8.asm Thu Nov 07 18:26:36 2013 +0530
> @@ -1646,3 +1646,22 @@
> BLOCKCOPY_SP_W64_H1 64, 32
> BLOCKCOPY_SP_W64_H1 64, 48
> BLOCKCOPY_SP_W64_H1 64, 64
> +
>
> +;-----------------------------------------------------------------------------
> +; void blockfil_s_4x4(int16_t *dest, intptr_t destride, int16_t val)
>
> +;-----------------------------------------------------------------------------
> +INIT_XMM sse2
> +cglobal blockfil_s_4x4, 3, 3, 1, dest, destStride, val
> +
> +add r1, r1
> +
> +movd m0, r2d
> +pshuflw m0, m0, 0
> +
> +movh [r0], m0
> +movh [r0 + r1], m0
> +movh [r0 + 2 * r1], m0
> +lea r0, [r0 + 2 * r1]
> +movh [r0 + r1], m0
> +
> +RET
> diff -r d71f86b1c58b -r f2af7af43dfc source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h Thu Nov 07 18:16:22 2013 +0530
> +++ b/source/common/x86/pixel.h Thu Nov 07 18:26:36 2013 +0530
> @@ -266,6 +266,8 @@
> DECL_ADS(2, avx2)
> DECL_ADS(1, avx2)
>
> +void x265_blockfil_s_4x4_sse2(int16_t *dst, intptr_t dstride, int16_t
> val);
> +
>
This declaration belongs in blockcopy8.h, not pixel.h.
> #undef DECL_PIXELS
> #undef DECL_SUF
> #undef DECL_HEVC_SSD
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131107/38bd25b1/attachment-0001.html>
More information about the x265-devel mailing list