[x265] [PATCH Review only] asm: code for transpose_8x8 routine
Steve Borho
steve at borho.org
Tue Nov 19 22:59:57 CET 2013
I can't take this or the 16x16 patch because the 4x4 patch still causes crashes. You'll need to fix the first one and then resubmit these all together.
On Nov 19, 2013, at 12:23 AM, murugan at multicorewareinc.com wrote:
> # HG changeset patch
> # User Murugan Vairavel <murugan at multicorewareinc.com>
> # Date 1384842189 -19800
> # Tue Nov 19 11:53:09 2013 +0530
> # Node ID 3a94cc365533bf7def255dc5b28e6a6a1d1bfa50
> # Parent f6a050b79cfa400aa432f49ee8a4c2b9f20cf930
> asm: code for transpose_8x8 routine
>
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/asm-primitives.cpp Tue Nov 19 11:53:09 2013 +0530
> @@ -546,6 +546,7 @@
> p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
> p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
> p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
> + p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
> }
> if (cpuMask & X265_CPU_SSSE3)
> {
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel-a.asm
> --- a/source/common/x86/pixel-a.asm Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/pixel-a.asm Tue Nov 19 11:53:09 2013 +0530
> @@ -8359,3 +8359,45 @@
> movu [r0], m0
>
> RET
> +
> +;-----------------------------------------------------------------
> +; void transpose_8x8(pixel *dst, pixel *src, intptr_t stride)
> +;-----------------------------------------------------------------
> +INIT_XMM sse2
> +cglobal transpose8, 3, 3, 8, dest, src, stride
> +
> + movh m0, [r1]
> + movh m1, [r1 + r2]
> + movh m2, [r1 + 2 * r2]
> + lea r1, [r1 + 2 * r2]
> + movh m3, [r1 + r2]
> + movh m4, [r1 + 2 * r2]
> + lea r1, [r1 + 2 * r2]
> + movh m5, [r1 + r2]
> + movh m6, [r1 + 2 * r2]
> + lea r1, [r1 + 2 * r2]
> + movh m7, [r1 + r2]
> +
> + punpcklbw m0, m1
> + punpcklbw m2, m3
> + punpcklbw m4, m5
> + punpcklbw m6, m7
> + movu m1, m0
> + punpcklwd m0, m2
> + punpckhwd m1, m2
> + movu m5, m4
> + punpcklwd m4, m6
> + punpckhwd m5, m6
> + movu m2, m0
> + punpckldq m0, m4
> + punpckhdq m2, m4
> + movu m3, m1
> + punpckldq m1, m5
> + punpckhdq m3, m5
> +
> + movu [r0], m0
> + movu [r0 + 16], m2
> + movu [r0 + 32], m1
> + movu [r0 + 48], m3
> +
> + RET
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/pixel.h Tue Nov 19 11:53:09 2013 +0530
> @@ -366,5 +366,6 @@
> void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
> +void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride);
>
> #endif // ifndef X265_I386_PIXEL_H
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 842 bytes
Desc: Message signed with OpenPGP using GPGMail
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131119/d2ac63f8/attachment-0001.sig>
More information about the x265-devel
mailing list