[x265] [PATCH Review only] asm: code for transpose_8x8 routine

Steve Borho steve at borho.org
Tue Nov 19 22:59:57 CET 2013


I can't take this or the 16x16 patch because the 4x4 patch still causes crashes.  You'll need to fix the first one and then resubmit these all together.

On Nov 19, 2013, at 12:23 AM, murugan at multicorewareinc.com wrote:

> # HG changeset patch
> # User Murugan Vairavel <murugan at multicorewareinc.com>
> # Date 1384842189 -19800
> #      Tue Nov 19 11:53:09 2013 +0530
> # Node ID 3a94cc365533bf7def255dc5b28e6a6a1d1bfa50
> # Parent  f6a050b79cfa400aa432f49ee8a4c2b9f20cf930
> asm: code for transpose_8x8 routine
> 
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/asm-primitives.cpp
> --- a/source/common/x86/asm-primitives.cpp	Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/asm-primitives.cpp	Tue Nov 19 11:53:09 2013 +0530
> @@ -546,6 +546,7 @@
>         p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
>         p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
>         p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
> +        p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
>     }
>     if (cpuMask & X265_CPU_SSSE3)
>     {
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel-a.asm
> --- a/source/common/x86/pixel-a.asm	Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/pixel-a.asm	Tue Nov 19 11:53:09 2013 +0530
> @@ -8359,3 +8359,45 @@
>     movu         [r0],    m0
> 
>     RET
> +
> +;-----------------------------------------------------------------
> +; void transpose8(pixel *dest, pixel *src, intptr_t stride) -- 8x8 byte transpose; dest is written as 64 contiguous bytes
> +;-----------------------------------------------------------------
> +INIT_XMM sse2
> +cglobal transpose8, 3, 3, 8, dest, src, stride    ; 3 args, 3 GPRs, 8 XMM regs; r0 = dest, r1 = src, r2 = stride
> +
> +    movh         m0,    [r1]                       ; row 0 (movh loads 64 bits = 8 bytes; assumes 8-bit pixels -- TODO confirm)
> +    movh         m1,    [r1 + r2]                  ; row 1
> +    movh         m2,    [r1 + 2 * r2]              ; row 2
> +    lea          r1,    [r1 + 2 * r2]              ; r1 = src + 2 * stride
> +    movh         m3,    [r1 + r2]                  ; row 3
> +    movh         m4,    [r1 + 2 * r2]              ; row 4
> +    lea          r1,    [r1 + 2 * r2]              ; r1 = src + 4 * stride
> +    movh         m5,    [r1 + r2]                  ; row 5
> +    movh         m6,    [r1 + 2 * r2]              ; row 6
> +    lea          r1,    [r1 + 2 * r2]              ; r1 = src + 6 * stride
> +    movh         m7,    [r1 + r2]                  ; row 7
> +
> +    punpcklbw    m0,    m1                         ; m0 = rows 0/1 byte-interleaved: a0 b0 a1 b1 ... a7 b7
> +    punpcklbw    m2,    m3                         ; m2 = rows 2/3 byte-interleaved
> +    punpcklbw    m4,    m5                         ; m4 = rows 4/5 byte-interleaved
> +    punpcklbw    m6,    m7                         ; m6 = rows 6/7 byte-interleaved
> +    movu         m1,    m0                         ; keep a copy, the next unpack overwrites m0 (NOTE(review): mova is the usual reg-reg move)
> +    punpcklwd    m0,    m2                         ; m0 = columns 0-3 of rows 0-3 (4 bytes per column)
> +    punpckhwd    m1,    m2                         ; m1 = columns 4-7 of rows 0-3
> +    movu         m5,    m4                         ; keep a copy, the next unpack overwrites m4
> +    punpcklwd    m4,    m6                         ; m4 = columns 0-3 of rows 4-7
> +    punpckhwd    m5,    m6                         ; m5 = columns 4-7 of rows 4-7
> +    movu         m2,    m0                         ; keep a copy, the next unpack overwrites m0
> +    punpckldq    m0,    m4                         ; m0 = source columns 0-1 = output rows 0-1 (8 bytes each)
> +    punpckhdq    m2,    m4                         ; m2 = output rows 2-3
> +    movu         m3,    m1                         ; keep a copy, the next unpack overwrites m1
> +    punpckldq    m1,    m5                         ; m1 = output rows 4-5
> +    punpckhdq    m3,    m5                         ; m3 = output rows 6-7
> +
> +    movu         [r0],         m0                  ; dest bytes  0-15: output rows 0-1
> +    movu         [r0 + 16],    m2                  ; dest bytes 16-31: output rows 2-3
> +    movu         [r0 + 32],    m1                  ; dest bytes 32-47: output rows 4-5
> +    movu         [r0 + 48],    m3                  ; dest bytes 48-63: output rows 6-7
> +
> +    RET
> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel.h
> --- a/source/common/x86/pixel.h	Tue Nov 19 11:25:00 2013 +0530
> +++ b/source/common/x86/pixel.h	Tue Nov 19 11:53:09 2013 +0530
> @@ -366,5 +366,6 @@
> void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
> +void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride);
> 
> #endif // ifndef X265_I386_PIXEL_H
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 842 bytes
Desc: Message signed with OpenPGP using GPGMail
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131119/d2ac63f8/attachment-0001.sig>


More information about the x265-devel mailing list