[x265] [PATCH Review only] asm: code for transpose_8x8 routine

Steve Borho steve at borho.org
Tue Nov 19 23:05:05 CET 2013


Sorry, I confused this one with a different patch series.  I hadn't taken the 4x4 patch because Min asked for some white-space improvements.

Please send his requested changes as a follow-on patch.


On Nov 19, 2013, at 3:59 PM, Steve Borho <steve at borho.org> wrote:

> I can't take this or the 16x16 patch because the 4x4 patch still causes crashes.  You'll need to fix the first one and then resubmit these all together.
> 
> On Nov 19, 2013, at 12:23 AM, murugan at multicorewareinc.com wrote:
> 
>> # HG changeset patch
>> # User Murugan Vairavel <murugan at multicorewareinc.com>
>> # Date 1384842189 -19800
>> #      Tue Nov 19 11:53:09 2013 +0530
>> # Node ID 3a94cc365533bf7def255dc5b28e6a6a1d1bfa50
>> # Parent  f6a050b79cfa400aa432f49ee8a4c2b9f20cf930
>> asm: code for transpose_8x8 routine
>> 
>> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/asm-primitives.cpp
>> --- a/source/common/x86/asm-primitives.cpp	Tue Nov 19 11:25:00 2013 +0530
>> +++ b/source/common/x86/asm-primitives.cpp	Tue Nov 19 11:53:09 2013 +0530
>> @@ -546,6 +546,7 @@
>>        p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
>>        p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
>>        p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
>> +        p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
>>    }
>>    if (cpuMask & X265_CPU_SSSE3)
>>    {
>> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel-a.asm
>> --- a/source/common/x86/pixel-a.asm	Tue Nov 19 11:25:00 2013 +0530
>> +++ b/source/common/x86/pixel-a.asm	Tue Nov 19 11:53:09 2013 +0530
>> @@ -8359,3 +8359,45 @@
>>    movu         [r0],    m0
>> 
>>    RET
>> +
>> +;-----------------------------------------------------------------
>> +; void transpose8(pixel *dest, pixel *src, intptr_t stride): 8x8 byte transpose
>> +;-----------------------------------------------------------------
>> +INIT_XMM sse2                                      ; NOTE(review): the byte-wise unpacks below presume 1-byte pixels -- confirm
>> +cglobal transpose8, 3, 3, 8, dest, src, stride     ; r0 = dest, r1 = src, r2 = stride (used for src rows only)
>> +
>> +    movh         m0,    [r1]                        ; src row 0
>> +    movh         m1,    [r1 + r2]                   ; src row 1
>> +    movh         m2,    [r1 + 2 * r2]               ; src row 2
>> +    lea          r1,    [r1 + 2 * r2]               ; r1 -> src row 2
>> +    movh         m3,    [r1 + r2]                   ; src row 3
>> +    movh         m4,    [r1 + 2 * r2]               ; src row 4
>> +    lea          r1,    [r1 + 2 * r2]               ; r1 -> src row 4
>> +    movh         m5,    [r1 + r2]                   ; src row 5
>> +    movh         m6,    [r1 + 2 * r2]               ; src row 6
>> +    lea          r1,    [r1 + 2 * r2]               ; r1 -> src row 6
>> +    movh         m7,    [r1 + r2]                   ; src row 7
>> +
>> +    punpcklbw    m0,    m1                          ; byte-interleave rows 0 and 1
>> +    punpcklbw    m2,    m3                          ; byte-interleave rows 2 and 3
>> +    punpcklbw    m4,    m5                          ; byte-interleave rows 4 and 5
>> +    punpcklbw    m6,    m7                          ; byte-interleave rows 6 and 7
>> +    movu         m1,    m0                          ; copy: the low and high word unpacks both consume rows 0/1
>> +    punpcklwd    m0,    m2                          ; rows 0-3, columns 0-3 (one 4-byte group per column)
>> +    punpckhwd    m1,    m2                          ; rows 0-3, columns 4-7
>> +    movu         m5,    m4                          ; copy: the low and high word unpacks both consume rows 4/5
>> +    punpcklwd    m4,    m6                          ; rows 4-7, columns 0-3
>> +    punpckhwd    m5,    m6                          ; rows 4-7, columns 4-7
>> +    movu         m2,    m0                          ; copy: the low and high dword unpacks both consume columns 0-3
>> +    punpckldq    m0,    m4                          ; src columns 0,1 complete -> output rows 0,1
>> +    punpckhdq    m2,    m4                          ; src columns 2,3 complete -> output rows 2,3
>> +    movu         m3,    m1                          ; copy: the low and high dword unpacks both consume columns 4-7
>> +    punpckldq    m1,    m5                          ; src columns 4,5 complete -> output rows 4,5
>> +    punpckhdq    m3,    m5                          ; src columns 6,7 complete -> output rows 6,7
>> +
>> +    movu         [r0],         m0                   ; output rows 0,1; dest is written as 64 contiguous bytes
>> +    movu         [r0 + 16],    m2                   ; output rows 2,3
>> +    movu         [r0 + 32],    m1                   ; output rows 4,5
>> +    movu         [r0 + 48],    m3                   ; output rows 6,7
>> +
>> +    RET
>> diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel.h
>> --- a/source/common/x86/pixel.h	Tue Nov 19 11:25:00 2013 +0530
>> +++ b/source/common/x86/pixel.h	Tue Nov 19 11:53:09 2013 +0530
>> @@ -366,5 +366,6 @@
>> void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
>> void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
>> void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
>> +void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride);
>> 
>> #endif // ifndef X265_I386_PIXEL_H
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 842 bytes
Desc: Message signed with OpenPGP using GPGMail
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131119/0003812c/attachment.sig>


More information about the x265-devel mailing list