<div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><DIV>good!<BR></DIV><PRE><BR>At 2013-11-18 22:24:12,murugan@multicorewareinc.com wrote:
># HG changeset patch
># User Murugan Vairavel <murugan@multicorewareinc.com>
># Date 1384784621 -19800
># Mon Nov 18 19:53:41 2013 +0530
># Node ID d24c22e915afd33a122326516b41eecf7e055934
># Parent a4735d0fe4759c72a3af408a43723f219688eeb4
>asm: code for transpose4x4 routine
>
>diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp
>--- a/source/common/x86/asm-primitives.cpp Mon Nov 18 18:59:20 2013 +0530
>+++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 19:53:41 2013 +0530
>@@ -545,6 +545,7 @@
> p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2;
> p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
> p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
>+ p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
> }
> if (cpuMask & X265_CPU_SSSE3)
> {
>diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm
>--- a/source/common/x86/pixel-a.asm Mon Nov 18 18:59:20 2013 +0530
>+++ b/source/common/x86/pixel-a.asm Mon Nov 18 19:53:41 2013 +0530
>@@ -8340,3 +8340,25 @@
> jnz .loop
>
> RET
>+
>+;-----------------------------------------------------------------
>+; void transpose4(pixel *dest, pixel *src, intptr_t stride)
>+;-----------------------------------------------------------------
>+INIT_XMM sse2
>+cglobal transpose4, 3, 3, 4, dest, src, stride
>+
>+ movd m0, [r1]
>+ movd m1, [r1 + r2]
>+ movd m2, [r1 + 2 * r2]
>+
>+ lea r1, [r1 + 2 * r2]
>+
>+ movd m3, [r1 + r2]
>+
>+ punpcklbw m0, m1
>+ punpcklbw m2, m3
>+ punpcklwd m0, m2
>+
>+ movu [r0], m0
>+
>+RET
>diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h
>--- a/source/common/x86/pixel.h Mon Nov 18 18:59:20 2013 +0530
>+++ b/source/common/x86/pixel.h Mon Nov 18 19:53:41 2013 +0530
>@@ -365,5 +365,6 @@
> void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
> void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
>+void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
>
> #endif // ifndef X265_I386_PIXEL_H
>_______________________________________________
>x265-devel mailing list
>x265-devel@videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
</PRE></div>