[x265] [PATCH Review only] asm: code for transpose4x4 routine
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Mon Nov 18 15:24:12 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384784621 -19800
# Mon Nov 18 19:53:41 2013 +0530
# Node ID d24c22e915afd33a122326516b41eecf7e055934
# Parent a4735d0fe4759c72a3af408a43723f219688eeb4
asm: code for transpose4x4 routine
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 19:53:41 2013 +0530
@@ -545,6 +545,7 @@
p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2;
p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
+ p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Mon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/pixel-a.asm Mon Nov 18 19:53:41 2013 +0530
@@ -8340,3 +8340,25 @@
jnz .loop
RET
+
+;-----------------------------------------------------------------
+; void transpose4(pixel *dest, pixel *src, intptr_t stride)
+; Reads 4 bytes from each of 4 src rows, stores 16 bytes at dest.
+; NOTE(review): byte interleave assumes 1-byte pixels - confirm.
+;-----------------------------------------------------------------
+INIT_XMM sse2
+cglobal transpose4, 3, 3, 4, dest, src, stride
+
+    ; rows 0 and 1 -> m0 = a0 b0 a1 b1 a2 b2 a3 b3
+    movd        m0, [srcq]
+    movd        m1, [srcq + strideq]
+    punpcklbw   m0, m1
+    ; rows 2 and 3 -> m2 = c0 d0 c1 d1 c2 d2 c3 d3
+    lea         srcq, [srcq + 2 * strideq]
+    movd        m2, [srcq]
+    movd        m3, [srcq + strideq]
+    punpcklbw   m2, m3
+    ; word interleave -> a0 b0 c0 d0 | a1 b1 c1 d1 | ... (columns as rows)
+    punpcklwd   m0, m2
+    movu        [destq], m0
+    RET
diff -r a4735d0fe475 -r d24c22e915af source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Mon Nov 18 18:59:20 2013 +0530
+++ b/source/common/x86/pixel.h Mon Nov 18 19:53:41 2013 +0530
@@ -365,5 +365,6 @@
void x265_getResidual8_sse2(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
+void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
#endif // ifndef X265_I386_PIXEL_H
More information about the x265-devel
mailing list