[x265] [PATCH Review only] asm: code for transpose_8x8 routine
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Tue Nov 19 07:23:41 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384842189 -19800
# Tue Nov 19 11:53:09 2013 +0530
# Node ID 3a94cc365533bf7def255dc5b28e6a6a1d1bfa50
# Parent f6a050b79cfa400aa432f49ee8a4c2b9f20cf930
asm: code for transpose_8x8 routine
diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Nov 19 11:25:00 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Nov 19 11:53:09 2013 +0530
@@ -546,6 +546,7 @@
p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
p.calcresidual[BLOCK_8x8] = x265_getResidual8_sse2;
p.transpose[BLOCK_4x4] = x265_transpose4_sse2;
+ p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Tue Nov 19 11:25:00 2013 +0530
+++ b/source/common/x86/pixel-a.asm Tue Nov 19 11:53:09 2013 +0530
@@ -8359,3 +8359,45 @@
movu [r0], m0
RET
+
+;-----------------------------------------------------------------
+; void transpose8(pixel *dest, pixel *src, intptr_t stride) -- 8x8 transpose; assumes 1-byte pixels (TODO confirm)
+;-----------------------------------------------------------------
+INIT_XMM sse2
+cglobal transpose8, 3, 3, 8, dest, src, stride
+; r0 = dest, r1 = src, r2 = stride (used for src addressing only); one source row per register
+    movh        m0, [r1]                ; row 0 (only the low 8 bytes of each row register are consumed below)
+    movh        m1, [r1 + r2]           ; row 1
+    movh        m2, [r1 + 2 * r2]       ; row 2
+    lea         r1, [r1 + 2 * r2]       ; r1 -> row 2
+    movh        m3, [r1 + r2]           ; row 3
+    movh        m4, [r1 + 2 * r2]       ; row 4
+    lea         r1, [r1 + 2 * r2]       ; r1 -> row 4
+    movh        m5, [r1 + r2]           ; row 5
+    movh        m6, [r1 + 2 * r2]       ; row 6
+    lea         r1, [r1 + 2 * r2]       ; r1 -> row 6
+    movh        m7, [r1 + r2]           ; row 7
+; three interleave stages (bytes, then words, then dwords) turn the 8 rows into 8 columns
+    punpcklbw   m0, m1                  ; rows 0/1 interleaved byte by byte
+    punpcklbw   m2, m3                  ; rows 2/3
+    punpcklbw   m4, m5                  ; rows 4/5
+    punpcklbw   m6, m7                  ; rows 6/7
+    mova        m1, m0                  ; copy rows 0/1 pairs: m0 is overwritten by the low unpack
+    mova        m5, m4                  ; copy rows 4/5 pairs: m4 is overwritten by the low unpack
+    punpcklwd   m0, m2                  ; rows 0-3 of columns 0..3
+    punpcklwd   m4, m6                  ; rows 4-7 of columns 0..3
+    punpckhwd   m1, m2                  ; rows 0-3 of columns 4..7
+    punpckhwd   m5, m6                  ; rows 4-7 of columns 4..7
+    mova        m2, m0                  ; copy before m0 is overwritten
+    mova        m3, m1                  ; copy before m1 is overwritten
+    punpckldq   m0, m4                  ; complete columns 0 and 1
+    punpckldq   m1, m5                  ; complete columns 4 and 5
+    punpckhdq   m2, m4                  ; complete columns 2 and 3
+    punpckhdq   m3, m5                  ; complete columns 6 and 7
+; dest receives 64 consecutive bytes: transposed row i starts at dest + 8 * i
+    movu        [r0], m0                ; transposed rows 0, 1
+    movu        [r0 + 16], m2           ; transposed rows 2, 3
+    movu        [r0 + 32], m1           ; transposed rows 4, 5
+    movu        [r0 + 48], m3           ; transposed rows 6, 7
+
+    RET
diff -r f6a050b79cfa -r 3a94cc365533 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Tue Nov 19 11:25:00 2013 +0530
+++ b/source/common/x86/pixel.h Tue Nov 19 11:53:09 2013 +0530
@@ -366,5 +366,6 @@
void x265_getResidual16_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
void x265_getResidual32_sse4(pixel *fenc, pixel *pred, int16_t *residual, intptr_t stride);
void x265_transpose4_sse2(pixel *dest, pixel *src, intptr_t stride);
+void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride);
#endif // ifndef X265_I386_PIXEL_H
More information about the x265-devel
mailing list