[x265] [PATCH Review only] asm: code for transpose_64x64 routine
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Wed Nov 20 13:54:53 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1384952067 -19800
# Wed Nov 20 18:24:27 2013 +0530
# Node ID df257068b5acbdb80116092cef8c5af64be41294
# Parent 2ea432a36a03a7ee7e0f788181ff35ac8db20337
asm: code for transpose_64x64 routine
diff -r 2ea432a36a03 -r df257068b5ac source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Nov 20 17:51:20 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Nov 20 18:24:27 2013 +0530
@@ -548,6 +548,7 @@
p.transpose[BLOCK_8x8] = x265_transpose8_sse2;
p.transpose[BLOCK_16x16] = x265_transpose16_sse2;
p.transpose[BLOCK_32x32] = x265_transpose32_sse2;
+ p.transpose[BLOCK_64x64] = x265_transpose64_sse2;
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r 2ea432a36a03 -r df257068b5ac source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Wed Nov 20 17:51:20 2013 +0530
+++ b/source/common/x86/pixel-a.asm Wed Nov 20 18:24:27 2013 +0530
@@ -7121,6 +7121,98 @@
RET
+;-----------------------------------------------------------------
+; void transpose64(pixel *dest, pixel *src, intptr_t stride)
+;-----------------------------------------------------------------
+INIT_XMM sse2
+cglobal transpose64, 3, 7, 8, dest, src, stride ; x86inc cglobal: 3 arguments, 7 GPRs (r0-r6), 8 XMM registers
+ ; r0 = dest, r1 = src, r2 = stride per the cglobal argument order; the 64x64 block is handled as sixteen TRANSPOSE_16x16 invocations
+ mov r3, r0 ; r3 = dest base, kept for the whole routine (all later dest offsets are relative to it)
+ mov r4, r1 ; r4 = src base, kept for the whole routine (later src column offsets are relative to it)
+ mov r5, r0 ; r5/r6 mirror r0/r1 before every macro call; presumably consumed by TRANSPOSE_16x16 -- TODO confirm against the macro
+ mov r6, r1
+ TRANSPOSE_16x16 64 ; macro defined outside this hunk; the argument 64 presumably is the dest row pitch -- TODO confirm
+ lea r1, [r1 - 8 + 2 * r2] ; r1 = r1 - 8 + 2 * stride; assumes the macro leaves r1 such that this lands 16 source rows further down -- TODO confirm
+ lea r0, [r3 + 16] ; dest byte offset 16 within the first group of dest rows
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2] ; same source step as above
+ lea r0, [r3 + 32] ; dest byte offset 32
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 48] ; dest byte offset 48
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ ; second group: source restarts at src base + 16, destination at dest base + 16 * 64
+ lea r1, [r4 + 16] ; reset r1 from the saved src base rather than from the macro-modified r1
+ lea r0, [r3 + 16 * 64]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2] ; same source step as in the first group
+ lea r0, [r3 + 16 * 64 + 16]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 16 * 64 + 32]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 16 * 64 + 48]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ ; third group: source restarts at src base + 32, destination at dest base + 32 * 64
+ lea r1, [r4 + 32]
+ lea r0, [r3 + 32 * 64]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 32 * 64 + 16]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 32 * 64 + 32]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 32 * 64 + 48]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ ; fourth group: source restarts at src base + 48, destination at dest base + 48 * 64
+ lea r1, [r4 + 48]
+ lea r0, [r3 + 48 * 64]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 48 * 64 + 16]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 48 * 64 + 32]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+ lea r1, [r1 - 8 + 2 * r2]
+ lea r0, [r3 + 48 * 64 + 48]
+ mov r5, r0
+ mov r6, r1
+ TRANSPOSE_16x16 64
+
+ RET
+
;-----------------------------------------------------------------------------
; void pixel_sub_ps_c_2x4(int16_t *dest, intptr_t destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);
;-----------------------------------------------------------------------------
diff -r 2ea432a36a03 -r df257068b5ac source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Wed Nov 20 17:51:20 2013 +0530
+++ b/source/common/x86/pixel.h Wed Nov 20 18:24:27 2013 +0530
@@ -368,5 +368,6 @@
void x265_transpose8_sse2(pixel *dest, pixel *src, intptr_t stride);
void x265_transpose16_sse2(pixel *dest, pixel *src, intptr_t stride);
void x265_transpose32_sse2(pixel *dest, pixel *src, intptr_t stride);
+void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
#endif // ifndef X265_I386_PIXEL_H
More information about the x265-devel
mailing list