[x265] [PATCH] asm: Fix for transpose_32x32 routine
murugan at multicorewareinc.com
murugan at multicorewareinc.com
Thu Nov 21 10:03:27 CET 2013
# HG changeset patch
# User Murugan Vairavel <murugan at multicorewareinc.com>
# Date 1385024427 -19800
# Thu Nov 21 14:30:27 2013 +0530
# Node ID e4e6d522248ece211ace5eb35f6cd9b1f6ca078c
# Parent db1151bb4974f1288745ba39dfd6e1838113feb7
asm: Fix for transpose_32x32 routine
diff -r db1151bb4974 -r e4e6d522248e source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Wed Nov 20 18:36:04 2013 -0600
+++ b/source/common/x86/pixel-a.asm Thu Nov 21 14:30:27 2013 +0530
@@ -8459,18 +8459,22 @@
RET
-%macro TRANSPOSE_16x16 1
- TRANSPOSE_8x8 %1
+cglobal transpose16_internal
+ TRANSPOSE_8x8 r6
lea r1, [r1 + 2 * r2]
lea r0, [r5 + 8]
- TRANSPOSE_8x8 %1
- lea r1, [r6 + 8]
- lea r0, [r5 + 8 * %1]
- TRANSPOSE_8x8 %1
+ TRANSPOSE_8x8 r6
lea r1, [r1 + 2 * r2]
- lea r0, [r5 + 8 * %1 + 8]
- TRANSPOSE_8x8 %1
-%endmacro
+ neg r2
+ lea r1, [r1 + r2 * 8]
+ lea r1, [r1 + r2 * 8 + 8]
+ neg r2
+ lea r0, [r5 + 8 * r6]
+ TRANSPOSE_8x8 r6
+ lea r1, [r1 + 2 * r2]
+ lea r0, [r5 + 8 * r6 + 8]
+ TRANSPOSE_8x8 r6
+ ret
;-----------------------------------------------------------------
; void transpose_32x32(pixel *dst, pixel *src, intptr_t stride)
@@ -8481,23 +8485,20 @@
mov r3, r0
mov r4, r1
mov r5, r0
- mov r6, r1
- TRANSPOSE_16x16 32
+ mov r6, 32
+ call transpose16_internal
lea r1, [r1 - 8 + 2 * r2]
lea r0, [r3 + 16]
mov r5, r0
- mov r6, r1
- TRANSPOSE_16x16 32
+ call transpose16_internal
lea r1, [r4 + 16]
lea r0, [r3 + 16 * 32]
mov r5, r0
- mov r6, r1
- TRANSPOSE_16x16 32
+ call transpose16_internal
lea r1, [r1 - 8 + 2 * r2]
lea r0, [r3 + 16 * 32 + 16]
mov r5, r0
- mov r6, r1
- TRANSPOSE_16x16 32
+ call transpose16_internal
RET
More information about the x265-devel mailing list