[x265] [PATCH 1 of 2] asm: new macro TRANSPOSE4x4x2_16 to avoid pipeline conflict in combo matrix transpose
Min Chen
chenm003 at 163.com
Thu May 12 19:50:05 CEST 2016
# HG changeset patch
# User Min Chen <min.chen at multicorewareinc.com>
# Date 1463075381 18000
# Node ID f880db0a9a9b352077014aa69571d3169a37a2fc
# Parent 3e530043698b9df0f9aba7eefbb381ac6cc79421
asm: new macro TRANSPOSE4x4x2_16 to avoid pipeline conflict in combo matrix transpose
---
source/common/arm/asm.S | 21 ++++++++++++++++-----
1 files changed, 16 insertions(+), 5 deletions(-)
diff -r 3e530043698b -r f880db0a9a9b source/common/arm/asm.S
--- a/source/common/arm/asm.S Thu May 12 14:53:41 2016 +0530
+++ b/source/common/arm/asm.S Thu May 12 12:49:41 2016 -0500
@@ -175,9 +175,20 @@
vtrn.8 \r2, \r3
.endm
-.macro TRANSPOSE4x4_16 d0 d1 d2 d3
- vtrn.32 \d0, \d2
- vtrn.32 \d1, \d3
- vtrn.16 \d0, \d1
- vtrn.16 \d2, \d3
+.macro TRANSPOSE4x4_16 r0, r1, r2, r3 // in-place 4x4 transpose of 16-bit lanes; comments list lanes high..low as <row><col>, input row N in rN
+ vtrn.32 \r0, \r2 // r0 = [21 20 01 00], r2 = [23 22 03 02]
+ vtrn.32 \r1, \r3 // r1 = [31 30 11 10], r3 = [33 32 13 12]
+ vtrn.16 \r0, \r1 // r0 = [30 20 10 00], r1 = [31 21 11 01] (columns 0 and 1 of the input)
+ vtrn.16 \r2, \r3 // r2 = [32 22 12 02], r3 = [33 23 13 03] (columns 2 and 3 of the input)
.endm
+
+.macro TRANSPOSE4x4x2_16 rA0, rA1, rA2, rA3, rB0, rB1, rB2, rB3 // two 4x4 16-bit transposes (rA*, rB*) with steps interleaved; lanes listed high..low as <row><col>
+ vtrn.32 \rA0, \rA2 // rA0 = [21 20 01 00], rA2 = [23 22 03 02]
+ vtrn.32 \rA1, \rA3 // rA1 = [31 30 11 10], rA3 = [33 32 13 12]
+ vtrn.32 \rB0, \rB2 // rB0 = [21 20 01 00], rB2 = [23 22 03 02] (lanes of the second matrix)
+ vtrn.32 \rB1, \rB3 // rB1 = [31 30 11 10], rB3 = [33 32 13 12]
+ vtrn.16 \rA0, \rA1 // rA0 = [30 20 10 00], rA1 = [31 21 11 01]
+ vtrn.16 \rA2, \rA3 // rA2 = [32 22 12 02], rA3 = [33 23 13 03]
+ vtrn.16 \rB0, \rB1 // rB0 = [30 20 10 00], rB1 = [31 21 11 01]
+ vtrn.16 \rB2, \rB3 // rB2 = [32 22 12 02], rB3 = [33 23 13 03]
+.endm
More information about the x265-devel
mailing list