[x265] [PATCH 3 of 6] asm-intrapred8.asm: asm code size reduction
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Thu Mar 19 06:03:05 CET 2015
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1426671506 -19800
# Node ID 125516e35a73d951a3f49053536b3a3fecd93e08
# Parent 1a4b131bfbb682dbb52e0f9af3e4522f1f172c8a
asm-intrapred8.asm: asm code size reduction
Introduce macro 'INTRA_PRED_ANG32_ALIGNR_STORE' to reduce asm code lines.
diff -r 1a4b131bfbb6 -r 125516e35a73 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Wed Mar 18 14:55:51 2015 +0530
+++ b/source/common/x86/intrapred8.asm Wed Mar 18 15:08:26 2015 +0530
@@ -11302,15 +11302,25 @@
INTRA_PRED_ANG16_MC0 r0 + 2 * r1, r0 + r3, 4
RET
+%macro INTRA_PRED_ANG32_ALIGNR_STORE 1
+ lea r0, [r0 + 4 * r1]
+ palignr m2, m1, m0, %1
+ movu [r0], m2
+ palignr m2, m1, m0, (%1 + 1)
+ movu [r0 + r1], m2
+ palignr m2, m1, m0, (%1 + 2)
+ movu [r0 + 2 * r1], m2
+ palignr m2, m1, m0, (%1 + 3)
+ movu [r0 + r3], m2
+%endmacro
+
INIT_YMM avx2
cglobal intra_pred_ang32_34, 3, 5,3
lea r3, [3 * r1]
movu m0, [r2 + 2]
movu m1, [r2 + 18]
-
movu [r0], m0
-
palignr m2, m1, m0, 1
movu [r0 + r1], m2
palignr m2, m1, m0, 2
@@ -11318,80 +11328,24 @@
palignr m2, m1, m0, 3
movu [r0 + r3], m2
+ INTRA_PRED_ANG32_ALIGNR_STORE 4
+ INTRA_PRED_ANG32_ALIGNR_STORE 8
+ INTRA_PRED_ANG32_ALIGNR_STORE 12
+
lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 4
- movu [r0], m2
- palignr m2, m1, m0, 5
- movu [r0 + r1], m2
- palignr m2, m1, m0, 6
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 7
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 8
- movu [r0], m2
- palignr m2, m1, m0, 9
- movu [r0 + r1], m2
- palignr m2, m1, m0, 10
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 11
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 12
- movu [r0], m2
- palignr m2, m1, m0, 13
- movu [r0 + r1], m2
- palignr m2, m1, m0, 14
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 15
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
-
palignr m2, m1, m0, 16
movu [r0], m2
-
movu m0, [r2 + 19]
movu [r0 + r1], m0
-
movu m1, [r2 + 35]
-
palignr m2, m1, m0, 1
movu [r0 + 2 * r1], m2
palignr m2, m1, m0, 2
movu [r0 + r3], m2
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 3
- movu [r0], m2
- palignr m2, m1, m0, 4
- movu [r0 + r1], m2
- palignr m2, m1, m0, 5
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 6
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 7
- movu [r0], m2
- palignr m2, m1, m0, 8
- movu [r0 + r1], m2
- palignr m2, m1, m0, 9
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 10
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 11
- movu [r0], m2
- palignr m2, m1, m0, 12
- movu [r0 + r1], m2
- palignr m2, m1, m0, 13
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 14
- movu [r0 + r3], m2
+ INTRA_PRED_ANG32_ALIGNR_STORE 3
+ INTRA_PRED_ANG32_ALIGNR_STORE 7
+ INTRA_PRED_ANG32_ALIGNR_STORE 11
RET
INIT_YMM avx2
@@ -11400,9 +11354,7 @@
movu m0, [r2 + 64 + 2]
movu m1, [r2 + 64 + 18]
-
movu [r0], m0
-
palignr m2, m1, m0, 1
movu [r0 + r1], m2
palignr m2, m1, m0, 2
@@ -11410,78 +11362,22 @@
palignr m2, m1, m0, 3
movu [r0 + r3], m2
+ INTRA_PRED_ANG32_ALIGNR_STORE 4
+ INTRA_PRED_ANG32_ALIGNR_STORE 8
+ INTRA_PRED_ANG32_ALIGNR_STORE 12
+
lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 4
- movu [r0], m2
- palignr m2, m1, m0, 5
- movu [r0 + r1], m2
- palignr m2, m1, m0, 6
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 7
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 8
- movu [r0], m2
- palignr m2, m1, m0, 9
- movu [r0 + r1], m2
- palignr m2, m1, m0, 10
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 11
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 12
- movu [r0], m2
- palignr m2, m1, m0, 13
- movu [r0 + r1], m2
- palignr m2, m1, m0, 14
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 15
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
-
palignr m2, m1, m0, 16
movu [r0], m2
-
movu m0, [r2 + 64 + 19]
movu [r0 + r1], m0
-
movu m1, [r2 + 64 + 35]
-
palignr m2, m1, m0, 1
movu [r0 + 2 * r1], m2
palignr m2, m1, m0, 2
movu [r0 + r3], m2
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 3
- movu [r0], m2
- palignr m2, m1, m0, 4
- movu [r0 + r1], m2
- palignr m2, m1, m0, 5
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 6
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 7
- movu [r0], m2
- palignr m2, m1, m0, 8
- movu [r0 + r1], m2
- palignr m2, m1, m0, 9
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 10
- movu [r0 + r3], m2
-
- lea r0, [r0 + 4 * r1]
- palignr m2, m1, m0, 11
- movu [r0], m2
- palignr m2, m1, m0, 12
- movu [r0 + r1], m2
- palignr m2, m1, m0, 13
- movu [r0 + 2 * r1], m2
- palignr m2, m1, m0, 14
- movu [r0 + r3], m2
- RET
+ INTRA_PRED_ANG32_ALIGNR_STORE 3
+ INTRA_PRED_ANG32_ALIGNR_STORE 7
+ INTRA_PRED_ANG32_ALIGNR_STORE 11
+ RET
More information about the x265-devel
mailing list