[x265] [PATCH 3 of 6] asm-intrapred8.asm: asm code size reduction

praveen at multicorewareinc.com
Thu Mar 19 06:03:05 CET 2015


# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1426671506 -19800
# Node ID 125516e35a73d951a3f49053536b3a3fecd93e08
# Parent  1a4b131bfbb682dbb52e0f9af3e4522f1f172c8a
asm-intrapred8.asm: asm code size reduction

Introduce the macro 'INTRA_PRED_ANG32_ALIGNR_STORE' to reduce the number of asm code lines.
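
For reference, the expansion is fixed by the macro definition in the patch: each
invocation advances r0 by four rows and stores four successively shifted rows.
For example, INTRA_PRED_ANG32_ALIGNR_STORE 4 expands to the same four-row block
it replaces below (note that vpalignr on ymm registers shifts within each
128-bit lane independently):

    lea     r0, [r0 + 4 * r1]      ; advance dst pointer by four rows
    palignr m2, m1, m0, 4          ; per-lane right shift of [m1:m0] by 4 bytes
    movu    [r0], m2
    palignr m2, m1, m0, 5
    movu    [r0 + r1], m2
    palignr m2, m1, m0, 6
    movu    [r0 + 2 * r1], m2
    palignr m2, m1, m0, 7
    movu    [r0 + r3], m2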

diff -r 1a4b131bfbb6 -r 125516e35a73 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Wed Mar 18 14:55:51 2015 +0530
+++ b/source/common/x86/intrapred8.asm	Wed Mar 18 15:08:26 2015 +0530
@@ -11302,15 +11302,25 @@
     INTRA_PRED_ANG16_MC0 r0 + 2 * r1, r0 + r3, 4
     RET
 
+%macro INTRA_PRED_ANG32_ALIGNR_STORE 1
+    lea     r0, [r0 + 4 * r1]
+    palignr m2, m1, m0, %1
+    movu    [r0], m2
+    palignr m2, m1, m0, (%1 + 1)
+    movu    [r0 + r1], m2
+    palignr m2, m1, m0, (%1 + 2)
+    movu    [r0 + 2 * r1], m2
+    palignr m2, m1, m0, (%1 + 3)
+    movu    [r0 + r3], m2
+%endmacro
+
 INIT_YMM avx2
 cglobal intra_pred_ang32_34, 3, 5,3
     lea     r3, [3 * r1]
 
     movu    m0, [r2 + 2]
     movu    m1, [r2 + 18]
-
     movu    [r0], m0
-
     palignr m2, m1, m0, 1
     movu    [r0 + r1], m2
     palignr m2, m1, m0, 2
@@ -11318,80 +11328,24 @@
     palignr m2, m1, m0, 3
     movu    [r0 + r3], m2
 
+    INTRA_PRED_ANG32_ALIGNR_STORE  4
+    INTRA_PRED_ANG32_ALIGNR_STORE  8
+    INTRA_PRED_ANG32_ALIGNR_STORE 12
+
     lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 4
-    movu    [r0], m2
-    palignr m2, m1, m0, 5
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 6
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 7
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 8
-    movu    [r0], m2
-    palignr m2, m1, m0, 9
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 10
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 11
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 12
-    movu    [r0], m2
-    palignr m2, m1, m0, 13
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 14
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 15
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-
     palignr m2, m1, m0, 16
     movu    [r0], m2
-
     movu    m0, [r2 + 19]
     movu    [r0 + r1], m0
-
     movu    m1, [r2 + 35]
-
     palignr m2, m1, m0, 1
     movu    [r0 + 2 * r1], m2
     palignr m2, m1, m0, 2
     movu    [r0 + r3], m2
 
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 3
-    movu    [r0], m2
-    palignr m2, m1, m0, 4
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 5
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 6
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 7
-    movu    [r0], m2
-    palignr m2, m1, m0, 8
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 9
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 10
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 11
-    movu    [r0], m2
-    palignr m2, m1, m0, 12
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 13
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 14
-    movu    [r0 + r3], m2
+    INTRA_PRED_ANG32_ALIGNR_STORE  3
+    INTRA_PRED_ANG32_ALIGNR_STORE  7
+    INTRA_PRED_ANG32_ALIGNR_STORE 11
     RET
 
 INIT_YMM avx2
@@ -11400,9 +11354,7 @@
 
     movu    m0, [r2 + 64 + 2]
     movu    m1, [r2 + 64 + 18]
-
     movu    [r0], m0
-
     palignr m2, m1, m0, 1
     movu    [r0 + r1], m2
     palignr m2, m1, m0, 2
@@ -11410,78 +11362,22 @@
     palignr m2, m1, m0, 3
     movu    [r0 + r3], m2
 
+    INTRA_PRED_ANG32_ALIGNR_STORE  4
+    INTRA_PRED_ANG32_ALIGNR_STORE  8
+    INTRA_PRED_ANG32_ALIGNR_STORE 12
+
     lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 4
-    movu    [r0], m2
-    palignr m2, m1, m0, 5
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 6
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 7
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 8
-    movu    [r0], m2
-    palignr m2, m1, m0, 9
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 10
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 11
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 12
-    movu    [r0], m2
-    palignr m2, m1, m0, 13
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 14
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 15
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-
     palignr m2, m1, m0, 16
     movu    [r0], m2
-
     movu    m0, [r2 + 64 + 19]
     movu    [r0 + r1], m0
-
     movu    m1, [r2 + 64 + 35]
-
     palignr m2, m1, m0, 1
     movu    [r0 + 2 * r1], m2
     palignr m2, m1, m0, 2
     movu    [r0 + r3], m2
 
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 3
-    movu    [r0], m2
-    palignr m2, m1, m0, 4
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 5
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 6
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 7
-    movu    [r0], m2
-    palignr m2, m1, m0, 8
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 9
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 10
-    movu    [r0 + r3], m2
-
-    lea     r0, [r0 + 4 * r1]
-    palignr m2, m1, m0, 11
-    movu    [r0], m2
-    palignr m2, m1, m0, 12
-    movu    [r0 + r1], m2
-    palignr m2, m1, m0, 13
-    movu    [r0 + 2 * r1], m2
-    palignr m2, m1, m0, 14
-    movu    [r0 + r3], m2
-    RET
+    INTRA_PRED_ANG32_ALIGNR_STORE  3
+    INTRA_PRED_ANG32_ALIGNR_STORE  7
+    INTRA_PRED_ANG32_ALIGNR_STORE 11
+    RET

