[x265] [PATCH 7 of 7] asm: optimize buffer addressing using registers

praveen at multicorewareinc.com
Tue Apr 7 14:56:52 CEST 2015


# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1428410863 -19800
#      Tue Apr 07 18:17:43 2015 +0530
# Node ID e2e9d51a93f9a0682ba16a5637266c13d62fee40
# Parent  def9ad7abd8424526d405d725e0dfb5c9eaf35a4
asm: optimize buffer addressing using registers
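
The change follows one simple pattern: instead of re-encoding the address
of the intra_pred_shuff_0_8 lookup table in every pshufb memory operand,
load that address once into a spare GPR with lea and use register-indirect
addressing from then on. Each affected function raises its GPR count in
cglobal from 5 to 6 so that r5 is free to hold the pointer. A minimal
sketch of the transformation (illustrative only, not the actual x265 code;
the names match the diff below):

    ; before: every pshufb carries the table's full displacement
    ; (and, in non-PIC builds, a relocation) in its encoding
    pshufb            xm9, [intra_pred_shuff_0_8]
    pshufb            xm10, [intra_pred_shuff_0_8]

    ; after: load the address once, then address through r5,
    ; which typically encodes smaller per instruction
    lea               r5, [intra_pred_shuff_0_8]
    pshufb            xm9, [r5]
    pshufb            xm10, [r5]

The savings add up because these routines are fully unrolled, so the same
operand would otherwise be emitted many times per function.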

diff -r def9ad7abd84 -r e2e9d51a93f9 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Tue Apr 07 17:49:16 2015 +0530
+++ b/source/common/x86/intrapred8.asm	Tue Apr 07 18:17:43 2015 +0530
@@ -12058,20 +12058,21 @@
 
 
 INIT_YMM avx2
-cglobal intra_pred_ang16_7, 3, 5, 12
+cglobal intra_pred_ang16_7, 3, 6, 12
     mova              m11, [pw_1024]
+    lea               r5, [intra_pred_shuff_0_8]
 
     movu              xm9, [r2 + 1 + 32]
-    pshufb            xm9, [intra_pred_shuff_0_8]
+    pshufb            xm9, [r5]
     movu              xm10, [r2 + 9 + 32]
-    pshufb            xm10, [intra_pred_shuff_0_8]
+    pshufb            xm10, [r5]
 
     movu              xm7, [r2 + 3  + 32]
-    pshufb            xm7, [intra_pred_shuff_0_8]
+    pshufb            xm7, [r5]
     vinserti128       m9, m9, xm7, 1
 
     movu              xm8, [r2 + 11 + 32]
-    pshufb            xm8, [intra_pred_shuff_0_8]
+    pshufb            xm8, [r5]
     vinserti128       m10, m10, xm8, 1
 
     lea               r3, [3 * r1]
@@ -12081,21 +12082,21 @@
     INTRA_PRED_ANG16_CAL_ROW m1, m2, 1
 
     movu              xm7, [r2 + 4  + 32]
-    pshufb            xm7, [intra_pred_shuff_0_8]
+    pshufb            xm7, [r5]
     vinserti128       m9, m9, xm7, 1
 
     movu              xm8, [r2 + 12 + 32]
-    pshufb            xm8, [intra_pred_shuff_0_8]
+    pshufb            xm8, [r5]
     vinserti128       m10, m10, xm8, 1
 
     INTRA_PRED_ANG16_CAL_ROW m2, m3, 2
 
     movu              xm7, [r2 + 2  + 32]
-    pshufb            xm7, [intra_pred_shuff_0_8]
+    pshufb            xm7, [r5]
     vinserti128       m9, m9, xm7, 0
 
     movu              xm8, [r2 + 10 + 32]
-    pshufb            xm8, [intra_pred_shuff_0_8]
+    pshufb            xm8, [r5]
     vinserti128       m10, m10, xm8, 0
 
     INTRA_PRED_ANG16_CAL_ROW m3, m4, 3
@@ -12106,21 +12107,21 @@
     INTRA_PRED_ANG16_CAL_ROW m5, m6, 1
 
     movu              xm7, [r2 + 5  + 32]
-    pshufb            xm7, [intra_pred_shuff_0_8]
+    pshufb            xm7, [r5]
     vinserti128       m9, m9, xm7, 1
 
     movu              xm8, [r2 + 13 + 32]
-    pshufb            xm8, [intra_pred_shuff_0_8]
+    pshufb            xm8, [r5]
     vinserti128       m10, m10, xm8, 1
 
     INTRA_PRED_ANG16_CAL_ROW m6, m7, 2
 
     movu              xm7, [r2 + 3  + 32]
-    pshufb            xm7, [intra_pred_shuff_0_8]
+    pshufb            xm7, [r5]
     vinserti128       m9, m9, xm7, 0
 
     movu              xm8, [r2 + 11 + 32]
-    pshufb            xm8, [intra_pred_shuff_0_8]
+    pshufb            xm8, [r5]
     vinserti128       m10, m10, xm8, 0
 
     INTRA_PRED_ANG16_CAL_ROW m7, m8, 3
@@ -12130,20 +12131,21 @@
     RET
 
 INIT_YMM avx2
-cglobal intra_pred_ang16_8, 3, 5, 12
+cglobal intra_pred_ang16_8, 3, 6, 12
     mova              m11, [pw_1024]
+    lea               r5, [intra_pred_shuff_0_8]
 
     movu              xm9, [r2 + 1 + 32]
-    pshufb            xm9, [intra_pred_shuff_0_8]
+    pshufb            xm9, [r5]
     movu              xm10, [r2 + 9 + 32]
-    pshufb            xm10, [intra_pred_shuff_0_8]
+    pshufb            xm10, [r5]
 
     movu              xm7, [r2 + 2  + 32]
-    pshufb            xm7, [intra_pred_shuff_0_8]
+    pshufb            xm7, [r5]
     vinserti128       m9, m9, xm7, 1
 
     movu              xm8, [r2 + 10 + 32]
-    pshufb            xm8, [intra_pred_shuff_0_8]
+    pshufb            xm8, [r5]
     vinserti128       m10, m10, xm8, 1
 
     lea               r3, [3 * r1]
@@ -12157,11 +12159,11 @@
     add               r4, 4 * mmsize
 
     movu              xm4, [r2 + 3  + 32]
-    pshufb            xm4, [intra_pred_shuff_0_8]
+    pshufb            xm4, [r5]
     vinserti128       m9, m9, xm4, 1
 
     movu              xm5, [r2 + 11 + 32]
-    pshufb            xm5, [intra_pred_shuff_0_8]
+    pshufb            xm5, [r5]
     vinserti128       m10, m10, xm5, 1
 
     INTRA_PRED_ANG16_CAL_ROW m4, m5, 0
@@ -12178,13 +12180,14 @@
     RET
 
 INIT_YMM avx2
-cglobal intra_pred_ang16_9, 3, 5, 12
+cglobal intra_pred_ang16_9, 3, 6, 12
     mova              m11, [pw_1024]
+    lea               r5, [intra_pred_shuff_0_8]
 
     vbroadcasti128    m9, [r2 + 1 + 32]
-    pshufb            m9, [intra_pred_shuff_0_8]
+    pshufb            m9, [r5]
     vbroadcasti128    m10, [r2 + 9 + 32]
-    pshufb            m10, [intra_pred_shuff_0_8]
+    pshufb            m10, [r5]
 
     lea               r3, [3 * r1]
     lea               r4, [c_ang16_mode_9]
@@ -12201,11 +12204,11 @@
     INTRA_PRED_ANG16_CAL_ROW m6, m7, 2
 
     movu              xm7, [r2 + 2 + 32]
-    pshufb            xm7, [intra_pred_shuff_0_8]
+    pshufb            xm7, [r5]
     vinserti128       m9, m9, xm7, 1
 
     movu              xm7, [r2 + 10 + 32]
-    pshufb            xm7, [intra_pred_shuff_0_8]
+    pshufb            xm7, [r5]
     vinserti128       m10, m10, xm7, 1
 
     INTRA_PRED_ANG16_CAL_ROW m7, m8, 3

