[x265] [PATCH 2 of 7] asm: intra_pred_ang32_24 improved by ~5% over AVX2

Praveen Tiwari praveen at multicorewareinc.com
Thu Apr 2 06:44:23 CEST 2015


# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1427873015 -19800
#      Wed Apr 01 12:53:35 2015 +0530
# Node ID 3107a47b6704a11681cfc1ded30d47f1aff30a25
# Parent  bd86ef402456d97515f28a71404e12ccd432e8db
asm: intra_pred_ang32_24 improved by ~5% over AVX2

diff -r bd86ef402456 -r 3107a47b6704 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Wed Apr 01 12:35:26 2015 +0530
+++ b/source/common/x86/intrapred8.asm	Wed Apr 01 12:53:35 2015 +0530
@@ -14349,15 +14349,15 @@
     RET
 
 INIT_YMM avx2
-cglobal intra_pred_ang32_24, 3, 5, 11
+cglobal intra_pred_ang32_24, 3, 5, 12
     mova              m0, [pw_1024]
     mova              m1, [intra_pred_shuff_0_8]
     lea               r3, [3 * r1]
     lea               r4, [c_ang32_mode_24]
 
     ;row[0, 1]
-    vbroadcasti128    m2, [r2 + 0]
-    pshufb            m2, m1
+    vbroadcasti128    m11, [r2 + 0]
+    pshufb            m2, m11, m1
     vbroadcasti128    m3, [r2 + 8]
     pshufb            m3, m1
     vbroadcasti128    m4, [r2 + 16]
@@ -14387,9 +14387,9 @@
     movu              [r0 + r1], m6
 
     ;row[6, 7]
-    movu              xm2, [r2 - 1]
-    pinsrb            xm2, [r2 + 70], 0
-    vinserti128       m2, m2, xm2, 1
+    pslldq            xm11, 1
+    pinsrb            xm11, [r2 + 70], 0
+    vinserti128       m2, m11, xm11, 1
     pshufb            m2, m1
     vbroadcasti128    m3, [r2 + 7]
     pshufb            m3, m1
@@ -14421,10 +14421,9 @@
     movu              [r0 + r3], m6
 
     ;row[12, 13]
-    movu              xm2, [r2 - 2]
-    pinsrb            xm2, [r2 + 70], 1
-    pinsrb            xm2, [r2 + 77], 0
-    vinserti128       m2, m2, xm2, 1
+    pslldq            xm11, 1
+    pinsrb            xm11, [r2 + 77], 0
+    vinserti128       m2, m11, xm11, 1
     pshufb            m2, m1
     vbroadcasti128    m3, [r2 + 6]
     pshufb            m3, m1
@@ -14469,11 +14468,9 @@
     movu              [r0 + 2 * r1], m6
 
     ;row[19, 20]
-    movu              xm2, [r2 - 3]
-    pinsrb            xm2, [r2 + 70], 2
-    pinsrb            xm2, [r2 + 77], 1
-    pinsrb            xm2, [r2 + 83], 0
-    vinserti128       m2, m2, xm2, 1
+    pslldq            xm11, 1
+    pinsrb            xm11, [r2 + 83], 0
+    vinserti128       m2, m11, xm11, 1
     pshufb            m2, m1
     vbroadcasti128    m3, [r2 + 5]
     pshufb            m3, m1
@@ -14506,12 +14503,9 @@
     movu              [r0], m6
 
     ;row[25, 26]
-    movu              xm2, [r2 - 4]
-    pinsrb            xm2, [r2 + 70], 3
-    pinsrb            xm2, [r2 + 77], 2
-    pinsrb            xm2, [r2 + 83], 1
-    pinsrb            xm2, [r2 + 90], 0
-    vinserti128       m2, m2, xm2, 1
+    pslldq            xm11, 1
+    pinsrb            xm11, [r2 + 90], 0
+    vinserti128       m2, m11, xm11, 1
     pshufb            m2, m1
     vbroadcasti128    m3, [r2 + 4]
     pshufb            m3, m1


More information about the x265-devel mailing list