[x265] [PATCH] asm-avx2: align const for more performance

Praveen Tiwari <praveen at multicorewareinc.com>
Mon Mar 9 07:56:13 CET 2015


# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1425884136 -19800
# Node ID 914a720aef8b96180aee32fec205e9999dc755d7
# Parent  043c2418864b0a3ada6f597e6def6ead73d90b5f
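asm-avx2: align const for more performance

Move c_ang8_src1_9_1_9 to the start of the 32-byte-aligned RODATA section
(directly after SECTION_RODATA 32) so that the constant is 32-byte aligned,
and load it with mova instead of movu in intra_pred_ang8_9,
intra_pred_ang8_27, intra_pred_ang8_8 and intra_pred_ang8_28.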

diff -r 043c2418864b -r 914a720aef8b source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Fri Mar 06 13:15:55 2015 -0600
+++ b/source/common/x86/intrapred8.asm	Mon Mar 09 12:25:36 2015 +0530
@@ -26,6 +26,9 @@
 
 SECTION_RODATA 32
 
+; intra_pred constants
+c_ang8_src1_9_1_9:    db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+
 pb_0_8        times 8 db  0,  8
 pb_unpackbw1  times 2 db  1,  8,  2,  8,  3,  8,  4,  8
 pb_swap8:     times 2 db  7,  6,  5,  4,  3,  2,  1,  0
@@ -81,7 +84,6 @@
 c_ang8_7_20:          db 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20, 12, 20
 c_ang8_1_14:          db 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 31, 1, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14, 18, 14
 c_ang8_27_8:          db 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 5, 27, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8, 24, 8
-c_ang8_src1_9_1_9:    db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
 c_ang8_src2_10_2_10:  db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9
 c_ang8_src3_11_3_11:  db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
 
@@ -33009,7 +33011,7 @@
 cglobal intra_pred_ang8_9, 3, 5, 6
     mova              m3, [pw_1024]
     vbroadcasti128    m0, [r2 + 17]
-    movu              m5, [c_ang8_src1_9_1_9]
+    mova              m5, [c_ang8_src1_9_1_9]
 
     pshufb            m0, m5
 
@@ -33053,7 +33055,7 @@
 cglobal intra_pred_ang8_27, 3, 5, 6
     mova              m3, [pw_1024]
     vbroadcasti128    m0, [r2 + 1]
-    movu              m5, [c_ang8_src1_9_1_9]
+    mova              m5, [c_ang8_src1_9_1_9]
 
     pshufb            m0, m5
 
@@ -33169,7 +33171,7 @@
 cglobal intra_pred_ang8_8, 3, 4, 6
     mova              m3, [pw_1024]
     vbroadcasti128    m0, [r2 + 17]
-    movu              m5, [c_ang8_src1_9_1_9]
+    mova              m5, [c_ang8_src1_9_1_9]
 
     pshufb            m1, m0, m5
     pshufb            m2, m0, m5
@@ -33215,7 +33217,7 @@
 cglobal intra_pred_ang8_28, 3, 4, 6
     mova              m3, [pw_1024]
     vbroadcasti128    m0, [r2 + 1]
-    movu              m5, [c_ang8_src1_9_1_9]
+    mova              m5, [c_ang8_src1_9_1_9]
 
     pshufb            m1, m0, m5
     pshufb            m2, m0, m5


More information about the x265-devel mailing list