[x265] [PATCH 7 of 7] asm: optimize buffer address using registers
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Apr 7 14:56:52 CEST 2015
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1428410863 -19800
# Tue Apr 07 18:17:43 2015 +0530
# Node ID e2e9d51a93f9a0682ba16a5637266c13d62fee40
# Parent def9ad7abd8424526d405d725e0dfb5c9eaf35a4
asm: optimize buffer address using registers
diff -r def9ad7abd84 -r e2e9d51a93f9 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Tue Apr 07 17:49:16 2015 +0530
+++ b/source/common/x86/intrapred8.asm Tue Apr 07 18:17:43 2015 +0530
@@ -12058,20 +12058,21 @@
INIT_YMM avx2
-cglobal intra_pred_ang16_7, 3, 5, 12
+cglobal intra_pred_ang16_7, 3, 6, 12
mova m11, [pw_1024]
+ lea r5, [intra_pred_shuff_0_8]
movu xm9, [r2 + 1 + 32]
- pshufb xm9, [intra_pred_shuff_0_8]
+ pshufb xm9, [r5]
movu xm10, [r2 + 9 + 32]
- pshufb xm10, [intra_pred_shuff_0_8]
+ pshufb xm10, [r5]
movu xm7, [r2 + 3 + 32]
- pshufb xm7, [intra_pred_shuff_0_8]
+ pshufb xm7, [r5]
vinserti128 m9, m9, xm7, 1
movu xm8, [r2 + 11 + 32]
- pshufb xm8, [intra_pred_shuff_0_8]
+ pshufb xm8, [r5]
vinserti128 m10, m10, xm8, 1
lea r3, [3 * r1]
@@ -12081,21 +12082,21 @@
INTRA_PRED_ANG16_CAL_ROW m1, m2, 1
movu xm7, [r2 + 4 + 32]
- pshufb xm7, [intra_pred_shuff_0_8]
+ pshufb xm7, [r5]
vinserti128 m9, m9, xm7, 1
movu xm8, [r2 + 12 + 32]
- pshufb xm8, [intra_pred_shuff_0_8]
+ pshufb xm8, [r5]
vinserti128 m10, m10, xm8, 1
INTRA_PRED_ANG16_CAL_ROW m2, m3, 2
movu xm7, [r2 + 2 + 32]
- pshufb xm7, [intra_pred_shuff_0_8]
+ pshufb xm7, [r5]
vinserti128 m9, m9, xm7, 0
movu xm8, [r2 + 10 + 32]
- pshufb xm8, [intra_pred_shuff_0_8]
+ pshufb xm8, [r5]
vinserti128 m10, m10, xm8, 0
INTRA_PRED_ANG16_CAL_ROW m3, m4, 3
@@ -12106,21 +12107,21 @@
INTRA_PRED_ANG16_CAL_ROW m5, m6, 1
movu xm7, [r2 + 5 + 32]
- pshufb xm7, [intra_pred_shuff_0_8]
+ pshufb xm7, [r5]
vinserti128 m9, m9, xm7, 1
movu xm8, [r2 + 13 + 32]
- pshufb xm8, [intra_pred_shuff_0_8]
+ pshufb xm8, [r5]
vinserti128 m10, m10, xm8, 1
INTRA_PRED_ANG16_CAL_ROW m6, m7, 2
movu xm7, [r2 + 3 + 32]
- pshufb xm7, [intra_pred_shuff_0_8]
+ pshufb xm7, [r5]
vinserti128 m9, m9, xm7, 0
movu xm8, [r2 + 11 + 32]
- pshufb xm8, [intra_pred_shuff_0_8]
+ pshufb xm8, [r5]
vinserti128 m10, m10, xm8, 0
INTRA_PRED_ANG16_CAL_ROW m7, m8, 3
@@ -12130,20 +12131,21 @@
RET
INIT_YMM avx2
-cglobal intra_pred_ang16_8, 3, 5, 12
+cglobal intra_pred_ang16_8, 3, 6, 12
mova m11, [pw_1024]
+ lea r5, [intra_pred_shuff_0_8]
movu xm9, [r2 + 1 + 32]
- pshufb xm9, [intra_pred_shuff_0_8]
+ pshufb xm9, [r5]
movu xm10, [r2 + 9 + 32]
- pshufb xm10, [intra_pred_shuff_0_8]
+ pshufb xm10, [r5]
movu xm7, [r2 + 2 + 32]
- pshufb xm7, [intra_pred_shuff_0_8]
+ pshufb xm7, [r5]
vinserti128 m9, m9, xm7, 1
movu xm8, [r2 + 10 + 32]
- pshufb xm8, [intra_pred_shuff_0_8]
+ pshufb xm8, [r5]
vinserti128 m10, m10, xm8, 1
lea r3, [3 * r1]
@@ -12157,11 +12159,11 @@
add r4, 4 * mmsize
movu xm4, [r2 + 3 + 32]
- pshufb xm4, [intra_pred_shuff_0_8]
+ pshufb xm4, [r5]
vinserti128 m9, m9, xm4, 1
movu xm5, [r2 + 11 + 32]
- pshufb xm5, [intra_pred_shuff_0_8]
+ pshufb xm5, [r5]
vinserti128 m10, m10, xm5, 1
INTRA_PRED_ANG16_CAL_ROW m4, m5, 0
@@ -12178,13 +12180,14 @@
RET
INIT_YMM avx2
-cglobal intra_pred_ang16_9, 3, 5, 12
+cglobal intra_pred_ang16_9, 3, 6, 12
mova m11, [pw_1024]
+ lea r5, [intra_pred_shuff_0_8]
vbroadcasti128 m9, [r2 + 1 + 32]
- pshufb m9, [intra_pred_shuff_0_8]
+ pshufb m9, [r5]
vbroadcasti128 m10, [r2 + 9 + 32]
- pshufb m10, [intra_pred_shuff_0_8]
+ pshufb m10, [r5]
lea r3, [3 * r1]
lea r4, [c_ang16_mode_9]
@@ -12201,11 +12204,11 @@
INTRA_PRED_ANG16_CAL_ROW m6, m7, 2
movu xm7, [r2 + 2 + 32]
- pshufb xm7, [intra_pred_shuff_0_8]
+ pshufb xm7, [r5]
vinserti128 m9, m9, xm7, 1
movu xm7, [r2 + 10 + 32]
- pshufb xm7, [intra_pred_shuff_0_8]
+ pshufb xm7, [r5]
vinserti128 m10, m10, xm7, 1
INTRA_PRED_ANG16_CAL_ROW m7, m8, 3
More information about the x265-devel mailing list