[x265] [PATCH 2 of 7] asm: intra_pred_ang32_24 improved by ~5% over AVX2
Praveen Tiwari <praveen at multicorewareinc.com>
Thu Apr 2 06:44:23 CEST 2015
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1427873015 -19800
# Wed Apr 01 12:53:35 2015 +0530
# Node ID 3107a47b6704a11681cfc1ded30d47f1aff30a25
# Parent bd86ef402456d97515f28a71404e12ccd432e8db
asm: intra_pred_ang32_24 improved by ~5% over AVX2
diff -r bd86ef402456 -r 3107a47b6704 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Wed Apr 01 12:35:26 2015 +0530
+++ b/source/common/x86/intrapred8.asm Wed Apr 01 12:53:35 2015 +0530
@@ -14349,15 +14349,15 @@
RET
INIT_YMM avx2
-cglobal intra_pred_ang32_24, 3, 5, 11
+cglobal intra_pred_ang32_24, 3, 5, 12
mova m0, [pw_1024]
mova m1, [intra_pred_shuff_0_8]
lea r3, [3 * r1]
lea r4, [c_ang32_mode_24]
;row[0, 1]
- vbroadcasti128 m2, [r2 + 0]
- pshufb m2, m1
+ vbroadcasti128 m11, [r2 + 0]
+ pshufb m2, m11, m1
vbroadcasti128 m3, [r2 + 8]
pshufb m3, m1
vbroadcasti128 m4, [r2 + 16]
@@ -14387,9 +14387,9 @@
movu [r0 + r1], m6
;row[6, 7]
- movu xm2, [r2 - 1]
- pinsrb xm2, [r2 + 70], 0
- vinserti128 m2, m2, xm2, 1
+ pslldq xm11, 1
+ pinsrb xm11, [r2 + 70], 0
+ vinserti128 m2, m11, xm11, 1
pshufb m2, m1
vbroadcasti128 m3, [r2 + 7]
pshufb m3, m1
@@ -14421,10 +14421,9 @@
movu [r0 + r3], m6
;row[12, 13]
- movu xm2, [r2 - 2]
- pinsrb xm2, [r2 + 70], 1
- pinsrb xm2, [r2 + 77], 0
- vinserti128 m2, m2, xm2, 1
+ pslldq xm11, 1
+ pinsrb xm11, [r2 + 77], 0
+ vinserti128 m2, m11, xm11, 1
pshufb m2, m1
vbroadcasti128 m3, [r2 + 6]
pshufb m3, m1
@@ -14469,11 +14468,9 @@
movu [r0 + 2 * r1], m6
;row[19, 20]
- movu xm2, [r2 - 3]
- pinsrb xm2, [r2 + 70], 2
- pinsrb xm2, [r2 + 77], 1
- pinsrb xm2, [r2 + 83], 0
- vinserti128 m2, m2, xm2, 1
+ pslldq xm11, 1
+ pinsrb xm11, [r2 + 83], 0
+ vinserti128 m2, m11, xm11, 1
pshufb m2, m1
vbroadcasti128 m3, [r2 + 5]
pshufb m3, m1
@@ -14506,12 +14503,9 @@
movu [r0], m6
;row[25, 26]
- movu xm2, [r2 - 4]
- pinsrb xm2, [r2 + 70], 3
- pinsrb xm2, [r2 + 77], 2
- pinsrb xm2, [r2 + 83], 1
- pinsrb xm2, [r2 + 90], 0
- vinserti128 m2, m2, xm2, 1
+ pslldq xm11, 1
+ pinsrb xm11, [r2 + 90], 0
+ vinserti128 m2, m11, xm11, 1
pshufb m2, m1
vbroadcasti128 m3, [r2 + 4]
pshufb m3, m1
More information about the x265-devel mailing list