[x265] [PATCH 1 of 3] asm: reduce binary size
Praveen Tiwari
praveen at multicorewareinc.com
Wed Apr 1 07:39:30 CEST 2015
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1427791659 -19800
# Tue Mar 31 14:17:39 2015 +0530
# Node ID 6b9308ad7368a9f18e13d29e177dd2e720996ccb
# Parent ac85c775620f1dcb0df056874633cbf916098bd2
asm: reduce binary size
diff -r ac85c775620f -r 6b9308ad7368 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Tue Mar 31 20:04:28 2015 -0500
+++ b/source/common/x86/intrapred8.asm Tue Mar 31 14:17:39 2015 +0530
@@ -13665,12 +13665,11 @@
vbroadcasti128 m5, [r2 + 25]
pshufb m5, m1
- mova m10, [r4 + 0 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 0 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 0 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13686,12 +13685,11 @@
vbroadcasti128 m5, [r2 + 26]
pshufb m5, m1
- mova m10, [r4 + 1 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 1 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 1 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13707,12 +13705,11 @@
vbroadcasti128 m5, [r2 + 27]
pshufb m5, m1
- mova m10, [r4 + 2 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 2 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 2 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13728,12 +13725,11 @@
vbroadcasti128 m5, [r2 + 28]
pshufb m5, m1
- mova m10, [r4 + 3 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 3 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 3 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13767,12 +13763,11 @@
vbroadcasti128 m5, [r2 + 30]
pshufb m5, m1
- mova m10, [r4 + 1 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 1 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 1 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13788,12 +13783,11 @@
vbroadcasti128 m5, [r2 + 31]
pshufb m5, m1
- mova m10, [r4 + 2 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 2 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 2 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13810,12 +13804,11 @@
pshufb m5, m1
lea r0, [r0 + 4 * r1]
- mova m10, [r4 + 3 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 3 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 3 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13848,12 +13841,11 @@
vbroadcasti128 m5, [r2 + 34]
pshufb m5, m1
- mova m10, [r4 + 1 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 1 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 1 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13870,12 +13862,11 @@
pshufb m5, m1
lea r0, [r0 + 4 * r1]
- mova m10, [r4 + 2 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 2 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 2 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13891,12 +13882,11 @@
vbroadcasti128 m5, [r2 + 36]
pshufb m5, m1
- mova m10, [r4 + 3 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 3 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 3 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13913,12 +13903,11 @@
pshufb m5, m1
add r4, 4 * mmsize
- mova m10, [r4 + 0 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 0 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 0 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13951,12 +13940,11 @@
vbroadcasti128 m5, [r2 + 39]
pshufb m5, m1
- mova m10, [r4 + 2 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 2 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 2 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13972,12 +13960,11 @@
vbroadcasti128 m5, [r2 + 40]
pshufb m5, m1
- mova m10, [r4 + 3 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 3 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 3 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -13994,12 +13981,11 @@
pshufb m5, m1
add r4, 4 * mmsize
- mova m10, [r4 + 0 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 0 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 0 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -14032,12 +14018,11 @@
vbroadcasti128 m5, [r2 + 43]
pshufb m5, m1
- mova m10, [r4 + 2 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 2 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 2 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -14053,12 +14038,11 @@
vbroadcasti128 m5, [r2 + 44]
pshufb m5, m1
- mova m10, [r4 + 3 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 3 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 3 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -14076,12 +14060,11 @@
add r4, 4 * mmsize
lea r0, [r0 + 4 * r1]
- mova m10, [r4 + 0 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 0 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 0 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -14113,12 +14096,11 @@
vbroadcasti128 m5, [r2 + 47]
pshufb m5, m1
- mova m10, [r4 + 2 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 2 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 2 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -14135,12 +14117,11 @@
pshufb m5, m1
lea r0, [r0 + 4 * r1]
- mova m10, [r4 + 3 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 3 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 3 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -14157,12 +14138,11 @@
pshufb m5, m1
add r4, 4 * mmsize
- mova m10, [r4 + 0 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 0 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 0 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -14178,12 +14158,11 @@
vbroadcasti128 m5, [r2 + 50]
pshufb m5, m1
- mova m10, [r4 + 1 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 1 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 1 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
@@ -14199,12 +14178,11 @@
vbroadcasti128 m5, [r2 + 51]
pshufb m5, m1
- mova m10, [r4 + 2 * mmsize]
- vperm2i128 m6, m2, m3, 00100000b
- pmaddubsw m6, m10
- pmulhrsw m6, m0
- vperm2i128 m7, m4, m5, 00100000b
- pmaddubsw m7, m10
+ vperm2i128 m6, m2, m3, 00100000b
+ pmaddubsw m6, [r4 + 2 * mmsize]
+ pmulhrsw m6, m0
+ vperm2i128 m7, m4, m5, 00100000b
+ pmaddubsw m7, [r4 + 2 * mmsize]
pmulhrsw m7, m0
packuswb m6, m7
vpermq m6, m6, 11011000b
More information about the x265-devel mailing list