[x265] [PATCH] all_angs_pred_32x32, asm code improvement
Praveen Tiwari (praveen at multicorewareinc.com)
Thu Feb 27 11:56:06 CET 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1393498551 -19800
# Node ID d3e3baaf80b490f330d2171e454ad5b7856acaa7
# Parent 7de2875c614058648475618d2b9faa5a9611225b
all_angs_pred_32x32, asm code improvement
diff -r 7de2875c6140 -r d3e3baaf80b4 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Wed Feb 26 17:58:24 2014 +0530
+++ b/source/common/x86/intrapred8.asm Thu Feb 27 16:25:51 2014 +0530
@@ -23689,8 +23689,6 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-;pinsrb m4, [r4 + 4], 1
-;pinsrb m4, [r4 + 3], 0
pinsrw m4, [r4 + 3], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
@@ -24461,13 +24459,11 @@
packuswb m4, m5
movu [r0 + 1120 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 4], 1
-pinsrb m1, [r3 + 3], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 12], 1
-pinsrb m3, [r3 + 11], 0
+pinsrw m1, [r3 + 3], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 11], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24488,13 +24484,11 @@
packuswb m4, m5
movu [r0 + 1122 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 3], 1
-pinsrb m1, [r3 + 2], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 11], 1
-pinsrb m3, [r3 + 10], 0
+pinsrw m1, [r3 + 2], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 10], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24515,13 +24509,11 @@
packuswb m4, m5
movu [r0 + 1124 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 2], 1
-pinsrb m1, [r3 + 1], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 10], 1
-pinsrb m3, [r3 + 9], 0
+pinsrw m1, [r3 + 1], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 9], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24542,13 +24534,11 @@
packuswb m4, m5
movu [r0 + 1126 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 1], 1
-pinsrb m1, [r3 + 0], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 9], 1
-pinsrb m3, [r3 + 8], 0
+pinsrw m1, [r3 + 0], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 8], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24616,8 +24606,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 7], 1
-pinsrb m3, [r3 + 6], 0
+pinsrw m3, [r3 + 6], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24643,8 +24632,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 6], 1
-pinsrb m3, [r3 + 5], 0
+pinsrw m3, [r3 + 5], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24670,8 +24658,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 5], 1
-pinsrb m3, [r3 + 4], 0
+pinsrw m3, [r3 + 4], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24697,8 +24684,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 4], 1
-pinsrb m3, [r3 + 3], 0
+pinsrw m3, [r3 + 3], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24739,8 +24725,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 3], 1
-pinsrb m3, [r3 + 2], 0
+pinsrw m3, [r3 + 2], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24766,8 +24751,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 2], 1
-pinsrb m3, [r3 + 1], 0
+pinsrw m3, [r3 + 1], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24793,8 +24777,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 1], 1
-pinsrb m3, [r3 + 0], 0
+pinsrw m3, [r3 + 0], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24873,20 +24856,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 8], 1
-pinsrb m2, [r3 + 7], 0
+pinsrw m2, [r3 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1154 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 16], 1
-pinsrb m1, [r3 + 15], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 24], 1
-pinsrb m3, [r3 + 23], 0
+pinsrw m1, [r3 + 15], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 23], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24915,20 +24895,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 7], 1
-pinsrb m2, [r3 + 6], 0
+pinsrw m2, [r3 + 6], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1158 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 15], 1
-pinsrb m1, [r3 + 14], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 23], 1
-pinsrb m3, [r3 + 22], 0
+pinsrw m1, [r3 + 14], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 22], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24942,20 +24919,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 6], 1
-pinsrb m2, [r3 + 5], 0
+pinsrw m2, [r3 + 5], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1160 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 14], 1
-pinsrb m1, [r3 + 13], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 22], 1
-pinsrb m3, [r3 + 21], 0
+pinsrw m1, [r3 + 13], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 21], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24984,20 +24958,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 5], 1
-pinsrb m2, [r3 + 4], 0
+pinsrw m2, [r3 + 4], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1164 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 13], 1
-pinsrb m1, [r3 + 12], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 21], 1
-pinsrb m3, [r3 + 20], 0
+pinsrw m1, [r3 + 12], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 20], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25011,20 +24982,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 4], 1
-pinsrb m2, [r3 + 3], 0
+pinsrw m2, [r3 + 3], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1166 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 12], 1
-pinsrb m1, [r3 + 11], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 20], 1
-pinsrb m3, [r3 + 19], 0
+pinsrw m1, [r3 + 11], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 19], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25060,13 +25028,11 @@
packuswb m4, m5
movu [r0 + 1170 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 11], 1
-pinsrb m1, [r3 + 10], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 19], 1
-pinsrb m3, [r3 + 18], 0
+pinsrw m1, [r3 + 10], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 18], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25080,20 +25046,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 2], 1
-pinsrb m2, [r3 + 1], 0
+pinsrw m2, [r3 + 1], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1172 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 10], 1
-pinsrb m1, [r3 + 9], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 18], 1
-pinsrb m3, [r3 + 17], 0
+pinsrw m1, [r3 + 9], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 17], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25129,13 +25092,11 @@
packuswb m4, m5
movu [r0 + 1176 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 9], 1
-pinsrb m1, [r3 + 8], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 17], 1
-pinsrb m3, [r3 + 16], 0
+pinsrw m1, [r3 + 8], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 16], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25156,13 +25117,11 @@
packuswb m4, m5
movu [r0 + 1178 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 8], 1
-pinsrb m1, [r3 + 7], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 16], 1
-pinsrb m3, [r3 + 15], 0
+pinsrw m1, [r3 + 7], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 15], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25198,13 +25157,11 @@
packuswb m4, m5
movu [r0 + 1182 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 7], 1
-pinsrb m1, [r3 + 6], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 15], 1
-pinsrb m3, [r3 + 14], 0
+pinsrw m1, [r3 + 6], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 14], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25225,13 +25182,11 @@
packuswb m4, m5
movu [r0 + 1184 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 6], 1
-pinsrb m1, [r3 + 5], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 14], 1
-pinsrb m3, [r3 + 13], 0
+pinsrw m1, [r3 + 5], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 13], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25267,13 +25222,11 @@
packuswb m4, m5
movu [r0 + 1188 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 5], 1
-pinsrb m1, [r3 + 4], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 13], 1
-pinsrb m3, [r3 + 12], 0
+pinsrw m1, [r3 + 4], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 12], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25294,13 +25247,11 @@
packuswb m4, m5
movu [r0 + 1190 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 4], 1
-pinsrb m1, [r3 + 3], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 12], 1
-pinsrb m3, [r3 + 11], 0
+pinsrw m1, [r3 + 3], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 11], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25336,13 +25287,11 @@
packuswb m4, m5
movu [r0 + 1194 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 3], 1
-pinsrb m1, [r3 + 2], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 11], 1
-pinsrb m3, [r3 + 10], 0
+pinsrw m1, [r3 + 2], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 10], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25363,13 +25312,11 @@
packuswb m4, m5
movu [r0 + 1196 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 2], 1
-pinsrb m1, [r3 + 1], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 10], 1
-pinsrb m3, [r3 + 9], 0
+pinsrw m1, [r3 + 1], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 9], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25405,13 +25352,11 @@
packuswb m4, m5
movu [r0 + 1200 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 1], 1
-pinsrb m1, [r3 + 0], 0
+pinsrw m1, [r3 + 0], 0
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 9], 1
-pinsrb m3, [r3 + 8], 0
+pinsrw m3, [r3 + 8], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25437,8 +25382,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 8], 1
-pinsrb m3, [r3 + 7], 0
+pinsrw m3, [r3 + 7], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25479,8 +25423,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 7], 1
-pinsrb m3, [r3 + 6], 0
+pinsrw m3, [r3 + 6], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25506,8 +25449,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 6], 1
-pinsrb m3, [r3 + 5], 0
+pinsrw m3, [r3 + 5], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25548,8 +25490,7 @@
pmaddubsw m4, m1, m6
pmulhrsw m4, m7
pslldq m3, 2
-pinsrb m3, [r3 + 5], 1
-pinsrb m3, [r3 + 4], 0
+pinsrw m3, [r3 + 4], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25601,20 +25542,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 8], 1
-pinsrb m2, [r3 + 7], 0
+pinsrw m2, [r3 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1218 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 16], 1
-pinsrb m1, [r3 + 15], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 24], 1
-pinsrb m3, [r3 + 23], 0
+pinsrw m1, [r3 + 15], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 23], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25643,20 +25581,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 7], 1
-pinsrb m2, [r3 + 6], 0
+pinsrw m2, [r3 + 6], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1222 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 15], 1
-pinsrb m1, [r3 + 14], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 23], 1
-pinsrb m3, [r3 + 22], 0
+pinsrw m1, [r3 + 14], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 22], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25685,20 +25620,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 6], 1
-pinsrb m2, [r3 + 5], 0
+pinsrw m2, [r3 + 5], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1226 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 14], 1
-pinsrb m1, [r3 + 13], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 22], 1
-pinsrb m3, [r3 + 21], 0
+pinsrw m1, [r3 + 13], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 21], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25727,20 +25659,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 5], 1
-pinsrb m2, [r3 + 4], 0
+pinsrw m2, [r3 + 4], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1230 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 13], 1
-pinsrb m1, [r3 + 12], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 21], 1
-pinsrb m3, [r3 + 20], 0
+pinsrw m1, [r3 + 12], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 20], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25769,20 +25698,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 4], 1
-pinsrb m2, [r3 + 3], 0
+pinsrw m2, [r3 + 3], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1234 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 12], 1
-pinsrb m1, [r3 + 11], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 20], 1
-pinsrb m3, [r3 + 19], 0
+pinsrw m1, [r3 + 11], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 19], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25811,20 +25737,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 3], 1
-pinsrb m2, [r3 + 2], 0
+pinsrw m2, [r3 + 2], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1238 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 11], 1
-pinsrb m1, [r3 + 10], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 19], 1
-pinsrb m3, [r3 + 18], 0
+pinsrw m1, [r3 + 10], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 18], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25853,20 +25776,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 2], 1
-pinsrb m2, [r3 + 1], 0
+pinsrw m2, [r3 + 1], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1242 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 10], 1
-pinsrb m1, [r3 + 9], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 18], 1
-pinsrb m3, [r3 + 17], 0
+pinsrw m1, [r3 + 9], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 17], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25895,20 +25815,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 1], 1
-pinsrb m2, [r3 + 0], 0
+pinsrw m2, [r3 + 0], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1246 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 9], 1
-pinsrb m1, [r3 + 8], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 17], 1
-pinsrb m3, [r3 + 16], 0
+pinsrw m1, [r3 + 8], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 16], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -25929,13 +25846,11 @@
packuswb m4, m5
movu [r0 + 1248 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 8], 1
-pinsrb m1, [r3 + 7], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 16], 1
-pinsrb m3, [r3 + 15], 0
+pinsrw m1, [r3 + 7], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 15], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26013,13 +25928,11 @@
packuswb m4, m5
movu [r0 + 1256 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 6], 1
-pinsrb m1, [r3 + 5], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 14], 1
-pinsrb m3, [r3 + 13], 0
+pinsrw m1, [r3 + 5], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 13], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26055,13 +25968,11 @@
packuswb m4, m5
movu [r0 + 1260 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 5], 1
-pinsrb m1, [r3 + 4], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 13], 1
-pinsrb m3, [r3 + 12], 0
+pinsrw m1, [r3 + 4], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 12], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26097,13 +26008,11 @@
packuswb m4, m5
movu [r0 + 1264 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 4], 1
-pinsrb m1, [r3 + 3], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 12], 1
-pinsrb m3, [r3 + 11], 0
+pinsrw m1, [r3 + 3], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 11], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26139,13 +26048,11 @@
packuswb m4, m5
movu [r0 + 1268 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 3], 1
-pinsrb m1, [r3 + 2], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 11], 1
-pinsrb m3, [r3 + 10], 0
+pinsrw m1, [r3 + 2], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 10], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26181,13 +26088,11 @@
packuswb m4, m5
movu [r0 + 1272 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 2], 1
-pinsrb m1, [r3 + 1], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 10], 1
-pinsrb m3, [r3 + 9], 0
+pinsrw m1, [r3 + 1], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 9], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26223,13 +26128,11 @@
packuswb m4, m5
movu [r0 + 1276 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 1], 1
-pinsrb m1, [r3 + 0], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 9], 1
-pinsrb m3, [r3 + 8], 0
+pinsrw m1, [r3 + 0], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 8], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26296,20 +26199,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 8], 1
-pinsrb m2, [r3 + 7], 0
+pinsrw m2, [r3 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1284 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 16], 1
-pinsrb m1, [r3 + 15], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 24], 1
-pinsrb m3, [r3 + 23], 0
+pinsrw m1, [r3 + 15], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 23], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26338,20 +26238,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 7], 1
-pinsrb m2, [r3 + 6], 0
+pinsrw m2, [r3 + 6], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1288 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 15], 1
-pinsrb m1, [r3 + 14], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 23], 1
-pinsrb m3, [r3 + 22], 0
+pinsrw m1, [r3 + 14], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 22], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26395,20 +26292,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 6], 1
-pinsrb m2, [r3 + 5], 0
+pinsrw m2, [r3 + 5], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1294 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 14], 1
-pinsrb m1, [r3 + 13], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 22], 1
-pinsrb m3, [r3 + 21], 0
+pinsrw m1, [r3 + 13], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 21], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26437,20 +26331,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 5], 1
-pinsrb m2, [r3 + 4], 0
+pinsrw m2, [r3 + 4], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1298 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 13], 1
-pinsrb m1, [r3 + 12], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 21], 1
-pinsrb m3, [r3 + 20], 0
+pinsrw m1, [r3 + 12], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 20], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26494,20 +26385,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 4], 1
-pinsrb m2, [r3 + 3], 0
+pinsrw m2, [r3 + 3], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1304 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 12], 1
-pinsrb m1, [r3 + 11], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 20], 1
-pinsrb m3, [r3 + 19], 0
+pinsrw m1, [r3 + 11], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 19], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26536,20 +26424,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 3], 1
-pinsrb m2, [r3 + 2], 0
+pinsrw m2, [r3 + 2], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1308 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 11], 1
-pinsrb m1, [r3 + 10], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 19], 1
-pinsrb m3, [r3 + 18], 0
+pinsrw m1, [r3 + 10], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 18], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26593,20 +26478,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 2], 1
-pinsrb m2, [r3 + 1], 0
+pinsrw m2, [r3 + 1], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1314 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 10], 1
-pinsrb m1, [r3 + 9], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 18], 1
-pinsrb m3, [r3 + 17], 0
+pinsrw m1, [r3 + 9], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 17], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26635,20 +26517,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 1], 1
-pinsrb m2, [r3 + 0], 0
+pinsrw m2, [r3 + 0], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1318 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 9], 1
-pinsrb m1, [r3 + 8], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 17], 1
-pinsrb m3, [r3 + 16], 0
+pinsrw m1, [r3 + 8], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 16], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26699,13 +26578,11 @@
packuswb m4, m5
movu [r0 + 1324 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 8], 1
-pinsrb m1, [r3 + 7], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 16], 1
-pinsrb m3, [r3 + 15], 0
+pinsrw m1, [r3 + 7], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 15], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26741,13 +26618,11 @@
packuswb m4, m5
movu [r0 + 1328 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 7], 1
-pinsrb m1, [r3 + 6], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 15], 1
-pinsrb m3, [r3 + 14], 0
+pinsrw m1, [r3 + 6], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 14], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26798,13 +26673,11 @@
packuswb m4, m5
movu [r0 + 1334 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 6], 1
-pinsrb m1, [r3 + 5], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 14], 1
-pinsrb m3, [r3 + 13], 0
+pinsrw m1, [r3 + 5], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 13], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26840,13 +26713,11 @@
packuswb m4, m5
movu [r0 + 1338 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 5], 1
-pinsrb m1, [r3 + 4], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 13], 1
-pinsrb m3, [r3 + 12], 0
+pinsrw m1, [r3 + 4], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 12], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -26943,20 +26814,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 8], 1
-pinsrb m2, [r3 + 7], 0
+pinsrw m2, [r3 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1350 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 16], 1
-pinsrb m1, [r3 + 15], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 24], 1
-pinsrb m3, [r3 + 23], 0
+pinsrw m1, [r3 + 15], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 23], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27015,20 +26883,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 7], 1
-pinsrb m2, [r3 + 6], 0
+pinsrw m2, [r3 + 6], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1358 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 15], 1
-pinsrb m1, [r3 + 14], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 23], 1
-pinsrb m3, [r3 + 22], 0
+pinsrw m1, [r3 + 14], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 22], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27072,20 +26937,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 6], 1
-pinsrb m2, [r3 + 5], 0
+pinsrw m2, [r3 + 5], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1364 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 14], 1
-pinsrb m1, [r3 + 13], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 22], 1
-pinsrb m3, [r3 + 21], 0
+pinsrw m1, [r3 + 13], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 21], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27144,20 +27006,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 5], 1
-pinsrb m2, [r3 + 4], 0
+pinsrw m2, [r3 + 4], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1372 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 13], 1
-pinsrb m1, [r3 + 12], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 21], 1
-pinsrb m3, [r3 + 20], 0
+pinsrw m1, [r3 + 12], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 20], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27201,20 +27060,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 4], 1
-pinsrb m2, [r3 + 3], 0
+pinsrw m2, [r3 + 3], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1378 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 12], 1
-pinsrb m1, [r3 + 11], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 20], 1
-pinsrb m3, [r3 + 19], 0
+pinsrw m1, [r3 + 11], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 19], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27273,20 +27129,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 3], 1
-pinsrb m2, [r3 + 2], 0
+pinsrw m2, [r3 + 2], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1386 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 11], 1
-pinsrb m1, [r3 + 10], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 19], 1
-pinsrb m3, [r3 + 18], 0
+pinsrw m1, [r3 + 10], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 18], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27330,20 +27183,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 2], 1
-pinsrb m2, [r3 + 1], 0
+pinsrw m2, [r3 + 1], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1392 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 10], 1
-pinsrb m1, [r3 + 9], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 18], 1
-pinsrb m3, [r3 + 17], 0
+pinsrw m1, [r3 + 9], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 17], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27402,20 +27252,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 1], 1
-pinsrb m2, [r3 + 0], 0
+pinsrw m2, [r3 + 0], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1400 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 9], 1
-pinsrb m1, [r3 + 8], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 17], 1
-pinsrb m3, [r3 + 16], 0
+pinsrw m1, [r3 + 8], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 16], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27572,20 +27419,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 8], 1
-pinsrb m2, [r3 + 7], 0
+pinsrw m2, [r3 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1420 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 16], 1
-pinsrb m1, [r3 + 15], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 24], 1
-pinsrb m3, [r3 + 23], 0
+pinsrw m1, [r3 + 15], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 23], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27674,20 +27518,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 7], 1
-pinsrb m2, [r3 + 6], 0
+pinsrw m2, [r3 + 6], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1432 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 15], 1
-pinsrb m1, [r3 + 14], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 23], 1
-pinsrb m3, [r3 + 22], 0
+pinsrw m1, [r3 + 14], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 22], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27791,20 +27632,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 6], 1
-pinsrb m2, [r3 + 5], 0
+pinsrw m2, [r3 + 5], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1446 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 14], 1
-pinsrb m1, [r3 + 13], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 22], 1
-pinsrb m3, [r3 + 21], 0
+pinsrw m1, [r3 + 13], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 21], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -27893,20 +27731,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 5], 1
-pinsrb m2, [r3 + 4], 0
+pinsrw m2, [r3 + 4], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1458 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 13], 1
-pinsrb m1, [r3 + 12], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 21], 1
-pinsrb m3, [r3 + 20], 0
+pinsrw m1, [r3 + 12], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 20], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -28249,24 +28084,21 @@
movu m6, [r5 + 30 * 16]
pslldq m0, 2
pinsrb m0, [r4 + 0], 1
-pinsrb m0, [r4 + 16], 0
-pmaddubsw m4, m0, m6
-pmulhrsw m4, m7
-pslldq m2, 2
-pinsrb m2, [r3 + 8], 1
-pinsrb m2, [r3 + 7], 0
+pinsrb m0, [r4 + 16], 0
+pmaddubsw m4, m0, m6
+pmulhrsw m4, m7
+pslldq m2, 2
+pinsrw m2, [r3 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1504 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 16], 1
-pinsrb m1, [r3 + 15], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 24], 1
-pinsrb m3, [r3 + 23], 0
+pinsrw m1, [r3 + 15], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 23], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
More information about the x265-devel mailing list