[x265] [PATCH] all_angs_pred_32x32, asm code improvement
Praveen Tiwari
praveen at multicorewareinc.com
Wed Feb 26 13:28:52 CET 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1393417704 -19800
# Node ID 7de2875c614058648475618d2b9faa5a9611225b
# Parent 53c7e3e789435a3e7b51f1ad61e9425f59ea6cf7
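all_angs_pred_32x32, asm code improvement: replace each pair of pinsrb byte inserts (lanes 1 and 0) with a single pinsrw word insert at lane 0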
diff -r 53c7e3e78943 -r 7de2875c6140 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Tue Feb 25 18:48:08 2014 +0530
+++ b/source/common/x86/intrapred8.asm Wed Feb 26 17:58:24 2014 +0530
@@ -21028,20 +21028,17 @@
pmaddubsw m3, m6, [r5 + 24 * 16]
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 6], 1
-pinsrb m2, [r4 + 5], 0
+pinsrw m2, [r4 + 5], 0
pmaddubsw m5, m2, [r5 + 24 * 16]
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 782 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 14], 1
-pinsrb m1, [r4 + 13], 0
+pinsrw m1, [r4 + 13], 0
pmaddubsw m3, m1, [r5 + 24 * 16]
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 22], 1
-pinsrb m4, [r4 + 21], 0
+pinsrw m4, [r4 + 21], 0
pmaddubsw m5, m4, [r5 + 24 * 16]
pmulhrsw m5, m7
packuswb m3, m5
@@ -21242,15 +21239,13 @@
pmaddubsw m3, m7, [r5 + 30 * 16]
pmulhrsw m3, [pw_1024]
pslldq m2, 2
-pinsrb m2, [r4 + 5], 1
-pinsrb m2, [r4 + 4], 0
+pinsrw m2, [r4 + 4], 0
pmaddubsw m5, m2, [r5 + 30 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
movu [r0 + 786 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 13], 1
-pinsrb m1, [r4 + 12], 0
+pinsrw m1, [r4 + 12], 0
pmaddubsw m3, m1, [r5 + 30 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
@@ -21459,20 +21454,17 @@
pmaddubsw m3, m6, [r5 + 30 * 16]
pmulhrsw m3, [pw_1024]
pslldq m2, 2
-pinsrb m2, [r4 + 4], 1
-pinsrb m2, [r4 + 3], 0
+pinsrw m2, [r4 + 3], 0
pmaddubsw m5, m2, [r5 + 30 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
movu [r0 + 738 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 12], 1
-pinsrb m1, [r4 + 11], 0
+pinsrw m1, [r4 + 11], 0
pmaddubsw m3, m1, [r5 + 30 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 20], 1
-pinsrb m4, [r4 + 19], 0
+pinsrw m4, [r4 + 19], 0
pmaddubsw m5, m4, [r5 + 30 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -21582,20 +21574,17 @@
pmaddubsw m3, m6, [r5 + 26 * 16]
pmulhrsw m3, [pw_1024]
pslldq m2, 2
-pinsrb m2, [r4 + 3], 1
-pinsrb m2, [r4 + 2], 0
+pinsrw m2, [r4 + 2], 0
pmaddubsw m5, m2, [r5 + 26 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
movu [r0 + 746 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 11], 1
-pinsrb m1, [r4 + 10], 0
+pinsrw m1, [r4 + 10], 0
pmaddubsw m3, m1, [r5 + 26 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 19], 1
-pinsrb m4, [r4 + 18], 0
+pinsrw m4, [r4 + 18], 0
pmaddubsw m5, m4, [r5 + 26 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -21705,20 +21694,17 @@
pmaddubsw m3, m6, [r5 + 31 * 16]
pmulhrsw m3, [pw_1024]
pslldq m2, 2
-pinsrb m2, [r4 + 2], 1
-pinsrb m2, [r4 + 1], 0
+pinsrw m2, [r4 + 1], 0
pmaddubsw m5, m2, [r5 + 31 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
movu [r0 + 752 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 10], 1
-pinsrb m1, [r4 + 9], 0
+pinsrw m1, [r4 + 9], 0
pmaddubsw m3, m1, [r5 + 31 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 18], 1
-pinsrb m4, [r4 + 17], 0
+pinsrw m4, [r4 + 17], 0
pmaddubsw m5, m4, [r5 + 31 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -21828,20 +21814,17 @@
pmaddubsw m3, m6, [r5 + 27 * 16]
pmulhrsw m3, [pw_1024]
pslldq m2, 2
-pinsrb m2, [r4 + 1], 1
-pinsrb m2, [r4 + 0], 0
+pinsrw m2, [r4 + 0], 0
pmaddubsw m5, m2, [r5 + 27 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
movu [r0 + 760 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 9], 1
-pinsrb m1, [r4 + 8], 0
+pinsrw m1, [r4 + 8], 0
pmaddubsw m3, m1, [r5 + 27 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 17], 1
-pinsrb m4, [r4 + 16], 0
+pinsrw m4, [r4 + 16], 0
pmaddubsw m5, m4, [r5 + 27 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -21946,13 +21929,11 @@
packuswb m3, m5
movu [r0 + 812 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 8], 1
-pinsrb m1, [r4 + 7], 0
+pinsrw m1, [r4 + 7], 0
pmaddubsw m3, m1, [r5 + 21 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 16], 1
-pinsrb m4, [r4 + 15], 0
+pinsrw m4, [r4 + 15], 0
pmaddubsw m5, m4, [r5 + 21 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -22010,13 +21991,11 @@
packuswb m3, m5
movu [r0 + 816 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 7], 1
-pinsrb m1, [r4 + 6], 0
+pinsrw m1, [r4 + 6], 0
pmaddubsw m3, m1, [r5 + 27 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 15], 1
-pinsrb m4, [r4 + 14], 0
+pinsrw m4, [r4 + 14], 0
pmaddubsw m5, m4, [r5 + 27 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -22178,13 +22157,11 @@
packuswb m3, m5
movu [r0 + 822 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 6], 1
-pinsrb m1, [r4 + 5], 0
+pinsrw m1, [r4 + 5], 0
pmaddubsw m3, m1, [r5 + 20 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 14], 1
-pinsrb m4, [r4 + 13], 0
+pinsrw m4, [r4 + 13], 0
pmaddubsw m5, m4, [r5 + 20 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -22234,13 +22211,11 @@
packuswb m3, m5
movu [r0 + 826 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 5], 1
-pinsrb m1, [r4 + 4], 0
+pinsrw m1, [r4 + 4], 0
pmaddubsw m3, m1, [r5 + 26 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 13], 1
-pinsrb m4, [r4 + 12], 0
+pinsrw m4, [r4 + 12], 0
pmaddubsw m5, m4, [r5 + 26 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -22296,13 +22271,11 @@
; mode 15 [row 24]
pslldq m1, 2
-pinsrb m1, [r4 + 4], 1
-pinsrb m1, [r4 + 3], 0
+pinsrw m1, [r4 + 3], 0
pmaddubsw m3, m1, [r5 + 23 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 12], 1
-pinsrb m4, [r4 + 11], 0
+pinsrw m4, [r4 + 11], 0
pmaddubsw m5, m4, [r5 + 23 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -22318,13 +22291,11 @@
; mode 15 [row 26]
pslldq m1, 2
-pinsrb m1, [r4 + 3], 1
-pinsrb m1, [r4 + 2], 0
+pinsrw m1, [r4 + 2], 0
pmaddubsw m3, m1, [r5 + 21 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 11], 1
-pinsrb m4, [r4 + 10], 0
+pinsrw m4, [r4 + 10], 0
pmaddubsw m5, m4, [r5 + 21 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -22340,13 +22311,11 @@
; mode 15 [row 28]
pslldq m1, 2
-pinsrb m1, [r4 + 2], 1
-pinsrb m1, [r4 + 1], 0
+pinsrw m1, [r4 + 1], 0
pmaddubsw m3, m1, [r5 + 19 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 10], 1
-pinsrb m4, [r4 + 9], 0
+pinsrw m4, [r4 + 9], 0
pmaddubsw m5, m4, [r5 + 19 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -22362,13 +22331,11 @@
; mode 15 [row 30]
pslldq m1, 2
-pinsrb m1, [r4 + 1], 1
-pinsrb m1, [r4 + 0], 0
+pinsrw m1, [r4 + 0], 0
pmaddubsw m3, m1, [r5 + 17 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 9], 1
-pinsrb m4, [r4 + 8], 0
+pinsrw m4, [r4 + 8], 0
pmaddubsw m5, m4, [r5 + 17 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
@@ -22417,21 +22384,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 8], 1
-pinsrb m2, [r4 + 7], 0
+pinsrw m2, [r4 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 898 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 16], 1
-pinsrb m1, [r4 + 15], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 24], 1
-pinsrb m4, [r4 + 23], 0
+pinsrw m1, [r4 + 15], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 23], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22461,21 +22425,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 7], 1
-pinsrb m2, [r4 + 6], 0
+pinsrw m2, [r4 + 6], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 902 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 15], 1
-pinsrb m1, [r4 + 14], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 23], 1
-pinsrb m4, [r4 + 22], 0
+pinsrw m1, [r4 + 14], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 22], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22489,21 +22450,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 6], 1
-pinsrb m2, [r4 + 5], 0
+pinsrw m2, [r4 + 5], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 904 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 14], 1
-pinsrb m1, [r4 + 13], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 22], 1
-pinsrb m4, [r4 + 21], 0
+pinsrw m1, [r4 + 13], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 21], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22539,15 +22497,12 @@
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 908 * 16], m3
-
-pslldq m1, 2
-pinsrb m1, [r4 + 13], 1
-pinsrb m1, [r4 + 12], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 21], 1
-pinsrb m4, [r4 + 20], 0
+pslldq m1, 2
+pinsrw m1, [r4 + 12], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 20], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22561,21 +22516,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 4], 1
-pinsrb m2, [r4 + 3], 0
+pinsrw m2, [r4 + 3], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 910 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 12], 1
-pinsrb m1, [r4 + 11], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 20], 1
-pinsrb m4, [r4 + 19], 0
+pinsrw m1, [r4 + 11], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 19], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22605,21 +22557,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 3], 1
-pinsrb m2, [r4 + 2], 0
+pinsrw m2, [r4 + 2], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 914 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 11], 1
-pinsrb m1, [r4 + 10], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 19], 1
-pinsrb m4, [r4 + 18], 0
+pinsrw m1, [r4 + 10], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 18], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22633,16 +22582,14 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 2], 1
-pinsrb m2, [r4 + 1], 0
+pinsrw m2, [r4 + 1], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 916 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 10], 1
-pinsrb m1, [r4 + 9], 0
+pinsrw m1, [r4 + 9], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
@@ -22677,21 +22624,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 1], 1
-pinsrb m2, [r4 + 0], 0
+pinsrw m2, [r4 + 0], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 920 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 9], 1
-pinsrb m1, [r4 + 8], 0
+pinsrw m1, [r4 + 8], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 17], 1
-pinsrb m4, [r4 + 16], 0
+pinsrw m4, [r4 + 16], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22713,13 +22657,11 @@
movu [r0 + 922 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 8], 1
-pinsrb m1, [r4 + 7], 0
+pinsrw m1, [r4 + 7], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 16], 1
-pinsrb m4, [r4 + 15], 0
+pinsrw m4, [r4 + 15], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22757,13 +22699,11 @@
movu [r0 + 926 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 7], 1
-pinsrb m1, [r4 + 6], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 15], 1
-pinsrb m4, [r4 + 14], 0
+pinsrw m1, [r4 + 6], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 14], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22785,13 +22725,11 @@
movu [r0 + 928 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 6], 1
-pinsrb m1, [r4 + 5], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 14], 1
-pinsrb m4, [r4 + 13], 0
+pinsrw m1, [r4 + 5], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 13], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22829,13 +22767,11 @@
movu [r0 + 932 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 5], 1
-pinsrb m1, [r4 + 4], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 13], 1
-pinsrb m4, [r4 + 12], 0
+pinsrw m1, [r4 + 4], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 12], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22857,13 +22793,11 @@
movu [r0 + 934 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 4], 1
-pinsrb m1, [r4 + 3], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 12], 1
-pinsrb m4, [r4 + 11], 0
+pinsrw m1, [r4 + 3], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 11], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22901,13 +22835,11 @@
movu [r0 + 938 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 3], 1
-pinsrb m1, [r4 + 2], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 11], 1
-pinsrb m4, [r4 + 10], 0
+pinsrw m1, [r4 + 2], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 10], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22929,13 +22861,11 @@
movu [r0 + 940 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 2], 1
-pinsrb m1, [r4 + 1], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 10], 1
-pinsrb m4, [r4 + 9], 0
+pinsrw m1, [r4 + 1], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 9], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -22973,13 +22903,11 @@
movu [r0 + 944 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 1], 1
-pinsrb m1, [r4 + 0], 0
-pmaddubsw m3, m1, m6
-pmulhrsw m3, m7
-pslldq m4, 2
-pinsrb m4, [r4 + 9], 1
-pinsrb m4, [r4 + 8], 0
+pinsrw m1, [r4 + 0], 0
+pmaddubsw m3, m1, m6
+pmulhrsw m3, m7
+pslldq m4, 2
+pinsrw m4, [r4 + 8], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23006,8 +22934,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 8], 1
-pinsrb m4, [r4 + 7], 0
+pinsrw m4, [r4 + 7], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23050,8 +22977,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 7], 1
-pinsrb m4, [r4 + 6], 0
+pinsrw m4, [r4 + 6], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23078,8 +23004,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 6], 1
-pinsrb m4, [r4 + 5], 0
+pinsrw m4, [r4 + 5], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23122,8 +23047,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 5], 1
-pinsrb m4, [r4 + 4], 0
+pinsrw m4, [r4 + 4], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23176,21 +23100,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 8], 1
-pinsrb m2, [r4 + 7], 0
+pinsrw m2, [r4 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 962 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 16], 1
-pinsrb m1, [r4 + 15], 0
+pinsrw m1, [r4 + 15], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 24], 1
-pinsrb m4, [r4 + 23], 0
+pinsrw m4, [r4 + 23], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23204,21 +23125,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 7], 1
-pinsrb m2, [r4 + 6], 0
+pinsrw m2, [r4 + 6], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 964 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 15], 1
-pinsrb m1, [r4 + 14], 0
+pinsrw m1, [r4 + 14], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 23], 1
-pinsrb m4, [r4 + 22], 0
+pinsrw m4, [r4 + 22], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23232,21 +23150,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 6], 1
-pinsrb m2, [r4 + 5], 0
+pinsrw m2, [r4 + 5], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 966 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 14], 1
-pinsrb m1, [r4 + 13], 0
+pinsrw m1, [r4 + 13], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 22], 1
-pinsrb m4, [r4 + 21], 0
+pinsrw m4, [r4 + 21], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23260,21 +23175,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 5], 1
-pinsrb m2, [r4 + 4], 0
+pinsrw m2, [r4 + 4], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 968 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 13], 1
-pinsrb m1, [r4 + 12], 0
+pinsrw m1, [r4 + 12], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 21], 1
-pinsrb m4, [r4 + 20], 0
+pinsrw m4, [r4 + 20], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23304,21 +23216,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 4], 1
-pinsrb m2, [r4 + 3], 0
+pinsrw m2, [r4 + 3], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 972 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 12], 1
-pinsrb m1, [r4 + 11], 0
+pinsrw m1, [r4 + 11], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 20], 1
-pinsrb m4, [r4 + 19], 0
+pinsrw m4, [r4 + 19], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23332,21 +23241,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 3], 1
-pinsrb m2, [r4 + 2], 0
+pinsrw m2, [r4 + 2], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 974 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 11], 1
-pinsrb m1, [r4 + 10], 0
+pinsrw m1, [r4 + 10], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 19], 1
-pinsrb m4, [r4 + 18], 0
+pinsrw m4, [r4 + 18], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23360,21 +23266,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 2], 1
-pinsrb m2, [r4 + 1], 0
+pinsrw m2, [r4 + 1], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 976 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 10], 1
-pinsrb m1, [r4 + 9], 0
+pinsrw m1, [r4 + 9], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 18], 1
-pinsrb m4, [r4 + 17], 0
+pinsrw m4, [r4 + 17], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23388,21 +23291,18 @@
pmaddubsw m3, m0, m6
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 1], 1
-pinsrb m2, [r4 + 0], 0
+pinsrw m2, [r4 + 0], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 978 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 9], 1
-pinsrb m1, [r4 + 8], 0
+pinsrw m1, [r4 + 8], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 17], 1
-pinsrb m4, [r4 + 16], 0
+pinsrw m4, [r4 + 16], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23440,13 +23340,11 @@
movu [r0 + 982 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 8], 1
-pinsrb m1, [r4 + 7], 0
+pinsrw m1, [r4 + 7], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 16], 1
-pinsrb m4, [r4 + 15], 0
+pinsrw m4, [r4 + 15], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23468,13 +23366,11 @@
movu [r0 + 984 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 7], 1
-pinsrb m1, [r4 + 6], 0
+pinsrw m1, [r4 + 6], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 15], 1
-pinsrb m4, [r4 + 14], 0
+pinsrw m4, [r4 + 14], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23496,13 +23392,11 @@
movu [r0 + 986 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 6], 1
-pinsrb m1, [r4 + 5], 0
+pinsrw m1, [r4 + 5], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 14], 1
-pinsrb m4, [r4 + 13], 0
+pinsrw m4, [r4 + 13], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23524,13 +23418,11 @@
movu [r0 + 988 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 5], 1
-pinsrb m1, [r4 + 4], 0
+pinsrw m1, [r4 + 4], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 13], 1
-pinsrb m4, [r4 + 12], 0
+pinsrw m4, [r4 + 12], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23562,13 +23454,11 @@
movu [r0 + 992 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 4], 1
-pinsrb m1, [r4 + 3], 0
+pinsrw m1, [r4 + 3], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 12], 1
-pinsrb m4, [r4 + 11], 0
+pinsrw m4, [r4 + 11], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23590,13 +23480,11 @@
movu [r0 + 994 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 3], 1
-pinsrb m1, [r4 + 2], 0
+pinsrw m1, [r4 + 2], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 11], 1
-pinsrb m4, [r4 + 10], 0
+pinsrw m4, [r4 + 10], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23618,13 +23506,11 @@
movu [r0 + 996 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 2], 1
-pinsrb m1, [r4 + 1], 0
+pinsrw m1, [r4 + 1], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 10], 1
-pinsrb m4, [r4 + 9], 0
+pinsrw m4, [r4 + 9], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23646,13 +23532,11 @@
movu [r0 + 998 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 1], 1
-pinsrb m1, [r4 + 0], 0
+pinsrw m1, [r4 + 0], 0
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 9], 1
-pinsrb m4, [r4 + 8], 0
+pinsrw m4, [r4 + 8], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23679,8 +23563,9 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 8], 1
-pinsrb m4, [r4 + 7], 0
+;pinsrb m4, [r4 + 8], 1
+;pinsrb m4, [r4 + 7], 0
+pinsrw m4, [r4 + 7], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23723,8 +23608,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 7], 1
-pinsrb m4, [r4 + 6], 0
+pinsrw m4, [r4 + 6], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23751,8 +23635,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 6], 1
-pinsrb m4, [r4 + 5], 0
+pinsrw m4, [r4 + 5], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23779,8 +23662,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 5], 1
-pinsrb m4, [r4 + 4], 0
+pinsrw m4, [r4 + 4], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23807,8 +23689,9 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 4], 1
-pinsrb m4, [r4 + 3], 0
+;pinsrb m4, [r4 + 4], 1
+;pinsrb m4, [r4 + 3], 0
+pinsrw m4, [r4 + 3], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23851,8 +23734,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 3], 1
-pinsrb m4, [r4 + 2], 0
+pinsrw m4, [r4 + 2], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23879,8 +23761,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 2], 1
-pinsrb m4, [r4 + 1], 0
+pinsrw m4, [r4 + 1], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -23907,8 +23788,7 @@
pmaddubsw m3, m1, m6
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 1], 1
-pinsrb m4, [r4 + 0], 0
+pinsrw m4, [r4 + 0], 0
pmaddubsw m5, m4, m6
pmulhrsw m5, m7
packuswb m3, m5
@@ -24242,20 +24122,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 8], 1
-pinsrb m2, [r3 + 7], 0
+pinsrw m2, [r3 + 7], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1090 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 16], 1
-pinsrb m1, [r3 + 15], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 24], 1
-pinsrb m3, [r3 + 23], 0
+pinsrw m1, [r3 + 15], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 23], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24269,20 +24146,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 7], 1
-pinsrb m2, [r3 + 6], 0
+pinsrw m2, [r3 + 6], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1092 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 15], 1
-pinsrb m1, [r3 + 14], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 23], 1
-pinsrb m3, [r3 + 22], 0
+pinsrw m1, [r3 + 14], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 22], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24296,20 +24170,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 6], 1
-pinsrb m2, [r3 + 5], 0
+pinsrw m2, [r3 + 5], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1094 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 14], 1
-pinsrb m1, [r3 + 13], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 22], 1
-pinsrb m3, [r3 + 21], 0
+pinsrw m1, [r3 + 13], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 21], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24323,20 +24194,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 5], 1
-pinsrb m2, [r3 + 4], 0
+pinsrw m2, [r3 + 4], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1096 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 13], 1
-pinsrb m1, [r3 + 12], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 21], 1
-pinsrb m3, [r3 + 20], 0
+pinsrw m1, [r3 + 12], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 20], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24365,20 +24233,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 4], 1
-pinsrb m2, [r3 + 3], 0
+pinsrw m2, [r3 + 3], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1100 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 12], 1
-pinsrb m1, [r3 + 11], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 20], 1
-pinsrb m3, [r3 + 19], 0
+pinsrw m1, [r3 + 11], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 19], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24392,20 +24257,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 3], 1
-pinsrb m2, [r3 + 2], 0
+pinsrw m2, [r3 + 2], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1102 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 11], 1
-pinsrb m1, [r3 + 10], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 19], 1
-pinsrb m3, [r3 + 18], 0
+pinsrw m1, [r3 + 10], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 18], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24419,20 +24281,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 2], 1
-pinsrb m2, [r3 + 1], 0
+pinsrw m2, [r3 + 1], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1104 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 10], 1
-pinsrb m1, [r3 + 9], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 18], 1
-pinsrb m3, [r3 + 17], 0
+pinsrw m1, [r3 + 9], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 17], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24446,20 +24305,17 @@
pmaddubsw m4, m0, m6
pmulhrsw m4, m7
pslldq m2, 2
-pinsrb m2, [r3 + 1], 1
-pinsrb m2, [r3 + 0], 0
+pinsrw m2, [r3 + 0], 0
pmaddubsw m5, m2, m6
pmulhrsw m5, m7
packuswb m4, m5
movu [r0 + 1106 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 9], 1
-pinsrb m1, [r3 + 8], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 17], 1
-pinsrb m3, [r3 + 16], 0
+pinsrw m1, [r3 + 8], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 16], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24495,13 +24351,11 @@
packuswb m4, m5
movu [r0 + 1110 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 8], 1
-pinsrb m1, [r3 + 7], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 16], 1
-pinsrb m3, [r3 + 15], 0
+pinsrw m1, [r3 + 7], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 15], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24522,13 +24376,11 @@
packuswb m4, m5
movu [r0 + 1112 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 7], 1
-pinsrb m1, [r3 + 6], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 15], 1
-pinsrb m3, [r3 + 14], 0
+pinsrw m1, [r3 + 6], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 14], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24549,13 +24401,11 @@
packuswb m4, m5
movu [r0 + 1114 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 6], 1
-pinsrb m1, [r3 + 5], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 14], 1
-pinsrb m3, [r3 + 13], 0
+pinsrw m1, [r3 + 5], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 13], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
@@ -24576,13 +24426,11 @@
packuswb m4, m5
movu [r0 + 1116 * 16], m4
pslldq m1, 2
-pinsrb m1, [r3 + 5], 1
-pinsrb m1, [r3 + 4], 0
-pmaddubsw m4, m1, m6
-pmulhrsw m4, m7
-pslldq m3, 2
-pinsrb m3, [r3 + 13], 1
-pinsrb m3, [r3 + 12], 0
+pinsrw m1, [r3 + 4], 0
+pmaddubsw m4, m1, m6
+pmulhrsw m4, m7
+pslldq m3, 2
+pinsrw m3, [r3 + 12], 0
pmaddubsw m5, m3, m6
pmulhrsw m5, m7
packuswb m4, m5
More information about the x265-devel mailing list