[x265] [PATCH] asm: reduced 'vpermq' instructions, improved 1040c->800c, 22%
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Fri Sep 25 06:55:04 CEST 2015
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1442311334 -19800
# Tue Sep 15 15:32:14 2015 +0530
# Node ID df65e7f6acf475d33a6ce377167df1a5a2355950
# Parent d890ce2af3de2f1c23f5bf07fef6471417b7f8ef
asm: reduced 'vpermq' instructions, improved 1040c->800c, 22%
diff -r d890ce2af3de -r df65e7f6acf4 source/common/x86/intrapred8_allangs.asm
--- a/source/common/x86/intrapred8_allangs.asm Fri Sep 25 10:20:30 2015 +0530
+++ b/source/common/x86/intrapred8_allangs.asm Tue Sep 15 15:32:14 2015 +0530
@@ -27,62 +27,63 @@
SECTION_RODATA 32
-all_ang4_shuff: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
- db 0, 1, 1, 2, 2, 3, 3, 4, 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6, 3, 4, 4, 5, 5, 6, 6, 7
- db 0, 1, 1, 2, 2, 3, 3, 4, 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6
- db 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5
- db 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 1, 2, 2, 3, 3, 4, 4, 5
- db 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4
- db 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
- db 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12
- db 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12, 4, 0, 0, 9, 9, 10, 10, 11
- db 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12, 2, 0, 0, 9, 9, 10, 10, 11, 2, 0, 0, 9, 9, 10, 10, 11
- db 0, 9, 9, 10, 10, 11, 11, 12, 2, 0, 0, 9, 9, 10, 10, 11, 2, 0, 0, 9, 9, 10, 10, 11, 4, 2, 2, 0, 0, 9, 9, 10
- db 0, 9, 9, 10, 10, 11, 11, 12, 2, 0, 0, 9, 9, 10, 10, 11, 2, 0, 0, 9, 9, 10, 10, 11, 3, 2, 2, 0, 0, 9, 9, 10
- db 0, 9, 9, 10, 10, 11, 11, 12, 1, 0, 0, 9, 9, 10, 10, 11, 2, 1, 1, 0, 0, 9, 9, 10, 4, 2, 2, 1, 1, 0, 0, 9
- db 0, 1, 2, 3, 9, 0, 1, 2, 10, 9, 0, 1, 11, 10, 9, 0, 0, 1, 2, 3, 9, 0, 1, 2, 10, 9, 0, 1, 11, 10, 9, 0
- db 0, 1, 1, 2, 2, 3, 3, 4, 9, 0, 0, 1, 1, 2, 2, 3, 10, 9, 9, 0, 0, 1, 1, 2, 12, 10, 10, 9, 9, 0, 0, 1
- db 0, 1, 1, 2, 2, 3, 3, 4, 10, 0, 0, 1, 1, 2, 2, 3, 10, 0, 0, 1, 1, 2, 2, 3, 11, 10, 10, 0, 0, 1, 1, 2
- db 0, 1, 1, 2, 2, 3, 3, 4, 10, 0, 0, 1, 1, 2, 2, 3, 10, 0, 0, 1, 1, 2, 2, 3, 12, 10, 10, 0, 0, 1, 1, 2
- db 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 10, 0, 0, 1, 1, 2, 2, 3, 10, 0, 0, 1, 1, 2, 2, 3
- db 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 12, 0, 0, 1, 1, 2, 2, 3
- db 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4
- db 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
- db 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5
- db 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6
- db 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6
- db 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6, 3, 4, 4, 5, 5, 6, 6, 7
- db 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6, 3, 4, 4, 5, 5, 6, 6, 7, 4, 5, 5, 6, 6, 7, 7, 8
- db 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8
-
-all_ang4: db 6, 26, 6, 26, 6, 26, 6, 26, 12, 20, 12, 20, 12, 20, 12, 20, 18, 14, 18, 14, 18, 14, 18, 14, 24, 8, 24, 8, 24, 8, 24, 8
- db 11, 21, 11, 21, 11, 21, 11, 21, 22, 10, 22, 10, 22, 10, 22, 10, 1, 31, 1, 31, 1, 31, 1, 31, 12, 20, 12, 20, 12, 20, 12, 20
- db 15, 17, 15, 17, 15, 17, 15, 17, 30, 2, 30, 2, 30, 2, 30, 2, 13, 19, 13, 19, 13, 19, 13, 19, 28, 4, 28, 4, 28, 4, 28, 4
- db 19, 13, 19, 13, 19, 13, 19, 13, 6, 26, 6, 26, 6, 26, 6, 26, 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20
- db 23, 9, 23, 9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, 5, 27, 5, 27, 5, 27, 5, 27, 28, 4, 28, 4, 28, 4, 28, 4
- db 27, 5, 27, 5, 27, 5, 27, 5, 22, 10, 22, 10, 22, 10, 22, 10, 17, 15, 17, 15, 17, 15, 17, 15, 12, 20, 12, 20, 12, 20, 12, 20
- db 30, 2, 30, 2, 30, 2, 30, 2, 28, 4, 28, 4, 28, 4, 28, 4, 26, 6, 26, 6, 26, 6, 26, 6, 24, 8, 24, 8, 24, 8, 24, 8
- db 2, 30, 2, 30, 2, 30, 2, 30, 4, 28, 4, 28, 4, 28, 4, 28, 6, 26, 6, 26, 6, 26, 6, 26, 8, 24, 8, 24, 8, 24, 8, 24
- db 5, 27, 5, 27, 5, 27, 5, 27, 10, 22, 10, 22, 10, 22, 10, 22, 15, 17, 15, 17, 15, 17, 15, 17, 20, 12, 20, 12, 20, 12, 20, 12
- db 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 27, 5, 27, 5, 27, 5, 27, 5, 4, 28, 4, 28, 4, 28, 4, 28
- db 13, 19, 13, 19, 13, 19, 13, 19, 26, 6, 26, 6, 26, 6, 26, 6, 7, 25, 7, 25, 7, 25, 7, 25, 20, 12, 20, 12, 20, 12, 20, 12
- db 17, 15, 17, 15, 17, 15, 17, 15, 2, 30, 2, 30, 2, 30, 2, 30, 19, 13, 19, 13, 19, 13, 19, 13, 4, 28, 4, 28, 4, 28, 4, 28
- db 21, 11, 21, 11, 21, 11, 21, 11, 10, 22, 10, 22, 10, 22, 10, 22, 31, 1, 31, 1, 31, 1, 31, 1, 20, 12, 20, 12, 20, 12, 20, 12
- db 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24
- db 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24
- db 21, 11, 21, 11, 21, 11, 21, 11, 10, 22, 10, 22, 10, 22, 10, 22, 31, 1, 31, 1, 31, 1, 31, 1, 20, 12, 20, 12, 20, 12, 20, 12
- db 17, 15, 17, 15, 17, 15, 17, 15, 2, 30, 2, 30, 2, 30, 2, 30, 19, 13, 19, 13, 19, 13, 19, 13, 4, 28, 4, 28, 4, 28, 4, 28
- db 13, 19, 13, 19, 13, 19, 13, 19, 26, 6, 26, 6, 26, 6, 26, 6, 7, 25, 7, 25, 7, 25, 7, 25, 20, 12, 20, 12, 20, 12, 20, 12
- db 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 27, 5, 27, 5, 27, 5, 27, 5, 4, 28, 4, 28, 4, 28, 4, 28
- db 5, 27, 5, 27, 5, 27, 5, 27, 10, 22, 10, 22, 10, 22, 10, 22, 15, 17, 15, 17, 15, 17, 15, 17, 20, 12, 20, 12, 20, 12, 20, 12
- db 2, 30, 2, 30, 2, 30, 2, 30, 4, 28, 4, 28, 4, 28, 4, 28, 6, 26, 6, 26, 6, 26, 6, 26, 8, 24, 8, 24, 8, 24, 8, 24
- db 30, 2, 30, 2, 30, 2, 30, 2, 28, 4, 28, 4, 28, 4, 28, 4, 26, 6, 26, 6, 26, 6, 26, 6, 24, 8, 24, 8, 24, 8, 24, 8
- db 27, 5, 27, 5, 27, 5, 27, 5, 22, 10, 22, 10, 22, 10, 22, 10, 17, 15, 17, 15, 17, 15, 17, 15, 12, 20, 12, 20, 12, 20, 12, 20
- db 23, 9, 23, 9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, 5, 27, 5, 27, 5, 27, 5, 27, 28, 4, 28, 4, 28, 4, 28, 4
- db 19, 13, 19, 13, 19, 13, 19, 13, 6, 26, 6, 26, 6, 26, 6, 26, 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20
- db 15, 17, 15, 17, 15, 17, 15, 17, 30, 2, 30, 2, 30, 2, 30, 2, 13, 19, 13, 19, 13, 19, 13, 19, 28, 4, 28, 4, 28, 4, 28, 4
- db 11, 21, 11, 21, 11, 21, 11, 21, 22, 10, 22, 10, 22, 10, 22, 10, 1, 31, 1, 31, 1, 31, 1, 31, 12, 20, 12, 20, 12, 20, 12, 20
- db 6, 26, 6, 26, 6, 26, 6, 26, 12, 20, 12, 20, 12, 20, 12, 20, 18, 14, 18, 14, 18, 14, 18, 14, 24, 8, 24, 8, 24, 8, 24, 8
+const allAng4_shuf_mode2, db 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7 ; pshufb control for mode 2; indices start at 1, folding in the 1-byte psrldq the old code did before shuffling
+const allAng4_shuf_mode3_4, db 0, 1, 1, 2, 2, 3, 3, 4, 1, 2, 2, 3, 3, 4, 4, 5, 0, 1, 1, 2, 2, 3, 3, 4, 1, 2, 2, 3, 3, 4, 4, 5 ; row 0, read as [allAng4_shuf_mode3_4]: first half of old mode-3 row (low lane) + first half of old mode-4 row (high lane)
+ db 2, 3, 3, 4, 4, 5, 5, 6, 3, 4, 4, 5, 5, 6, 6, 7, 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6 ; row 1, read as [allAng4_shuf_mode3_4 + mmsize]: second halves of the old mode-3 / mode-4 rows
+const allAng4_shuf_mode5_6, db 0, 1, 1, 2, 2, 3, 3, 4, 1, 2, 2, 3, 3, 4, 4, 5, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4 ; two-row table: [name] and [name + mmsize], same layout as mode3_4
+ db 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6, 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5
+const allAng4_shuf_mode7_8, db 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4 ; row 0 result is kept in m3 by the code and reused for mode 9
+ db 0, 1, 1, 2, 2, 3, 3, 4, 1, 2, 2, 3, 3, 4, 4, 5, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4
+const allAng4_shuf_mode10, db 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 ; read with an xmm pshufb, so only the first 16 bytes are consumed
+const allAng4_shuf_mode11_12, db 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12, 0, 9, 9, 10, 10, 11, 11, 12 ; single row: one shuffle (m3) feeds both mode 11/12 multiplies and the first multiply of mode 13/14
+const allAng4_shuf_mode13_14, db 0, 9, 9, 10, 10, 11, 11, 12, 4, 0, 0, 9, 9, 10, 10, 11, 2, 0, 0, 9, 9, 10, 10, 11, 2, 0, 0, 9, 9, 10, 10, 11 ; single row: used only for the second (m2) half of the mode 13/14 pair
+const allAng4_shuf_mode15_16, db 0, 9, 9, 10, 10, 11, 11, 12, 2, 0, 0, 9, 9, 10, 10, 11, 0, 9, 9, 10, 10, 11, 11, 12, 2, 0, 0, 9, 9, 10, 10, 11
+ db 2, 0, 0, 9, 9, 10, 10, 11, 4, 2, 2, 0, 0, 9, 9, 10, 2, 0, 0, 9, 9, 10, 10, 11, 3, 2, 2, 0, 0, 9, 9, 10
+const allAng4_shuf_mode17, db 0, 9, 9, 10, 10, 11, 11, 12, 1, 0, 0, 9, 9, 10, 10, 11, 2, 1, 1, 0, 0, 9, 9, 10, 4, 2, 2, 1, 1, 0, 0, 9
+ db 0, 1, 2, 3, 9, 0, 1, 2, 10, 9, 0, 1, 11, 10, 9, 0, 0, 1, 2, 3, 9, 0, 1, 2, 10, 9, 0, 1, 11, 10, 9, 0 ; NOTE(review): identical to allAng4_shuf_mode18 below and no [allAng4_shuf_mode17 + mmsize] access is visible in this patch - looks redundant, confirm before removing
+const allAng4_shuf_mode18, db 0, 1, 2, 3, 9, 0, 1, 2, 10, 9, 0, 1, 11, 10, 9, 0, 0, 1, 2, 3, 9, 0, 1, 2, 10, 9, 0, 1, 11, 10, 9, 0
+const allAng4_shuf_mode19_20, db 0, 1, 1, 2, 2, 3, 3, 4, 9, 0, 0, 1, 1, 2, 2, 3, 0, 1, 1, 2, 2, 3, 3, 4, 10, 0, 0, 1, 1, 2, 2, 3
+ db 10, 9, 9, 0, 0, 1, 1, 2, 12, 10, 10, 9, 9, 0, 0, 1, 10, 0, 0, 1, 1, 2, 2, 3, 11, 10, 10, 0, 0, 1, 1, 2
+const allAng4_shuf_mode21_22, db 0, 1, 1, 2, 2, 3, 3, 4, 10, 0, 0, 1, 1, 2, 2, 3, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4
+ db 10, 0, 0, 1, 1, 2, 2, 3, 12, 10, 10, 0, 0, 1, 1, 2, 10, 0, 0, 1, 1, 2, 2, 3, 10, 0, 0, 1, 1, 2, 2, 3
+const allAng4_shuf_mode23_24, db 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4 ; row 0 result is kept in m3 by the code and reused for mode 25
+ db 0, 1, 1, 2, 2, 3, 3, 4, 12, 0, 0, 1, 1, 2, 2, 3, 0, 1, 1, 2, 2, 3, 3, 4, 0, 1, 1, 2, 2, 3, 3, 4
+const allAng4_shuf_mode26, db 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
+const allAng4_shuf_mode27_28, db 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5, 1, 2, 2, 3, 3, 4, 4, 5 ; single row: one shuffle (m3) feeds both mode 27/28 multiplies and the first multiply of mode 29/30
+const allAng4_shuf_mode29_30, db 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6, 2, 3, 3, 4, 4, 5, 5, 6 ; single row: used only for the second (m2) half of the mode 29/30 pair
+const allAng4_shuf_mode31_32, db 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6, 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6
+ db 2, 3, 3, 4, 4, 5, 5, 6, 3, 4, 4, 5, 5, 6, 6, 7, 2, 3, 3, 4, 4, 5, 5, 6, 3, 4, 4, 5, 5, 6, 6, 7
+const allAng4_shuf_mode33, db 1, 2, 2, 3, 3, 4, 4, 5, 2, 3, 3, 4, 4, 5, 5, 6, 3, 4, 4, 5, 5, 6, 6, 7, 4, 5, 5, 6, 6, 7, 7, 8
+const allAng4_shuf_mode34, db 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8
+
+const allAng4_fact_mode3_4, db 6, 26, 6, 26, 6, 26, 6, 26, 12, 20, 12, 20, 12, 20, 12, 20, 11, 21, 11, 21, 11, 21, 11, 21, 22, 10, 22, 10, 22, 10, 22, 10 ; pmaddubsw byte-pair weights; row 0 = first halves of the old mode-3 and mode-4 all_ang4 rows (low lane / high lane)
+ db 18, 14, 18, 14, 18, 14, 18, 14, 24, 8, 24, 8, 24, 8, 24, 8, 1, 31, 1, 31, 1, 31, 1, 31, 12, 20, 12, 20, 12, 20, 12, 20 ; row 1 = second halves; with this split the per-lane packuswb leaves one mode per 128-bit lane, so the old vpermq fix-up is dropped
+const allAng4_fact_mode5_6, db 15, 17, 15, 17, 15, 17, 15, 17, 30, 2, 30, 2, 30, 2, 30, 2, 19, 13, 19, 13, 19, 13, 19, 13, 6, 26, 6, 26, 6, 26, 6, 26
+ db 13, 19, 13, 19, 13, 19, 13, 19, 28, 4, 28, 4, 28, 4, 28, 4, 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20
+const allAng4_fact_mode7_8, db 23, 9, 23, 9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, 27, 5, 27, 5, 27, 5, 27, 5, 22, 10, 22, 10, 22, 10, 22, 10
+ db 5, 27, 5, 27, 5, 27, 5, 27, 28, 4, 28, 4, 28, 4, 28, 4, 17, 15, 17, 15, 17, 15, 17, 15, 12, 20, 12, 20, 12, 20, 12, 20
+const allAng4_fact_mode9, db 30, 2, 30, 2, 30, 2, 30, 2, 28, 4, 28, 4, 28, 4, 28, 4, 26, 6, 26, 6, 26, 6, 26, 6, 24, 8, 24, 8, 24, 8, 24, 8 ; single mode, unchanged from the old table row; multiplied against m3 left over from the mode 7/8 shuffle
+const allAng4_fact_mode11_12, db 2, 30, 2, 30, 2, 30, 2, 30, 4, 28, 4, 28, 4, 28, 4, 28, 5, 27, 5, 27, 5, 27, 5, 27, 10, 22, 10, 22, 10, 22, 10, 22
+ db 6, 26, 6, 26, 6, 26, 6, 26, 8, 24, 8, 24, 8, 24, 8, 24, 15, 17, 15, 17, 15, 17, 15, 17, 20, 12, 20, 12, 20, 12, 20, 12
+const allAng4_fact_mode13_14, db 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 13, 19, 13, 19, 13, 19, 13, 19, 26, 6, 26, 6, 26, 6, 26, 6 ; row 0 is multiplied against the mode11_12 shuffle result (m3), row 1 against the mode13_14 shuffle
+ db 27, 5, 27, 5, 27, 5, 27, 5, 4, 28, 4, 28, 4, 28, 4, 28, 7, 25, 7, 25, 7, 25, 7, 25, 20, 12, 20, 12, 20, 12, 20, 12
+const allAng4_fact_mode15_16, db 17, 15, 17, 15, 17, 15, 17, 15, 2, 30, 2, 30, 2, 30, 2, 30, 21, 11, 21, 11, 21, 11, 21, 11, 10, 22, 10, 22, 10, 22, 10, 22
+ db 19, 13, 19, 13, 19, 13, 19, 13, 4, 28, 4, 28, 4, 28, 4, 28, 31, 1, 31, 1, 31, 1, 31, 1, 20, 12, 20, 12, 20, 12, 20, 12
+const allAng4_fact_mode17, db 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24 ; single mode, same layout as the old row (mode 17 still uses packuswb + vpermq)
+const allAng4_fact_mode19_20, db 26, 6, 26, 6, 26, 6, 26, 6, 20, 12, 20, 12, 20, 12, 20, 12, 21, 11, 21, 11, 21, 11, 21, 11, 10, 22, 10, 22, 10, 22, 10, 22
+ db 14, 18, 14, 18, 14, 18, 14, 18, 8, 24, 8, 24, 8, 24, 8, 24, 31, 1, 31, 1, 31, 1, 31, 1, 20, 12, 20, 12, 20, 12, 20, 12
+const allAng4_fact_mode21_22, db 17, 15, 17, 15, 17, 15, 17, 15, 2, 30, 2, 30, 2, 30, 2, 30, 13, 19, 13, 19, 13, 19, 13, 19, 26, 6, 26, 6, 26, 6, 26, 6
+ db 19, 13, 19, 13, 19, 13, 19, 13, 4, 28, 4, 28, 4, 28, 4, 28, 7, 25, 7, 25, 7, 25, 7, 25, 20, 12, 20, 12, 20, 12, 20, 12
+const allAng4_fact_mode23_24, db 9, 23, 9, 23, 9, 23, 9, 23, 18, 14, 18, 14, 18, 14, 18, 14, 5, 27, 5, 27, 5, 27, 5, 27, 10, 22, 10, 22, 10, 22, 10, 22
+ db 27, 5, 27, 5, 27, 5, 27, 5, 4, 28, 4, 28, 4, 28, 4, 28, 15, 17, 15, 17, 15, 17, 15, 17, 20, 12, 20, 12, 20, 12, 20, 12
+const allAng4_fact_mode25, db 2, 30, 2, 30, 2, 30, 2, 30, 4, 28, 4, 28, 4, 28, 4, 28, 6, 26, 6, 26, 6, 26, 6, 26, 8, 24, 8, 24, 8, 24, 8, 24 ; single mode; multiplied against m3 left over from the mode 23/24 shuffle
+const allAng4_fact_mode27_28, db 30, 2, 30, 2, 30, 2, 30, 2, 28, 4, 28, 4, 28, 4, 28, 4, 27, 5, 27, 5, 27, 5, 27, 5, 22, 10, 22, 10, 22, 10, 22, 10
+ db 26, 6, 26, 6, 26, 6, 26, 6, 24, 8, 24, 8, 24, 8, 24, 8, 17, 15, 17, 15, 17, 15, 17, 15, 12, 20, 12, 20, 12, 20, 12, 20
+const allAng4_fact_mode29_30, db 23, 9, 23, 9, 23, 9, 23, 9, 14, 18, 14, 18, 14, 18, 14, 18, 19, 13, 19, 13, 19, 13, 19, 13, 6, 26, 6, 26, 6, 26, 6, 26 ; row 0 is multiplied against the mode27_28 shuffle result (m3), row 1 against the mode29_30 shuffle
+ db 5, 27, 5, 27, 5, 27, 5, 27, 28, 4, 28, 4, 28, 4, 28, 4, 25, 7, 25, 7, 25, 7, 25, 7, 12, 20, 12, 20, 12, 20, 12, 20
+const allAng4_fact_mode31_32, db 15, 17, 15, 17, 15, 17, 15, 17, 30, 2, 30, 2, 30, 2, 30, 2, 11, 21, 11, 21, 11, 21, 11, 21, 22, 10, 22, 10, 22, 10, 22, 10
+ db 13, 19, 13, 19, 13, 19, 13, 19, 28, 4, 28, 4, 28, 4, 28, 4, 1, 31, 1, 31, 1, 31, 1, 31, 12, 20, 12, 20, 12, 20, 12, 20
+const allAng4_fact_mode33, db 6, 26, 6, 26, 6, 26, 6, 26, 12, 20, 12, 20, 12, 20, 12, 20, 18, 14, 18, 14, 18, 14, 18, 14, 24, 8, 24, 8, 24, 8, 24, 8 ; single mode, same layout as the old row (mode 33 still uses packuswb + vpermq)
SECTION .text
@@ -23075,80 +23076,69 @@
; void all_angs_pred_4x4(pixel *dest, pixel *refPix, pixel *filtPix, int bLuma)
;-----------------------------------------------------------------------------
INIT_YMM avx2
-cglobal all_angs_pred_4x4, 4, 4, 6
+cglobal all_angs_pred_4x4, 2, 2, 6 ; arg/GPR counts drop from 4 to 2: tables are now addressed by label, so r2/r3 are no longer needed as table base pointers
mova m5, [pw_1024]
- lea r2, [all_ang4]
- lea r3, [all_ang4_shuff]
; mode 2
vbroadcasti128 m0, [r1 + 9]
- mova xm1, xm0
- psrldq xm1, 1
- pshufb xm1, [r3]
+ pshufb m1, m0, [allAng4_shuf_mode2] ; one shuffle replaces mova/psrldq/pshufb: the 1-byte shift is folded into the table indices
movu [r0], xm1
; mode 3
- pshufb m1, m0, [r3 + 1 * mmsize]
- pmaddubsw m1, [r2]
+ pshufb m1, m0, [allAng4_shuf_mode3_4] ; row 0 of the paired table: first-half data for mode 3 (low lane) and mode 4 (high lane)
+ pmaddubsw m1, [allAng4_fact_mode3_4]
pmulhrsw m1, m5
; mode 4
- pshufb m2, m0, [r3 + 2 * mmsize]
- pmaddubsw m2, [r2 + 1 * mmsize]
+ pshufb m2, m0, [allAng4_shuf_mode3_4 + mmsize] ; row 1: second-half data for both modes
+ pmaddubsw m2, [allAng4_fact_mode3_4 + mmsize] ; after the per-lane packuswb below each 128-bit lane holds one full mode, hence no vpermq
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (3 - 2) * 16], m1
; mode 5
- pshufb m1, m0, [r3 + 2 * mmsize]
- pmaddubsw m1, [r2 + 2 * mmsize]
+ pshufb m1, m0, [allAng4_shuf_mode5_6] ; same row-0 / row-1 scheme as the mode 3/4 pair
+ pmaddubsw m1, [allAng4_fact_mode5_6]
pmulhrsw m1, m5
; mode 6
- pshufb m2, m0, [r3 + 3 * mmsize]
- pmaddubsw m2, [r2 + 3 * mmsize]
+ pshufb m2, m0, [allAng4_shuf_mode5_6 + mmsize]
+ pmaddubsw m2, [allAng4_fact_mode5_6 + mmsize]
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (5 - 2) * 16], m1
- add r3, 4 * mmsize
- add r2, 4 * mmsize
-
; mode 7
- pshufb m1, m0, [r3 + 0 * mmsize]
- pmaddubsw m1, [r2 + 0 * mmsize]
+ pshufb m3, m0, [allAng4_shuf_mode7_8] ; shuffled pixels go to m3 so mode 9 can reuse them
+ pmaddubsw m1, m3, [allAng4_fact_mode7_8] ; three-operand form writes m1 and leaves m3 intact
pmulhrsw m1, m5
; mode 8
- pshufb m2, m0, [r3 + 1 * mmsize]
- pmaddubsw m2, [r2 + 1 * mmsize]
+ pshufb m2, m0, [allAng4_shuf_mode7_8 + mmsize]
+ pmaddubsw m2, [allAng4_fact_mode7_8 + mmsize]
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (7 - 2) * 16], m1
; mode 9
- pshufb m1, m0, [r3 + 1 * mmsize]
- pmaddubsw m1, [r2 + 2 * mmsize]
- pmulhrsw m1, m5
- packuswb m1, m1
- vpermq m1, m1, 11011000b
- movu [r0 + (9 - 2) * 16], xm1
+ pmaddubsw m3, [allAng4_fact_mode9] ; no new shuffle: m3 still holds the mode 7/8 row-0 shuffle result
+ pmulhrsw m3, m5
+ packuswb m3, m3
+ vpermq m3, m3, 11011000b ; single-mode result spans both lanes, so vpermq is still needed to gather its 16 bytes into xm3
+ movu [r0 + (9 - 2) * 16], xm3
; mode 10
- pshufb xm1, xm0, [r3 + 2 * mmsize]
+ pshufb xm1, xm0, [allAng4_shuf_mode10] ; table addressed by label instead of through r3
movu [r0 + (10 - 2) * 16], xm1
pxor xm1, xm1
@@ -23173,135 +23163,111 @@
; mode 11
vbroadcasti128 m0, [r1]
- pshufb m1, m0, [r3 + 3 * mmsize]
- pmaddubsw m1, [r2 + 3 * mmsize]
+ pshufb m3, m0, [allAng4_shuf_mode11_12] ; one shuffle, kept in m3, serves mode 11/12 (both halves) and the first half of mode 13/14
+ pmaddubsw m1, m3, [allAng4_fact_mode11_12] ; three-operand form writes m1 and leaves m3 intact
pmulhrsw m1, m5
; mode 12
- add r2, 4 * mmsize
-
- pshufb m2, m0, [r3 + 3 * mmsize]
- pmaddubsw m2, [r2 + 0 * mmsize]
+ pmaddubsw m2, m3, [allAng4_fact_mode11_12 + mmsize] ; reuses m3; only the weights differ between the two halves
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (11 - 2) * 16], m1
; mode 13
- add r3, 4 * mmsize
-
- pshufb m1, m0, [r3 + 0 * mmsize]
- pmaddubsw m1, [r2 + 1 * mmsize]
+ pmaddubsw m3, [allAng4_fact_mode13_14] ; last use of the mode 11/12 shuffle result, so m3 is overwritten here
+ pmulhrsw m3, m5
+
+; mode 14
+
+ pshufb m2, m0, [allAng4_shuf_mode13_14] ; second half of the 13/14 pair needs its own shuffle
+ pmaddubsw m2, [allAng4_fact_mode13_14 + mmsize]
+ pmulhrsw m2, m5
+ packuswb m3, m2 ; per-lane pack: each 128-bit lane ends up with one mode, no vpermq needed
+ movu [r0 + (13 - 2) * 16], m3
+
+; mode 15
+
+ pshufb m1, m0, [allAng4_shuf_mode15_16] ; row 0 of the paired shuffle/weight tables
+ pmaddubsw m1, [allAng4_fact_mode15_16]
pmulhrsw m1, m5
-; mode 14
-
- pshufb m2, m0, [r3 + 1 * mmsize]
- pmaddubsw m2, [r2 + 2 * mmsize]
+; mode 16
+
+ pshufb m2, m0, [allAng4_shuf_mode15_16 + mmsize] ; row 1 of the paired tables
+ pmaddubsw m2, [allAng4_fact_mode15_16 + mmsize]
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
- movu [r0 + (13 - 2) * 16], m1
-
-; mode 15
-
- pshufb m1, m0, [r3 + 2 * mmsize]
- pmaddubsw m1, [r2 + 3 * mmsize]
- pmulhrsw m1, m5
-
-; mode 16
-
- add r2, 4 * mmsize
-
- pshufb m2, m0, [r3 + 3 * mmsize]
- pmaddubsw m2, [r2 + 0 * mmsize]
- pmulhrsw m2, m5
- packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (15 - 2) * 16], m1
; mode 17
- add r3, 4 * mmsize
-
- pshufb m1, m0, [r3 + 0 * mmsize]
- pmaddubsw m1, [r2 + 1 * mmsize]
+ pshufb m1, m0, [allAng4_shuf_mode17] ; single-mode tables: only the first 32-byte row is read here
+ pmaddubsw m1, [allAng4_fact_mode17]
pmulhrsw m1, m5
packuswb m1, m1
vpermq m1, m1, 11011000b
; mode 18
- pshufb m2, m0, [r3 + 1 * mmsize]
+ pshufb m2, m0, [allAng4_shuf_mode18] ; table addressed by label instead of through r3
vinserti128 m1, m1, xm2, 1
movu [r0 + (17 - 2) * 16], m1
; mode 19
- pshufb m1, m0, [r3 + 2 * mmsize]
- pmaddubsw m1, [r2 + 2 * mmsize]
+ pshufb m1, m0, [allAng4_shuf_mode19_20] ; row 0 of the paired shuffle/weight tables
+ pmaddubsw m1, [allAng4_fact_mode19_20]
pmulhrsw m1, m5
; mode 20
- pshufb m2, m0, [r3 + 3 * mmsize]
- pmaddubsw m2, [r2 + 3 * mmsize]
+ pshufb m2, m0, [allAng4_shuf_mode19_20 + mmsize] ; row 1 of the paired tables
+ pmaddubsw m2, [allAng4_fact_mode19_20 + mmsize]
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (19 - 2) * 16], m1
; mode 21
- add r2, 4 * mmsize
- add r3, 4 * mmsize
-
- pshufb m1, m0, [r3 + 0 * mmsize]
- pmaddubsw m1, [r2 + 0 * mmsize]
+ pshufb m1, m0, [allAng4_shuf_mode21_22] ; row 0 of the paired shuffle/weight tables
+ pmaddubsw m1, [allAng4_fact_mode21_22]
pmulhrsw m1, m5
; mode 22
- pshufb m2, m0, [r3 + 1 * mmsize]
- pmaddubsw m2, [r2 + 1 * mmsize]
+ pshufb m2, m0, [allAng4_shuf_mode21_22 + mmsize] ; row 1 of the paired tables
+ pmaddubsw m2, [allAng4_fact_mode21_22 + mmsize]
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (21 - 2) * 16], m1
; mode 23
- pshufb m1, m0, [r3 + 2 * mmsize]
- pmaddubsw m1, [r2 + 2 * mmsize]
+ pshufb m3, m0, [allAng4_shuf_mode23_24] ; shuffled pixels go to m3 so mode 25 can reuse them
+ pmaddubsw m1, m3, [allAng4_fact_mode23_24] ; three-operand form writes m1 and leaves m3 intact
pmulhrsw m1, m5
; mode 24
- pshufb m2, m0, [r3 + 3 * mmsize]
- pmaddubsw m2, [r2 + 3 * mmsize]
+ pshufb m2, m0, [allAng4_shuf_mode23_24 + mmsize]
+ pmaddubsw m2, [allAng4_fact_mode23_24 + mmsize]
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (23 - 2) * 16], m1
; mode 25
- add r2, 4 * mmsize
-
- pshufb m1, m0, [r3 + 3 * mmsize]
- pmaddubsw m1, [r2 + 0 * mmsize]
- pmulhrsw m1, m5
- packuswb m1, m1
- vpermq m1, m1, 11011000b
- movu [r0 + (25 - 2) * 16], xm1
+ pmaddubsw m3, [allAng4_fact_mode25] ; no new shuffle: m3 still holds the mode 23/24 row-0 shuffle result
+ pmulhrsw m3, m5
+ packuswb m3, m3
+ vpermq m3, m3, 11011000b ; single-mode result spans both lanes, so vpermq is still needed to gather its 16 bytes into xm3
+ movu [r0 + (25 - 2) * 16], xm3
; mode 26
- add r3, 4 * mmsize
-
- pshufb xm1, xm0, [r3 + 0 * mmsize]
+ pshufb m1, m0, [allAng4_shuf_mode26] ; now a ymm shuffle; only the low half (xm1) is stored on the next line
movu [r0 + (26 - 2) * 16], xm1
pxor xm1, xm1
@@ -23326,64 +23292,55 @@
; mode 27
- pshufb m1, m0, [r3 + 1 * mmsize]
- pmaddubsw m1, [r2 + 1 * mmsize]
+ pshufb m3, m0, [allAng4_shuf_mode27_28] ; one shuffle, kept in m3, serves mode 27/28 (both halves) and the first half of mode 29/30
+ pmaddubsw m1, m3, [allAng4_fact_mode27_28] ; three-operand form writes m1 and leaves m3 intact
pmulhrsw m1, m5
; mode 28
- pshufb m2, m0, [r3 + 1 * mmsize]
- pmaddubsw m2, [r2 + 2 * mmsize]
+ pmaddubsw m2, m3, [allAng4_fact_mode27_28 + mmsize] ; reuses m3; only the weights differ between the two halves
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (27 - 2) * 16], m1
; mode 29
- pshufb m1, m0, [r3 + 2 * mmsize]
- pmaddubsw m1, [r2 + 3 * mmsize]
+ pmaddubsw m3, [allAng4_fact_mode29_30] ; last use of the mode 27/28 shuffle result, so m3 is overwritten here
+ pmulhrsw m3, m5
+
+; mode 30
+
+ pshufb m2, m0, [allAng4_shuf_mode29_30] ; second half of the 29/30 pair needs its own shuffle
+ pmaddubsw m2, [allAng4_fact_mode29_30 + mmsize]
+ pmulhrsw m2, m5
+ packuswb m3, m2 ; per-lane pack: each 128-bit lane ends up with one mode, no vpermq needed
+ movu [r0 + (29 - 2) * 16], m3
+
+; mode 31
+
+ pshufb m1, m0, [allAng4_shuf_mode31_32] ; row 0 of the paired shuffle/weight tables
+ pmaddubsw m1, [allAng4_fact_mode31_32]
pmulhrsw m1, m5
-; mode 30
-
- add r2, 4 * mmsize
-
- pshufb m2, m0, [r3 + 3 * mmsize]
- pmaddubsw m2, [r2 + 0 * mmsize]
+; mode 32
+
+ pshufb m2, m0, [allAng4_shuf_mode31_32 + mmsize] ; row 1 of the paired tables
+ pmaddubsw m2, [allAng4_fact_mode31_32 + mmsize]
pmulhrsw m2, m5
packuswb m1, m2
- vpermq m1, m1, 11011000b
- movu [r0 + (29 - 2) * 16], m1
-
-; mode 31
-
- add r3, 4 * mmsize
-
- pshufb m1, m0, [r3 + 0 * mmsize]
- pmaddubsw m1, [r2 + 1 * mmsize]
- pmulhrsw m1, m5
-
-; mode 32
-
- pshufb m2, m0, [r3 + 0 * mmsize]
- pmaddubsw m2, [r2 + 2 * mmsize]
- pmulhrsw m2, m5
- packuswb m1, m2
- vpermq m1, m1, 11011000b
movu [r0 + (31 - 2) * 16], m1
; mode 33
- pshufb m1, m0, [r3 + 1 * mmsize]
- pmaddubsw m1, [r2 + 3 * mmsize]
+ pshufb m1, m0, [allAng4_shuf_mode33] ; single-mode tables addressed by label; the pack + vpermq path below is unchanged
+ pmaddubsw m1, [allAng4_fact_mode33]
pmulhrsw m1, m5
packuswb m1, m2
vpermq m1, m1, 11011000b
; mode 34
- pshufb m0, [r3 + 2 * mmsize]
+ pshufb m0, [allAng4_shuf_mode34] ; shuffles m0 in place; its low half is inserted as the upper lane of m1 on the next line
vinserti128 m1, m1, xm0, 1
movu [r0 + (33 - 2) * 16], m1
RET
More information about the x265-devel mailing list