[x265] [PATCH 160 of 307] x86: nits - enforce optimal SIMD register uses
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:32:38 CEST 2018
# HG changeset patch
# User Praveen Tiwari <praveen at multicorewareinc.com>
# Date 1510641789 28800
# Mon Nov 13 22:43:09 2017 -0800
# Node ID b45e6581fdf689f4b17e4e0dae733a7df795de07
# Parent 8bfedd92563a0e1da365c4d64a0e565e35f6025a
x86: nits - enforce optimal SIMD register uses
diff -r 8bfedd92563a -r b45e6581fdf6 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm Mon Nov 13 06:26:25 2017 -0800
+++ b/source/common/x86/dct8.asm Mon Nov 13 22:43:09 2017 -0800
@@ -2399,7 +2399,7 @@
%endmacro
INIT_ZMM avx512
-cglobal dct8, 3, 7, 28
+cglobal dct8, 3, 7, 24
vbroadcasti32x4 m5, [pd_ %+ DCT8_ROUND1]
vbroadcasti32x8 m4, [dct8_shuf]
@@ -2438,15 +2438,15 @@
vpbroadcastq m21, [r6 + 1 * 8]
vpbroadcastq m22, [r6 + 2 * 8]
vpbroadcastq m23, [r6 + 3 * 8]
- vpbroadcastq m24, [r6 + 4 * 8]
- vpbroadcastq m25, [r6 + 5 * 8]
- vpbroadcastq m26, [r6 + 6 * 8]
- vpbroadcastq m27, [r6 + 7 * 8]
+ vpbroadcastq m7, [r6 + 4 * 8]
+ vpbroadcastq m12, [r6 + 5 * 8]
+ vpbroadcastq m14, [r6 + 6 * 8]
+ vpbroadcastq m16, [r6 + 7 * 8]
DCT8_AVX512_PASS_1 20, 9, 21, 10
- DCT8_AVX512_PASS_1 22, 11, 23, 12
- DCT8_AVX512_PASS_1 24, 13, 25, 14
- DCT8_AVX512_PASS_1 26, 15, 27, 16
+ DCT8_AVX512_PASS_1 22, 11, 23, 10
+ DCT8_AVX512_PASS_1 7, 13, 12, 10
+ DCT8_AVX512_PASS_1 14, 15, 16, 10
;pass2
vbroadcasti32x4 m5, [pd_ %+ DCT8_ROUND2]
@@ -2458,13 +2458,13 @@
vbroadcasti32x4 m21, [r5 + 1 * 16]
vbroadcasti32x4 m22, [r5 + 2 * 16]
vbroadcasti32x4 m23, [r5 + 3 * 16]
- vbroadcasti32x4 m25, [r5 + 5 * 16]
- vbroadcasti32x4 m26, [r5 + 6 * 16]
- vbroadcasti32x4 m27, [r5 + 7 * 16]
+ vbroadcasti32x4 m12, [r5 + 5 * 16]
+ vbroadcasti32x4 m14, [r5 + 6 * 16]
+ vbroadcasti32x4 m16, [r5 + 7 * 16]
DCT8_AVX512_PASS_2 20, 21, 22, 23
movu [r1], m0
- DCT8_AVX512_PASS_2 24, 25, 26, 27
+ DCT8_AVX512_PASS_2 7, 12, 14, 16
movu [r1 + 64], m0
RET
More information about the x265-devel mailing list