[x264-devel] x86inc: Correctly set mmreg variables
Henrik Gramner
git at videolan.org
Thu Jan 18 21:23:02 CET 2018
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sat Jan 6 17:47:42 2018 +0100| [4a158b00943c334ec9e0aabe6a919900c32e360e] | committer: Henrik Gramner
x86inc: Correctly set mmreg variables
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=4a158b00943c334ec9e0aabe6a919900c32e360e
---
common/x86/x86inc.asm | 79 +++++++++++++++++++++++----------------------------
1 file changed, 35 insertions(+), 44 deletions(-)
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index f4ffcd55..5ec29e29 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -887,6 +887,36 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%undef %1%2
%endmacro
+%macro DEFINE_MMREGS 1 ; mmtype
+ %assign %%prev_mmregs 0
+ %ifdef num_mmregs
+ %assign %%prev_mmregs num_mmregs
+ %endif
+
+ %assign num_mmregs 8
+ %if ARCH_X86_64 && mmsize >= 16
+ %assign num_mmregs 16
+ %if cpuflag(avx512) || mmsize == 64
+ %assign num_mmregs 32
+ %endif
+ %endif
+
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, %1 %+ %%i
+ CAT_XDEFINE nn%1, %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+ %if %%prev_mmregs > num_mmregs
+ %rep %%prev_mmregs - num_mmregs
+ CAT_UNDEF m, %%i
+ CAT_UNDEF nn %+ mmtype, %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+ %xdefine mmtype %1
+%endmacro
+
; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
%macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
%if ARCH_X86_64 && cpuflag(avx512)
@@ -903,47 +933,26 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_MMX %1
%define mmsize 8
- %define num_mmregs 8
%define mova movq
%define movu movq
%define movh movd
%define movnta movntq
- %assign %%i 0
- %rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nnmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- %rep 24
- CAT_UNDEF m, %%i
- CAT_UNDEF nnmm, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS mm
%endmacro
%macro INIT_XMM 0-1+
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_XMM %1
%define mmsize 16
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 32
- %endif
%define mova movdqa
%define movu movdqu
%define movh movq
%define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nnxmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS xmm
%if WIN64
- ; Swap callee-saved registers with volatile registers
- AVX512_MM_PERMUTATION 6
+ AVX512_MM_PERMUTATION 6 ; Swap callee-saved registers with volatile registers
%endif
%endmacro
@@ -951,21 +960,12 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%assign avx_enabled 1
%define RESET_MM_PERMUTATION INIT_YMM %1
%define mmsize 32
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 32
- %endif
%define mova movdqa
%define movu movdqu
%undef movh
%define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nnymm, %%i, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS ymm
AVX512_MM_PERMUTATION
%endmacro
@@ -973,21 +973,12 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%assign avx_enabled 1
%define RESET_MM_PERMUTATION INIT_ZMM %1
%define mmsize 64
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 32
- %endif
%define mova movdqa
%define movu movdqu
%undef movh
%define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, zmm %+ %%i
- CAT_XDEFINE nnzmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS zmm
AVX512_MM_PERMUTATION
%endmacro
More information about the x264-devel
mailing list