[x265] [PATCH 10 of 13] x86inc: Correctly set mmreg variables
vignesh at multicorewareinc.com
vignesh at multicorewareinc.com
Fri Jul 6 11:18:10 CEST 2018
# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1517547008 -19800
# Fri Feb 02 10:20:08 2018 +0530
# Node ID 1485405aa16ff2d6f04acb8aeafdae6f32a3bfb5
# Parent 038a52e0813279f4812e9d170a50a134122df4f4
x86inc: Correctly set mmreg variables
diff -r 038a52e08132 -r 1485405aa16f source/common/x86/x86inc.asm
--- a/source/common/x86/x86inc.asm Fri Feb 02 10:13:20 2018 +0530
+++ b/source/common/x86/x86inc.asm Fri Feb 02 10:20:08 2018 +0530
@@ -866,6 +866,36 @@
%undef %1%2
%endmacro
+%macro DEFINE_MMREGS 1 ; mmtype: register-name prefix for the m<i> aliases (this patch passes mm, xmm, ymm or zmm)
+ %assign %%prev_mmregs 0 ; register count left by an earlier call; stays 0 when num_mmregs is not defined yet
+ %ifdef num_mmregs
+ %assign %%prev_mmregs num_mmregs
+ %endif
+ ; Pick the register count: 8 by default, 16 on x86-64 when mmsize >= 16, 32 when additionally cpuflag(avx512) or mmsize == 64.
+ %assign num_mmregs 8
+ %if ARCH_X86_64 && mmsize >= 16
+ %assign num_mmregs 16
+ %if cpuflag(avx512) || mmsize == 64
+ %assign num_mmregs 32
+ %endif
+ %endif
+ ; Define the per-register symbols for indices 0 .. num_mmregs-1 (CAT_XDEFINE presumably pastes its first two arguments into the symbol name; its body is not visible here).
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, %1 %+ %%i
+ CAT_XDEFINE nn%1, %%i, %%i
+ %assign %%i %%i+1 ; after this loop %%i == num_mmregs, which is where the cleanup loop below starts
+ %endrep
+ %if %%prev_mmregs > num_mmregs ; the previous call defined more registers than this one: remove the leftovers
+ %rep %%prev_mmregs - num_mmregs
+ CAT_UNDEF m, %%i
+ CAT_UNDEF nn %+ mmtype, %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+ %xdefine mmtype %1 ; updated last, so the CAT_UNDEF lines above still expand mmtype to its value from before this call
+%endmacro
+
; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
%macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
%if ARCH_X86_64 && cpuflag(avx512)
@@ -882,44 +912,24 @@
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_MMX %1
%define mmsize 8
- %define num_mmregs 8
%define mova movq
%define movu movq
%define movh movd
%define movnta movntq
- %assign %%i 0
- %rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nnmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- %rep 24
- CAT_UNDEF m, %%i
- CAT_UNDEF nnmm, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS mm
%endmacro
%macro INIT_XMM 0-1+
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_XMM %1
%define mmsize 16
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 32
- %endif
%define mova movdqa
%define movu movdqu
%define movh movq
%define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nnxmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS xmm
%if WIN64
; Swap callee-saved registers with volatile registers
AVX512_MM_PERMUTATION 6
@@ -930,21 +940,12 @@
%assign avx_enabled 1
%define RESET_MM_PERMUTATION INIT_YMM %1
%define mmsize 32
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 32
- %endif
%define mova movdqa
%define movu movdqu
%undef movh
%define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, ymm %+ %%i
- CAT_XDEFINE nnymm, %%i, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS ymm
AVX512_MM_PERMUTATION
%endmacro
@@ -952,21 +953,12 @@
%assign avx_enabled 1
%define RESET_MM_PERMUTATION INIT_ZMM %1
%define mmsize 64
- %define num_mmregs 8
- %if ARCH_X86_64
- %define num_mmregs 32
- %endif
%define mova movdqa
%define movu movdqu
%undef movh
%define movnta movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, zmm %+ %%i
- CAT_XDEFINE nnzmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
INIT_CPUFLAGS %1
+ DEFINE_MMREGS zmm
AVX512_MM_PERMUTATION
%endmacro
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265-10.patch
Type: text/x-patch
Size: 3696 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180706/502d68b7/attachment-0001.bin>
More information about the x265-devel
mailing list