[x265] [PATCH 10 of 13] x86inc: Correctly set mmreg variables

vignesh at multicorewareinc.com vignesh at multicorewareinc.com
Fri Jul 6 11:18:10 CEST 2018


# HG changeset patch
# User Vignesh Vijayakumar<vignesh at multicorewareinc.com>
# Date 1517547008 -19800
#      Fri Feb 02 10:20:08 2018 +0530
# Node ID 1485405aa16ff2d6f04acb8aeafdae6f32a3bfb5
# Parent  038a52e0813279f4812e9d170a50a134122df4f4
x86inc: Correctly set mmreg variables

diff -r 038a52e08132 -r 1485405aa16f source/common/x86/x86inc.asm
--- a/source/common/x86/x86inc.asm	Fri Feb 02 10:13:20 2018 +0530
+++ b/source/common/x86/x86inc.asm	Fri Feb 02 10:20:08 2018 +0530
@@ -866,6 +866,36 @@
     %undef %1%2
 %endmacro
 
+%macro DEFINE_MMREGS 1 ; mmtype
+    %assign %%prev_mmregs 0
+    %ifdef num_mmregs
+        %assign %%prev_mmregs num_mmregs
+    %endif
+
+    %assign num_mmregs 8
+    %if ARCH_X86_64 && mmsize >= 16
+        %assign num_mmregs 16
+        %if cpuflag(avx512) || mmsize == 64
+            %assign num_mmregs 32
+        %endif
+    %endif
+
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE m, %%i, %1 %+ %%i
+        CAT_XDEFINE nn%1, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+    %if %%prev_mmregs > num_mmregs
+        %rep %%prev_mmregs - num_mmregs
+            CAT_UNDEF m, %%i
+            CAT_UNDEF nn %+ mmtype, %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+    %xdefine mmtype %1
+%endmacro
+
 ; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
 %macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
     %if ARCH_X86_64 && cpuflag(avx512)
@@ -882,44 +912,24 @@
     %assign avx_enabled 0
     %define RESET_MM_PERMUTATION INIT_MMX %1
     %define mmsize 8
-    %define num_mmregs 8
     %define mova movq
     %define movu movq
     %define movh movd
     %define movnta movntq
-    %assign %%i 0
-    %rep 8
-        CAT_XDEFINE m, %%i, mm %+ %%i
-        CAT_XDEFINE nnmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
-    %rep 24
-        CAT_UNDEF m, %%i
-        CAT_UNDEF nnmm, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS mm
 %endmacro
 
 %macro INIT_XMM 0-1+
     %assign avx_enabled 0
     %define RESET_MM_PERMUTATION INIT_XMM %1
     %define mmsize 16
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 32
-    %endif
     %define mova movdqa
     %define movu movdqu
     %define movh movq
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, xmm %+ %%i
-        CAT_XDEFINE nnxmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS xmm
     %if WIN64
         ; Swap callee-saved registers with volatile registers
         AVX512_MM_PERMUTATION 6
@@ -930,21 +940,12 @@
     %assign avx_enabled 1
     %define RESET_MM_PERMUTATION INIT_YMM %1
     %define mmsize 32
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 32
-    %endif
     %define mova movdqa
     %define movu movdqu
     %undef movh
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, ymm %+ %%i
-        CAT_XDEFINE nnymm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS ymm
     AVX512_MM_PERMUTATION
 %endmacro
 
@@ -952,21 +953,12 @@
     %assign avx_enabled 1
     %define RESET_MM_PERMUTATION INIT_ZMM %1
     %define mmsize 64
-    %define num_mmregs 8
-    %if ARCH_X86_64
-        %define num_mmregs 32
-    %endif
     %define mova movdqa
     %define movu movdqu
     %undef movh
     %define movnta movntdq
-    %assign %%i 0
-    %rep num_mmregs
-        CAT_XDEFINE m, %%i, zmm %+ %%i
-        CAT_XDEFINE nnzmm, %%i, %%i
-        %assign %%i %%i+1
-    %endrep
     INIT_CPUFLAGS %1
+    DEFINE_MMREGS zmm
     AVX512_MM_PERMUTATION
 %endmacro
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265-10.patch
Type: text/x-patch
Size: 3696 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180706/502d68b7/attachment-0001.bin>


More information about the x265-devel mailing list