[x264-devel] x86inc: Improve SAVE/LOAD_MM_PERMUTATION macros

Henrik Gramner git at videolan.org
Tue Aug 7 00:05:30 CEST 2018


x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sun Apr 22 22:49:15 2018 +0200| [28e4879842a86cc6bb63db0f5f386a3e9268fd46] | committer: Henrik Gramner

x86inc: Improve SAVE/LOAD_MM_PERMUTATION macros

Use register numbers instead of copying the full register names. This makes it
possible to change register widths in the middle of a function and keep the
mmreg permutations intact which can be useful for code that only needs larger
vectors for parts of the function in combination with macros etc.

Also change the LOAD_MM_PERMUTATION macro to use the same default name as the
SAVE macro. This simplifies swapping from ymm to xmm registers or vice versa:

    SAVE_MM_PERMUTATION
    INIT_XMM <cpuflags>
    LOAD_MM_PERMUTATION

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=28e4879842a86cc6bb63db0f5f386a3e9268fd46
---

 common/x86/x86inc.asm | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 280a9955..16ba62b4 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -1077,19 +1077,32 @@ INIT_XMM
     %endif
     %assign %%i 0
     %rep num_mmregs
-        CAT_XDEFINE %%f, %%i, m %+ %%i
+        %xdefine %%tmp m %+ %%i
+        CAT_XDEFINE %%f, %%i, regnumof %+ %%tmp
         %assign %%i %%i+1
     %endrep
 %endmacro
 
-%macro LOAD_MM_PERMUTATION 1 ; name to load from
-    %ifdef %1_m0
+%macro LOAD_MM_PERMUTATION 0-1 ; name to load from
+    %if %0
+        %xdefine %%f %1_m
+    %else
+        %xdefine %%f current_function %+ _m
+    %endif
+    %xdefine %%tmp %%f %+ 0
+    %ifnum %%tmp
+        RESET_MM_PERMUTATION
         %assign %%i 0
         %rep num_mmregs
-            CAT_XDEFINE m, %%i, %1_m %+ %%i
-            CAT_XDEFINE nn, m %+ %%i, %%i
+            %xdefine %%tmp %%f %+ %%i
+            CAT_XDEFINE %%m, %%i, m %+ %%tmp
             %assign %%i %%i+1
         %endrep
+        %rep num_mmregs
+            %assign %%i %%i-1
+            CAT_XDEFINE m, %%i, %%m %+ %%i
+            CAT_XDEFINE nn, m %+ %%i, %%i
+        %endrep
     %endif
 %endmacro
 



More information about the x264-devel mailing list