[x264-devel] [Git][videolan/x264][master] 2 commits: x86inc: Support memory operands in src1 in 3-operand instructions

Wed Sep 29 21:10:42 UTC 2021


Anton Mitrofanov pushed to branch master at VideoLAN / x264


Commits:
e73fc230 by Henrik Gramner at 2021-06-14T23:41:19+02:00
x86inc: Support memory operands in src1 in 3-operand instructions

Particularly in code that makes heavy use of macros it's possible
to end up with 3-operand instructions with a memory operand in src1.
In the case of SSE this works fine due to automatic move insertions,
but in AVX that fails since memory operands are only allowed in src2.

The main purpose of this feature is to minimize the amount of code
changes required to facilitate conversion of existing SSE code to AVX.

- - - - -
8b2721d2 by Henrik Gramner at 2021-08-25T22:01:38+02:00
x86inc: Add an option for forcing VEX-encoding in non-AVX functions

- - - - -


1 changed file:

- common/x86/x86inc.asm


Changes:

=====================================
common/x86/x86inc.asm
=====================================
@@ -86,6 +86,11 @@
     %define mangle(x) x
 %endif
 
+; Use VEX-encoding even in non-AVX functions
+%ifndef FORCE_VEX_ENCODING
+    %define FORCE_VEX_ENCODING 0
+%endif
+
 %macro SECTION_RODATA 0-1 16
     %ifidn __OUTPUT_FORMAT__,win32
         SECTION .rdata align=%1
@@ -1014,7 +1019,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 %endmacro
 
 %macro INIT_XMM 0-1+
-    %assign avx_enabled 0
+    %assign avx_enabled FORCE_VEX_ENCODING
     %define RESET_MM_PERMUTATION INIT_XMM %1
     %define mmsize 16
     %define mova movdqa
@@ -1339,26 +1344,50 @@ INIT_XMM
     %elif %0 >= 9
         __instr %6, %7, %8, %9
     %elif %0 == 8
-        %if avx_enabled && %5
+        %if avx_enabled && __sizeofreg >= 16 && %4 == 0
             %xdefine __src1 %7
             %xdefine __src2 %8
-            %ifnum regnumof%7
-                %ifnum regnumof%8
-                    %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32
-                        ; Most VEX-encoded instructions require an additional byte to encode when
-                        ; src2 is a high register (e.g. m8..15). If the instruction is commutative
-                        ; we can swap src1 and src2 when doing so reduces the instruction length.
-                        %xdefine __src1 %8
-                        %xdefine __src2 %7
+            %if %5
+                %ifnum regnumof%7
+                    %ifnum regnumof%8
+                        %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32
+                            ; Most VEX-encoded instructions require an additional byte to encode when
+                            ; src2 is a high register (e.g. m8..15). If the instruction is commutative
+                            ; we can swap src1 and src2 when doing so reduces the instruction length.
+                            %xdefine __src1 %8
+                            %xdefine __src2 %7
+                        %endif
                     %endif
+                %elifnum regnumof%8 ; put memory operands in src2 when possible
+                    %xdefine __src1 %8
+                    %xdefine __src2 %7
+                %else
+                    %assign __emulate_avx 1
+                %endif
+            %elifnnum regnumof%7
+                ; EVEX allows imm8 shift instructions to be used with memory operands,
+                ; but VEX does not. This handles those special cases.
+                %ifnnum %8
+                    %assign __emulate_avx 1
+                %elif notcpuflag(avx512)
+                    %assign __emulate_avx 1
                 %endif
             %endif
-            __instr %6, __src1, __src2
+            %if __emulate_avx ; a separate load is required
+                %if %3
+                    vmovaps %6, %7
+                %else
+                    vmovdqa %6, %7
+                %endif
+                __instr %6, %8
+            %else
+                __instr %6, __src1, __src2
+            %endif
         %else
             __instr %6, %7, %8
         %endif
     %elif %0 == 7
-        %if avx_enabled && %5
+        %if avx_enabled && __sizeofreg >= 16 && %5
             %xdefine __src1 %6
             %xdefine __src2 %7
             %ifnum regnumof%6



View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/ae03d92b52bb7581df2e75d571989cb1ecd19cbd...8b2721d2770250cd8afa22fd20df3184943e857f

-- 
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/ae03d92b52bb7581df2e75d571989cb1ecd19cbd...8b2721d2770250cd8afa22fd20df3184943e857f
You're receiving this email because of your account on code.videolan.org.