[x264-devel] [Git][videolan/x264][master] 3 commits: x86inc: Enable 4-operand emulation for variable blend instructions

Anton Mitrofanov (@BugMaster) gitlab at videolan.org
Mon Feb 21 21:42:40 UTC 2022



Anton Mitrofanov pushed to branch master at VideoLAN / x264


Commits:
3e2a0d4c by Henrik Gramner at 2022-02-19T23:06:27+01:00
x86inc: Enable 4-operand emulation for variable blend instructions

With legacy encoding, the last operand (the index) must be xmm0,
but aside from that, emulating the non-destructive forms works
the same as for any other instruction.

- - - - -
f52e5e11 by Henrik Gramner at 2022-02-21T03:42:54+01:00
x86inc: Fix edge case in forced VEX-encoding

Correctly handle emulation of 4-operand instructions (e.g. 'shufps')
where src1 is a memory operand.

- - - - -
6d10612a by Henrik Gramner at 2022-02-21T03:42:55+01:00
x86inc: Add REPX macro to repeat instructions/operations

When operating on large blocks of data it's common to repeatedly use
an instruction on multiple registers. Using the REPX macro makes it
easy to quickly write dense code to achieve this without having to
explicitly duplicate the same instruction over and over.

For example,

    REPX {paddw x, m4}, m0, m1, m2, m3
    REPX {mova [r0+16*x], m5}, 0, 1, 2, 3

will expand to

    paddw       m0, m4
    paddw       m1, m4
    paddw       m2, m4
    paddw       m3, m4
    mova [r0+16*0], m5
    mova [r0+16*1], m5
    mova [r0+16*2], m5
    mova [r0+16*3], m5

- - - - -


1 changed file:

- common/x86/x86inc.asm


Changes:

=====================================
common/x86/x86inc.asm
=====================================
@@ -245,6 +245,16 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 %endif
 %endmacro
 
+; Repeats an instruction/operation for multiple arguments.
+; Example usage: "REPX {psrlw x, 8}, m0, m1, m2, m3"
+%macro REPX 2-* ; operation, args
+    %xdefine %%f(x) %1
+    %rep %0 - 1
+        %rotate 1
+        %%f(%1)
+    %endrep
+%endmacro
+
 %macro PUSH 1
     push %1
     %ifidn rstk, rsp
@@ -1342,7 +1352,20 @@ INIT_XMM
             %1 %6, __src2
         %endif
     %elif %0 >= 9
-        __instr %6, %7, %8, %9
+        %if avx_enabled && __sizeofreg >= 16 && %4 == 1
+            %ifnnum regnumof%7
+                %if %3
+                    vmovaps %6, %7
+                %else
+                    vmovdqa %6, %7
+                %endif
+                __instr %6, %6, %8, %9
+            %else
+                __instr %6, %7, %8, %9
+            %endif
+        %else
+            __instr %6, %7, %8, %9
+        %endif
     %elif %0 == 8
         %if avx_enabled && __sizeofreg >= 16 && %4 == 0
             %xdefine __src1 %7
@@ -1379,7 +1402,7 @@ INIT_XMM
                 %else
                     vmovdqa %6, %7
                 %endif
-                __instr %6, %8
+                __instr %6, %6, %8
             %else
                 __instr %6, __src1, __src2
             %endif
@@ -1448,8 +1471,8 @@ AVX_INSTR andpd, sse2, 1, 0, 1
 AVX_INSTR andps, sse, 1, 0, 1
 AVX_INSTR blendpd, sse4, 1, 1, 0
 AVX_INSTR blendps, sse4, 1, 1, 0
-AVX_INSTR blendvpd, sse4 ; can't be emulated
-AVX_INSTR blendvps, sse4 ; can't be emulated
+AVX_INSTR blendvpd, sse4, 1, 1, 0 ; last operand must be xmm0 with legacy encoding
+AVX_INSTR blendvps, sse4, 1, 1, 0 ; last operand must be xmm0 with legacy encoding
 AVX_INSTR cmpeqpd, sse2, 1, 0, 1
 AVX_INSTR cmpeqps, sse, 1, 0, 1
 AVX_INSTR cmpeqsd, sse2, 1, 0, 0
@@ -1582,7 +1605,7 @@ AVX_INSTR pand, mmx, 0, 0, 1
 AVX_INSTR pandn, mmx, 0, 0, 0
 AVX_INSTR pavgb, mmx2, 0, 0, 1
 AVX_INSTR pavgw, mmx2, 0, 0, 1
-AVX_INSTR pblendvb, sse4 ; can't be emulated
+AVX_INSTR pblendvb, sse4, 0, 1, 0 ; last operand must be xmm0 with legacy encoding
 AVX_INSTR pblendw, sse4, 0, 1, 0
 AVX_INSTR pclmulqdq, fnord, 0, 1, 0
 AVX_INSTR pclmulhqhqdq, fnord, 0, 0, 0



View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/5585eafe31c2299163dcb206e05e18be7dd93098...6d10612ab0007f8f60dd2399182efd696da3ffe4

-- 
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/5585eafe31c2299163dcb206e05e18be7dd93098...6d10612ab0007f8f60dd2399182efd696da3ffe4
You're receiving this email because of your account on code.videolan.org.


VideoLAN code repository instance


More information about the x264-devel mailing list