[x264-devel] [Git][videolan/x264][master] 3 commits: x86inc: Enable 4-operand emulation for variable blend instructions
Anton Mitrofanov (@BugMaster)
gitlab at videolan.org
Mon Feb 21 21:42:40 UTC 2022
Anton Mitrofanov pushed to branch master at VideoLAN / x264
Commits:
3e2a0d4c by Henrik Gramner at 2022-02-19T23:06:27+01:00
x86inc: Enable 4-operand emulation for variable blend instructions
With legacy encoding the last operand (the index) must be xmm0,
but aside from that emulating non-destructive forms works
the same as any other instruction.
- - - - -
f52e5e11 by Henrik Gramner at 2022-02-21T03:42:54+01:00
x86inc: Fix edge case in forced VEX-encoding
Correctly handle emulation of 4-operand instructions (e.g. 'shufps')
where src1 is a memory operand.
- - - - -
6d10612a by Henrik Gramner at 2022-02-21T03:42:55+01:00
x86inc: Add REPX macro to repeat instructions/operations
When operating on large blocks of data it's common to repeatedly use
an instruction on multiple registers. Using the REPX macro makes it
easy to quickly write dense code to achieve this without having to
explicitly duplicate the same instruction over and over.
For example,
REPX {paddw x, m4}, m0, m1, m2, m3
REPX {mova [r0+16*x], m5}, 0, 1, 2, 3
will expand to
paddw m0, m4
paddw m1, m4
paddw m2, m4
paddw m3, m4
mova [r0+16*0], m5
mova [r0+16*1], m5
mova [r0+16*2], m5
mova [r0+16*3], m5
- - - - -
1 changed file:
- common/x86/x86inc.asm
Changes:
=====================================
common/x86/x86inc.asm
=====================================
@@ -245,6 +245,16 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%endif
%endmacro
+; Repeats an instruction/operation once per argument; each argument is substituted for "x" in the operation.
+; Example usage: "REPX {psrlw x, 8}, m0, m1, m2, m3"
+%macro REPX 2-* ; operation, args
+ %xdefine %%f(x) %1
+ %rep %0 - 1 ; one iteration per argument that follows the operation
+ %rotate 1 ; rotate the parameter list so %1 is the next argument
+ %%f(%1) ; emit the operation with "x" replaced by that argument
+ %endrep
+%endmacro
+
%macro PUSH 1
push %1
%ifidn rstk, rsp
@@ -1342,7 +1352,20 @@ INIT_XMM
%1 %6, __src2
%endif
%elif %0 >= 9
- __instr %6, %7, %8, %9
+ %if avx_enabled && __sizeofreg >= 16 && %4 == 1
+ %ifnnum regnumof%7
+ %if %3
+ vmovaps %6, %7
+ %else
+ vmovdqa %6, %7
+ %endif
+ __instr %6, %6, %8, %9
+ %else
+ __instr %6, %7, %8, %9
+ %endif
+ %else
+ __instr %6, %7, %8, %9
+ %endif
%elif %0 == 8
%if avx_enabled && __sizeofreg >= 16 && %4 == 0
%xdefine __src1 %7
@@ -1379,7 +1402,7 @@ INIT_XMM
%else
vmovdqa %6, %7
%endif
- __instr %6, %8
+ __instr %6, %6, %8
%else
__instr %6, __src1, __src2
%endif
@@ -1448,8 +1471,8 @@ AVX_INSTR andpd, sse2, 1, 0, 1
AVX_INSTR andps, sse, 1, 0, 1
AVX_INSTR blendpd, sse4, 1, 1, 0
AVX_INSTR blendps, sse4, 1, 1, 0
-AVX_INSTR blendvpd, sse4 ; can't be emulated
-AVX_INSTR blendvps, sse4 ; can't be emulated
+AVX_INSTR blendvpd, sse4, 1, 1, 0 ; last operand must be xmm0 with legacy encoding
+AVX_INSTR blendvps, sse4, 1, 1, 0 ; last operand must be xmm0 with legacy encoding
AVX_INSTR cmpeqpd, sse2, 1, 0, 1
AVX_INSTR cmpeqps, sse, 1, 0, 1
AVX_INSTR cmpeqsd, sse2, 1, 0, 0
@@ -1582,7 +1605,7 @@ AVX_INSTR pand, mmx, 0, 0, 1
AVX_INSTR pandn, mmx, 0, 0, 0
AVX_INSTR pavgb, mmx2, 0, 0, 1
AVX_INSTR pavgw, mmx2, 0, 0, 1
-AVX_INSTR pblendvb, sse4 ; can't be emulated
+AVX_INSTR pblendvb, sse4, 0, 1, 0 ; last operand must be xmm0 with legacy encoding
AVX_INSTR pblendw, sse4, 0, 1, 0
AVX_INSTR pclmulqdq, fnord, 0, 1, 0
AVX_INSTR pclmulhqhqdq, fnord, 0, 0, 0
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/5585eafe31c2299163dcb206e05e18be7dd93098...6d10612ab0007f8f60dd2399182efd696da3ffe4
--
View it on GitLab: https://code.videolan.org/videolan/x264/-/compare/5585eafe31c2299163dcb206e05e18be7dd93098...6d10612ab0007f8f60dd2399182efd696da3ffe4
You're receiving this email because of your account on code.videolan.org.
VideoLAN code repository instance
More information about the x264-devel
mailing list