[x264-devel] x86inc: Optimize VEX instruction encoding
Henrik Gramner
git at videolan.org
Tue Aug 7 00:05:29 CEST 2018
x264 | branch: master | Henrik Gramner <henrik at gramner.com> | Sat Mar 31 13:49:56 2018 +0200| [8badb910847e94abb66686009e424bdce355c9f4] | committer: Henrik Gramner
x86inc: Optimize VEX instruction encoding
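Most VEX-encoded instructions require an additional byte to encode when src2
is a high register (xmm8..15 or ymm8..15): the 2-byte VEX prefix has no bit
for extending the src2 (ModRM.rm) register number, so the 3-byte VEX prefix
must be used instead. src1 is not affected by this restriction. If the
instruction is commutative we can therefore swap src1 and src2 when doing so
reduces the instruction length, e.g.
    vpaddw xmm0, xmm0, xmm8 -> vpaddw xmm0, xmm8, xmm0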
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=8badb910847e94abb66686009e424bdce355c9f4
---
common/x86/x86inc.asm | 35 +++++++++++++++++++++++++++++++++--
1 file changed, 33 insertions(+), 2 deletions(-)
diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 49e73d65..280a9955 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -1240,9 +1240,40 @@ INIT_XMM
%elif %0 >= 9
__instr %6, %7, %8, %9
%elif %0 == 8
- __instr %6, %7, %8
+ %if avx_enabled && %5
+ %xdefine __src1 %7
+ %xdefine __src2 %8
+ %ifnum regnumof%7
+ %ifnum regnumof%8
+ %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32
+ ; Most VEX-encoded instructions require an additional byte to encode when
+ ; src2 is a high register (e.g. m8..15). If the instruction is commutative
+ ; we can swap src1 and src2 when doing so reduces the instruction length.
+ %xdefine __src1 %8
+ %xdefine __src2 %7
+ %endif
+ %endif
+ %endif
+ __instr %6, __src1, __src2
+ %else
+ __instr %6, %7, %8
+ %endif
%elif %0 == 7
- __instr %6, %7
+ %if avx_enabled && %5
+ %xdefine __src1 %6
+ %xdefine __src2 %7
+ %ifnum regnumof%6
+ %ifnum regnumof%7
+ %if regnumof%6 < 8 && regnumof%7 >= 8 && regnumof%7 < 16 && sizeof%7 <= 32
+ %xdefine __src1 %7
+ %xdefine __src2 %6
+ %endif
+ %endif
+ %endif
+ __instr %6, __src1, __src2
+ %else
+ __instr %6, %7
+ %endif
%else
__instr %6
%endif
More information about the x264-devel
mailing list