[vlc-devel] [PATCH 1/3] build: x86: add build support for nasm via x86inc.asm/x86util.asm
Rémi Denis-Courmont
remi at remlab.net
Mon Jan 28 09:09:50 CET 2019
AFAIR, AC_SUBST invocation should not be conditional.
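
Something along these lines would keep the substitutions unconditional
(untested sketch, reusing the patch's variable names; only the AC_DEFINE
belongs inside the conditional):

    dnl Probe for nasm/yasm as in the patch
    AC_CHECK_PROGS(X86ASM, [nasm yasm])
    AM_CONDITIONAL([HAVE_X86ASM], [test -n "${X86ASM}" && test -n "${X86ASMFLAGS}"])
    AM_COND_IF([HAVE_X86ASM],
      [AC_DEFINE([HAVE_X86ASM], [1], [Use external asm on x86.])])
    dnl Always substituted; the variables are simply empty when unused.
    AC_SUBST([X86ASMFLAGS])
    AC_SUBST([X86ASMDEFS])
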
On 26 January 2019 14:23:36 GMT+02:00, Janne Grunau <janne-vlc at jannau.net> wrote:
>x86inc.asm copied from dav1d (8c5d34c85613) and x86util.asm from libav
>(994c4bc10751). Libav's LGPL-licensed x86util.asm is required for
>yadif.
>
>This reverts "Remove unused support for .asm files"
>commit 6c0f63cd6853c0d184a5abbf2e19c1626d2854ef.
>---
> configure.ac | 28 +
> extras/include/x86/x86inc.asm | 1742 ++++++++++++++++++++++++++++++++
> extras/include/x86/x86util.asm | 705 +++++++++++++
> modules/common.am | 5 +-
> 4 files changed, 2479 insertions(+), 1 deletion(-)
> create mode 100644 extras/include/x86/x86inc.asm
> create mode 100644 extras/include/x86/x86util.asm
>
>diff --git a/configure.ac b/configure.ac
>index a2b8ade789..96d13fa1d2 100644
>--- a/configure.ac
>+++ b/configure.ac
>@@ -95,6 +95,19 @@ HAVE_IOS="0"
> HAVE_OSX="0"
> HAVE_TVOS="0"
>
>+dnl Set x86 asm flags and defines
>+X86ASMFLAGS=""
>+case "${host_cpu}" in
>+ i?86)
>+ X86ASMFLAGS="-f elf32"
>+ X86ASMDEFS="-DARCH_X86_32=1 -DARCH_X86_64=0"
>+ ;;
>+ x86_64)
>+ X86ASMFLAGS="-f elf64"
>+ X86ASMDEFS="-DARCH_X86_32=0 -DARCH_X86_64=1"
>+ ;;
>+esac
>+
> case "${host_os}" in
> "")
> SYS=unknown
>@@ -132,6 +145,8 @@ case "${host_os}" in
> case "${host_cpu}" in
> i?86)
> ARCH_flag="-arch i386"
>+ X86ASMFLAGS="-f macho32"
>+ X86ASMDEFS="${X86ASMDEFS} -DPREFIX"
> ;;
> ppc64*)
> ARCH_flag="-arch ppc64"
>@@ -141,6 +156,8 @@ case "${host_os}" in
> ;;
> x86_64)
> ARCH_flag="-arch x86_64"
>+ X86ASMFLAGS="-f macho64"
>+ X86ASMDEFS="${X86ASMDEFS} -DPREFIX"
> ;;
> arm*)
> ac_cv_c_bigendian="no"
>@@ -259,10 +276,13 @@ case "${host_os}" in
> WINDOWS_ARCH="x64"
> PROGRAMFILES="PROGRAMFILES64"
>                 LDFLAGS="${LDFLAGS} -Wl,--high-entropy-va -Wl,--image-base,0x140000000"
>+ X86ASMFLAGS="-f win64"
> ;;
> *)
> WINDOWS_ARCH="x86"
> PROGRAMFILES="PROGRAMFILES"
>+ X86ASMFLAGS="-f win32"
>+ X86ASMDEFS="${X86ASMDEFS} -DPREFIX"
> ;;
> esac
> AC_SUBST([WINDOWS_ARCH])
>@@ -332,6 +352,14 @@ AM_CONDITIONAL([HAVE_WIN64], [test "${HAVE_WIN64}" = "1"]) dnl Only used for t
> AM_CONDITIONAL([HAVE_WINSTORE], [test "$vlc_winstore_app" = "1"])
>AM_CONDITIONAL([HAVE_WIN32_DESKTOP], [test "${SYS}" = "mingw32" -a "$vlc_winstore_app" = "0"])
>
>+dnl Use nasm/yasm only on x86
>+AC_CHECK_PROGS(X86ASM, [nasm yasm])
>+AM_CONDITIONAL([HAVE_X86ASM], [test -n "${X86ASM}" && test -n "${X86ASMFLAGS}"])
>+AM_COND_IF([HAVE_X86ASM], [
>+ AC_DEFINE([HAVE_X86ASM], [1], [Use external asm on x86.]),
>+ AC_SUBST([X86ASMFLAGS]),
>+ AC_SUBST([X86ASMDEFS])])
>+
> dnl
> dnl Sadly autoconf does not think about testing foo.exe when ask to test
> dnl for program foo on win32
>diff --git a/extras/include/x86/x86inc.asm b/extras/include/x86/x86inc.asm
>new file mode 100644
>index 0000000000..b249f2a792
>--- /dev/null
>+++ b/extras/include/x86/x86inc.asm
>@@ -0,0 +1,1742 @@
>+;*****************************************************************************
>+;* x86inc.asm: x264asm abstraction layer
>+;*****************************************************************************
>+;* Copyright (C) 2005-2018 x264 project
>+;*
>+;* Authors: Loren Merritt <lorenm at u.washington.edu>
>+;* Henrik Gramner <henrik at gramner.com>
>+;* Anton Mitrofanov <BugMaster at narod.ru>
>+;* Fiona Glaser <fiona at x264.com>
>+;*
>+;* Permission to use, copy, modify, and/or distribute this software for any
>+;* purpose with or without fee is hereby granted, provided that the above
>+;* copyright notice and this permission notice appear in all copies.
>+;*
>+;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
>+;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>+;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
>+;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
>+;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
>+;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
>+;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>+;*****************************************************************************
>+
>+; This is a header file for the x264ASM assembly language, which uses
>+; NASM/YASM syntax combined with a large number of macros to provide easy
>+; abstraction between different calling conventions (x86_32, win64, linux64).
>+; It also has various other useful features to simplify writing the kind of
>+; DSP functions that are most often used in x264.
>+
>+; Unlike the rest of x264, this file is available under an ISC license, as it
>+; has significant usefulness outside of x264 and we want it to be available
>+; to the largest audience possible. Of course, if you modify it for your own
>+; purposes to add a new feature, we strongly encourage contributing a patch
>+; as this feature might be useful for others as well. Send patches or ideas
>+; to x264-devel at videolan.org .
>+
>+%ifndef private_prefix
>+ %define private_prefix dav1d
>+%endif
>+
>+%ifndef public_prefix
>+ %define public_prefix private_prefix
>+%endif
>+
>+%ifndef STACK_ALIGNMENT
>+ %if ARCH_X86_64
>+ %define STACK_ALIGNMENT 16
>+ %else
>+ %define STACK_ALIGNMENT 4
>+ %endif
>+%endif
>+
>+%define WIN64 0
>+%define UNIX64 0
>+%if ARCH_X86_64
>+ %ifidn __OUTPUT_FORMAT__,win32
>+ %define WIN64 1
>+ %elifidn __OUTPUT_FORMAT__,win64
>+ %define WIN64 1
>+ %elifidn __OUTPUT_FORMAT__,x64
>+ %define WIN64 1
>+ %else
>+ %define UNIX64 1
>+ %endif
>+%endif
>+
>+%define FORMAT_ELF 0
>+%ifidn __OUTPUT_FORMAT__,elf
>+ %define FORMAT_ELF 1
>+%elifidn __OUTPUT_FORMAT__,elf32
>+ %define FORMAT_ELF 1
>+%elifidn __OUTPUT_FORMAT__,elf64
>+ %define FORMAT_ELF 1
>+%endif
>+
>+%ifdef PREFIX
>+ %define mangle(x) _ %+ x
>+%else
>+ %define mangle(x) x
>+%endif
>+
>+%macro SECTION_RODATA 0-1 16
>+ %ifidn __OUTPUT_FORMAT__,win32
>+ SECTION .rdata align=%1
>+ %elif WIN64
>+ SECTION .rdata align=%1
>+ %else
>+ SECTION .rodata align=%1
>+ %endif
>+%endmacro
>+
>+%if ARCH_X86_64
>+ %define PIC 1 ; always use PIC on x86-64
>+ default rel
>+%elifidn __OUTPUT_FORMAT__,win32
>+ %define PIC 0 ; PIC isn't used on 32-bit Windows
>+%elifndef PIC
>+ %define PIC 0
>+%endif
>+
>+%ifdef __NASM_VER__
>+ %use smartalign
>+%endif
>+
>+; Macros to eliminate most code duplication between x86_32 and x86_64:
>+; Currently this works only for leaf functions which load all their arguments
>+; into registers at the start, and make no other use of the stack. Luckily that
>+; covers most of x264's asm.
>+
>+; PROLOGUE:
>+; %1 = number of arguments. loads them from stack if needed.
>+; %2 = number of registers used. pushes callee-saved regs if needed.
>+; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
>+; %4 = (optional) stack size to be allocated. The stack will be aligned before
>+;      allocating the specified stack size. If the required stack alignment is
>+;      larger than the known stack alignment the stack will be manually aligned
>+;      and an extra register will be allocated to hold the original stack
>+;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
>+;      register as stack pointer, request a negative stack size.
>+; %4+/%5+ = list of names to define to registers
>+; PROLOGUE can also be invoked by adding the same options to cglobal
>+
>+; e.g.
>+; cglobal foo, 2,3,7,0x40, dst, src, tmp
>+; declares a function (foo) that automatically loads two arguments (dst and
>+; src) into registers, uses one additional register (tmp) plus 7 vector
>+; registers (m0-m6) and allocates 0x40 bytes of stack space.
>+
>+; TODO Some functions can use some args directly from the stack. If they're the
>+; last args then you can just not declare them, but if they're in the middle
>+; we need more flexible macro.
>+
>+; RET:
>+; Pops anything that was pushed by PROLOGUE, and returns.
>+
>+; REP_RET:
>+; Use this instead of RET if it's a branch target.
>+
>+; registers:
>+; rN and rNq are the native-size register holding function argument N
>+; rNd, rNw, rNb are dword, word, and byte size
>+; rNh is the high 8 bits of the word size
>+; rNm is the original location of arg N (a register or on the stack), dword
>+; rNmp is native size
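>+;
>+; e.g. (editor's illustration, not part of the original header) in the
>+; "cglobal foo" example above, dstq aliases r0 (rdi on unix64, rcx on win64),
>+; dstd aliases r0d, and dstm/r0m name arg 0's original dword location
>+; (a register or a stack slot, depending on the calling convention).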
>+
>+%macro DECLARE_REG 2-3
>+ %define r%1q %2
>+ %define r%1d %2d
>+ %define r%1w %2w
>+ %define r%1b %2b
>+ %define r%1h %2h
>+ %define %2q %2
>+ %if %0 == 2
>+ %define r%1m %2d
>+ %define r%1mp %2
>+ %elif ARCH_X86_64 ; memory
>+ %define r%1m [rstk + stack_offset + %3]
>+ %define r%1mp qword r %+ %1 %+ m
>+ %else
>+ %define r%1m [rstk + stack_offset + %3]
>+ %define r%1mp dword r %+ %1 %+ m
>+ %endif
>+ %define r%1 %2
>+%endmacro
>+
>+%macro DECLARE_REG_SIZE 3
>+ %define r%1q r%1
>+ %define e%1q r%1
>+ %define r%1d e%1
>+ %define e%1d e%1
>+ %define r%1w %1
>+ %define e%1w %1
>+ %define r%1h %3
>+ %define e%1h %3
>+ %define r%1b %2
>+ %define e%1b %2
>+ %if ARCH_X86_64 == 0
>+ %define r%1 e%1
>+ %endif
>+%endmacro
>+
>+DECLARE_REG_SIZE ax, al, ah
>+DECLARE_REG_SIZE bx, bl, bh
>+DECLARE_REG_SIZE cx, cl, ch
>+DECLARE_REG_SIZE dx, dl, dh
>+DECLARE_REG_SIZE si, sil, null
>+DECLARE_REG_SIZE di, dil, null
>+DECLARE_REG_SIZE bp, bpl, null
>+
>+; t# defines for when per-arch register allocation is more complex than just function arguments
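>+;
>+; e.g. (editor's illustration) "DECLARE_REG_TMP 2,0,1" makes t0 an alias for
>+; r2, t1 for r0 and t2 for r1; the sized views t0d/t0w/t0b etc. follow from
>+; DECLARE_REG_TMP_SIZE below.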
>+
>+%macro DECLARE_REG_TMP 1-*
>+ %assign %%i 0
>+ %rep %0
>+ CAT_XDEFINE t, %%i, r%1
>+ %assign %%i %%i+1
>+ %rotate 1
>+ %endrep
>+%endmacro
>+
>+%macro DECLARE_REG_TMP_SIZE 0-*
>+ %rep %0
>+ %define t%1q t%1 %+ q
>+ %define t%1d t%1 %+ d
>+ %define t%1w t%1 %+ w
>+ %define t%1h t%1 %+ h
>+ %define t%1b t%1 %+ b
>+ %rotate 1
>+ %endrep
>+%endmacro
>+
>+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
>+
>+%if ARCH_X86_64
>+ %define gprsize 8
>+%else
>+ %define gprsize 4
>+%endif
>+
>+%macro LEA 2
>+%if ARCH_X86_64
>+ lea %1, [%2]
>+%elif PIC
>+ call $+5 ; special-cased to not affect the RSB on most CPU:s
>+ pop %1
>+ add %1, (%2)-$+1
>+%else
>+ mov %1, %2
>+%endif
>+%endmacro
>+
>+%macro PUSH 1
>+ push %1
>+ %ifidn rstk, rsp
>+ %assign stack_offset stack_offset+gprsize
>+ %endif
>+%endmacro
>+
>+%macro POP 1
>+ pop %1
>+ %ifidn rstk, rsp
>+ %assign stack_offset stack_offset-gprsize
>+ %endif
>+%endmacro
>+
>+%macro PUSH_IF_USED 1-*
>+ %rep %0
>+ %if %1 < regs_used
>+ PUSH r%1
>+ %endif
>+ %rotate 1
>+ %endrep
>+%endmacro
>+
>+%macro POP_IF_USED 1-*
>+ %rep %0
>+ %if %1 < regs_used
>+ pop r%1
>+ %endif
>+ %rotate 1
>+ %endrep
>+%endmacro
>+
>+%macro LOAD_IF_USED 1-*
>+ %rep %0
>+ %if %1 < num_args
>+ mov r%1, r %+ %1 %+ mp
>+ %endif
>+ %rotate 1
>+ %endrep
>+%endmacro
>+
>+%macro SUB 2
>+ sub %1, %2
>+ %ifidn %1, rstk
>+ %assign stack_offset stack_offset+(%2)
>+ %endif
>+%endmacro
>+
>+%macro ADD 2
>+ add %1, %2
>+ %ifidn %1, rstk
>+ %assign stack_offset stack_offset-(%2)
>+ %endif
>+%endmacro
>+
>+%macro movifnidn 2
>+ %ifnidn %1, %2
>+ mov %1, %2
>+ %endif
>+%endmacro
>+
>+%if ARCH_X86_64 == 0
>+ %define movsxd movifnidn
>+%endif
>+
>+%macro movsxdifnidn 2
>+ %ifnidn %1, %2
>+ movsxd %1, %2
>+ %endif
>+%endmacro
>+
>+%macro ASSERT 1
>+ %if (%1) == 0
>+ %error assertion ``%1'' failed
>+ %endif
>+%endmacro
>+
>+%macro DEFINE_ARGS 0-*
>+ %ifdef n_arg_names
>+ %assign %%i 0
>+ %rep n_arg_names
>+ CAT_UNDEF arg_name %+ %%i, q
>+ CAT_UNDEF arg_name %+ %%i, d
>+ CAT_UNDEF arg_name %+ %%i, w
>+ CAT_UNDEF arg_name %+ %%i, h
>+ CAT_UNDEF arg_name %+ %%i, b
>+ CAT_UNDEF arg_name %+ %%i, m
>+ CAT_UNDEF arg_name %+ %%i, mp
>+ CAT_UNDEF arg_name, %%i
>+ %assign %%i %%i+1
>+ %endrep
>+ %endif
>+
>+ %xdefine %%stack_offset stack_offset
>+    %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
>+ %assign %%i 0
>+ %rep %0
>+ %xdefine %1q r %+ %%i %+ q
>+ %xdefine %1d r %+ %%i %+ d
>+ %xdefine %1w r %+ %%i %+ w
>+ %xdefine %1h r %+ %%i %+ h
>+ %xdefine %1b r %+ %%i %+ b
>+ %xdefine %1m r %+ %%i %+ m
>+ %xdefine %1mp r %+ %%i %+ mp
>+ CAT_XDEFINE arg_name, %%i, %1
>+ %assign %%i %%i+1
>+ %rotate 1
>+ %endrep
>+ %xdefine stack_offset %%stack_offset
>+ %assign n_arg_names %0
>+%endmacro
>+
>+%define required_stack_alignment ((mmsize + 15) & ~15)
>+%define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
>+%define high_mm_regs (16*cpuflag(avx512))
>+
>+%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
>+ %ifnum %1
>+ %if %1 != 0
>+ %assign %%pad 0
>+ %assign stack_size %1
>+ %if stack_size < 0
>+ %assign stack_size -stack_size
>+ %endif
>+ %if WIN64
>+ %assign %%pad %%pad + 32 ; shadow space
>+ %if mmsize != 8
>+ %assign xmm_regs_used %2
>+ %if xmm_regs_used > 8
>+                    %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
>+ %endif
>+ %endif
>+ %endif
>+ %if required_stack_alignment <= STACK_ALIGNMENT
>+ ; maintain the current stack alignment
>+            %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
>+ SUB rsp, stack_size_padded
>+ %else
>+ %assign %%reg_num (regs_used - 1)
>+ %xdefine rstk r %+ %%reg_num
>+            ; align stack, and save original stack location directly above
>+            ; it, i.e. in [rsp+stack_size_padded], so we can restore the
>+            ; stack in a single instruction (i.e. mov rsp, rstk or mov
>+            ; rsp, [rsp+stack_size_padded])
>+ %if %1 < 0 ; need to store rsp on stack
>+ %xdefine rstkm [rsp + stack_size + %%pad]
>+ %assign %%pad %%pad + gprsize
>+ %else ; can keep rsp in rstk during whole function
>+ %xdefine rstkm rstk
>+ %endif
>+            %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
>+ mov rstk, rsp
>+ and rsp, ~(required_stack_alignment-1)
>+ sub rsp, stack_size_padded
>+ movifnidn rstkm, rstk
>+ %endif
>+ WIN64_PUSH_XMM
>+ %endif
>+ %endif
>+%endmacro
>+
>+%macro SETUP_STACK_POINTER 1
>+ %ifnum %1
>+ %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
>+ %if %1 > 0
>+                ; Reserve an additional register for storing the original stack pointer, but avoid using
>+                ; eax/rax for this purpose since it can potentially get overwritten as a return value.
>+ %assign regs_used (regs_used + 1)
>+ %if ARCH_X86_64 && regs_used == 7
>+ %assign regs_used 8
>+ %elif ARCH_X86_64 == 0 && regs_used == 1
>+ %assign regs_used 2
>+ %endif
>+ %endif
>+ %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
>+                ; Ensure that we don't clobber any registers containing arguments. For UNIX64 we also preserve r6 (rax)
>+                ; since it's used as a hidden argument in vararg functions to specify the number of vector registers used.
>+ %assign regs_used 5 + UNIX64 * 3
>+ %endif
>+ %endif
>+ %endif
>+%endmacro
>+
>+%macro DEFINE_ARGS_INTERNAL 3+
>+ %ifnum %2
>+ DEFINE_ARGS %3
>+ %elif %1 == 4
>+ DEFINE_ARGS %2
>+ %elif %1 > 4
>+ DEFINE_ARGS %2, %3
>+ %endif
>+%endmacro
>+
>+%if WIN64 ; Windows x64 ;=================================================
>+
>+DECLARE_REG 0, rcx
>+DECLARE_REG 1, rdx
>+DECLARE_REG 2, R8
>+DECLARE_REG 3, R9
>+DECLARE_REG 4, R10, 40
>+DECLARE_REG 5, R11, 48
>+DECLARE_REG 6, rax, 56
>+DECLARE_REG 7, rdi, 64
>+DECLARE_REG 8, rsi, 72
>+DECLARE_REG 9, rbx, 80
>+DECLARE_REG 10, rbp, 88
>+DECLARE_REG 11, R14, 96
>+DECLARE_REG 12, R15, 104
>+DECLARE_REG 13, R12, 112
>+DECLARE_REG 14, R13, 120
>+
>+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
>+ %assign num_args %1
>+ %assign regs_used %2
>+ ASSERT regs_used >= num_args
>+ SETUP_STACK_POINTER %4
>+ ASSERT regs_used <= 15
>+ PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
>+ ALLOC_STACK %4, %3
>+ %if mmsize != 8 && stack_size == 0
>+ WIN64_SPILL_XMM %3
>+ %endif
>+ LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
>+ DEFINE_ARGS_INTERNAL %0, %4, %5
>+%endmacro
>+
>+%macro WIN64_PUSH_XMM 0
>+    ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
>+ %if xmm_regs_used > 6 + high_mm_regs
>+ movaps [rstk + stack_offset + 8], xmm6
>+ %endif
>+ %if xmm_regs_used > 7 + high_mm_regs
>+ movaps [rstk + stack_offset + 24], xmm7
>+ %endif
>+ %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
>+ %if %%xmm_regs_on_stack > 0
>+ %assign %%i 8
>+ %rep %%xmm_regs_on_stack
>+ movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
>+ %assign %%i %%i+1
>+ %endrep
>+ %endif
>+%endmacro
>+
>+%macro WIN64_SPILL_XMM 1
>+ %assign xmm_regs_used %1
>+ ASSERT xmm_regs_used <= 16 + high_mm_regs
>+ %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
>+ %if %%xmm_regs_on_stack > 0
>+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
>+ %assign %%pad %%xmm_regs_on_stack*16 + 32
>+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
>+ SUB rsp, stack_size_padded
>+ %endif
>+ WIN64_PUSH_XMM
>+%endmacro
>+
>+%macro WIN64_RESTORE_XMM_INTERNAL 0
>+ %assign %%pad_size 0
>+ %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
>+ %if %%xmm_regs_on_stack > 0
>+ %assign %%i xmm_regs_used - high_mm_regs
>+ %rep %%xmm_regs_on_stack
>+ %assign %%i %%i-1
>+ movaps xmm %+ %%i, [rsp + (%%i-8)*16 + stack_size + 32]
>+ %endrep
>+ %endif
>+ %if stack_size_padded > 0
>+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
>+ mov rsp, rstkm
>+ %else
>+ add rsp, stack_size_padded
>+ %assign %%pad_size stack_size_padded
>+ %endif
>+ %endif
>+ %if xmm_regs_used > 7 + high_mm_regs
>+ movaps xmm7, [rsp + stack_offset - %%pad_size + 24]
>+ %endif
>+ %if xmm_regs_used > 6 + high_mm_regs
>+ movaps xmm6, [rsp + stack_offset - %%pad_size + 8]
>+ %endif
>+%endmacro
>+
>+%macro WIN64_RESTORE_XMM 0
>+ WIN64_RESTORE_XMM_INTERNAL
>+ %assign stack_offset (stack_offset-stack_size_padded)
>+ %assign stack_size_padded 0
>+ %assign xmm_regs_used 0
>+%endmacro
>+
>+%define has_epilogue regs_used > 7 || stack_size > 0 || vzeroupper_required || xmm_regs_used > 6+high_mm_regs
>+
>+%macro RET 0
>+ WIN64_RESTORE_XMM_INTERNAL
>+ POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
>+ %if vzeroupper_required
>+ vzeroupper
>+ %endif
>+ AUTO_REP_RET
>+%endmacro
>+
>+%elif ARCH_X86_64 ; *nix x64 ;=============================================
>+
>+DECLARE_REG 0, rdi
>+DECLARE_REG 1, rsi
>+DECLARE_REG 2, rdx
>+DECLARE_REG 3, rcx
>+DECLARE_REG 4, R8
>+DECLARE_REG 5, R9
>+DECLARE_REG 6, rax, 8
>+DECLARE_REG 7, R10, 16
>+DECLARE_REG 8, R11, 24
>+DECLARE_REG 9, rbx, 32
>+DECLARE_REG 10, rbp, 40
>+DECLARE_REG 11, R14, 48
>+DECLARE_REG 12, R15, 56
>+DECLARE_REG 13, R12, 64
>+DECLARE_REG 14, R13, 72
>+
>+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
>+ %assign num_args %1
>+ %assign regs_used %2
>+ %assign xmm_regs_used %3
>+ ASSERT regs_used >= num_args
>+ SETUP_STACK_POINTER %4
>+ ASSERT regs_used <= 15
>+ PUSH_IF_USED 9, 10, 11, 12, 13, 14
>+ ALLOC_STACK %4
>+ LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
>+ DEFINE_ARGS_INTERNAL %0, %4, %5
>+%endmacro
>+
>+%define has_epilogue regs_used > 9 || stack_size > 0 || vzeroupper_required
>+
>+%macro RET 0
>+ %if stack_size_padded > 0
>+ %if required_stack_alignment > STACK_ALIGNMENT
>+ mov rsp, rstkm
>+ %else
>+ add rsp, stack_size_padded
>+ %endif
>+ %endif
>+ POP_IF_USED 14, 13, 12, 11, 10, 9
>+ %if vzeroupper_required
>+ vzeroupper
>+ %endif
>+ AUTO_REP_RET
>+%endmacro
>+
>+%else ; X86_32 ;==============================================================
>+
>+DECLARE_REG 0, eax, 4
>+DECLARE_REG 1, ecx, 8
>+DECLARE_REG 2, edx, 12
>+DECLARE_REG 3, ebx, 16
>+DECLARE_REG 4, esi, 20
>+DECLARE_REG 5, edi, 24
>+DECLARE_REG 6, ebp, 28
>+%define rsp esp
>+
>+%macro DECLARE_ARG 1-*
>+ %rep %0
>+ %define r%1m [rstk + stack_offset + 4*%1 + 4]
>+ %define r%1mp dword r%1m
>+ %rotate 1
>+ %endrep
>+%endmacro
>+
>+DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
>+
>+%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
>+ %assign num_args %1
>+ %assign regs_used %2
>+ ASSERT regs_used >= num_args
>+ %if num_args > 7
>+ %assign num_args 7
>+ %endif
>+ %if regs_used > 7
>+ %assign regs_used 7
>+ %endif
>+ SETUP_STACK_POINTER %4
>+ ASSERT regs_used <= 7
>+ PUSH_IF_USED 3, 4, 5, 6
>+ ALLOC_STACK %4
>+ LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
>+ DEFINE_ARGS_INTERNAL %0, %4, %5
>+%endmacro
>+
>+%define has_epilogue regs_used > 3 || stack_size > 0 || vzeroupper_required
>+
>+%macro RET 0
>+ %if stack_size_padded > 0
>+ %if required_stack_alignment > STACK_ALIGNMENT
>+ mov rsp, rstkm
>+ %else
>+ add rsp, stack_size_padded
>+ %endif
>+ %endif
>+ POP_IF_USED 6, 5, 4, 3
>+ %if vzeroupper_required
>+ vzeroupper
>+ %endif
>+ AUTO_REP_RET
>+%endmacro
>+
>+%endif ;======================================================================
>+
>+%if WIN64 == 0
>+ %macro WIN64_SPILL_XMM 1
>+ %endmacro
>+ %macro WIN64_RESTORE_XMM 0
>+ %endmacro
>+ %macro WIN64_PUSH_XMM 0
>+ %endmacro
>+%endif
>+
>+; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
>+; a branch or a branch target. So switch to a 2-byte form of ret in that case.
>+; We can automatically detect "follows a branch", but not a branch target.
>+; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)
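>+;
>+; e.g. (editor's illustration, not part of the original header) a ret that is
>+; itself a jump target should be spelled REP_RET:
>+;     jz .skip
>+;     ...
>+; .skip:
>+;     REP_RET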
>+%macro REP_RET 0
>+ %if has_epilogue || cpuflag(ssse3)
>+ RET
>+ %else
>+ rep ret
>+ %endif
>+ annotate_function_size
>+%endmacro
>+
>+%define last_branch_adr $$
>+%macro AUTO_REP_RET 0
>+ %if notcpuflag(ssse3)
>+        times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr.
>+ %endif
>+ ret
>+ annotate_function_size
>+%endmacro
>+
>+%macro BRANCH_INSTR 0-*
>+ %rep %0
>+ %macro %1 1-2 %1
>+ %2 %1
>+ %if notcpuflag(ssse3)
>+ %%branch_instr equ $
>+ %xdefine last_branch_adr %%branch_instr
>+ %endif
>+ %endmacro
>+ %rotate 1
>+ %endrep
>+%endmacro
>+
>+BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp
>+
>+%macro TAIL_CALL 1-2 1 ; callee, is_nonadjacent
>+ %if has_epilogue
>+ call %1
>+ RET
>+ %elif %2
>+ jmp %1
>+ %endif
>+ annotate_function_size
>+%endmacro
>+
>+;=============================================================================
>+; arch-independent part
>+;=============================================================================
>+
>+%assign function_align 16
>+
>+; B
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.