[vlc-devel] [PATCH 1/3] build: x86: add build support for nasm via x86inc.asm/x86util.asm

Rémi Denis-Courmont remi at remlab.net
Mon Jan 28 09:09:50 CET 2019


AFAIR, AC_SUBST invocations should not be conditional.
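
For illustration only, a minimal, untested sketch (reusing the variable names from the patch) of what keeping AC_SUBST unconditional could look like, with only the define left inside AM_COND_IF:

  dnl Use nasm/yasm only on x86
  AC_CHECK_PROGS(X86ASM, [nasm yasm])
  AM_CONDITIONAL([HAVE_X86ASM], [test -n "${X86ASM}" && test -n "${X86ASMFLAGS}"])
  AM_COND_IF([HAVE_X86ASM],
      [AC_DEFINE([HAVE_X86ASM], [1], [Use external asm on x86.])])
  dnl Substituting empty values on non-x86 hosts is harmless.
  AC_SUBST([X86ASMFLAGS])
  AC_SUBST([X86ASMDEFS])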

On 26 January 2019 14:23:36 GMT+02:00, Janne Grunau <janne-vlc at jannau.net> wrote:
>x86inc.asm copied from dav1d (8c5d34c85613) and x86util.asm from libav
>(994c4bc10751). Libav's LGPL licensed x86util.asm is required for
>yadif.
>
>This reverts "Remove unused support for .asm files"
>commit 6c0f63cd6853c0d184a5abbf2e19c1626d2854ef.
>---
> configure.ac                   |   28 +
> extras/include/x86/x86inc.asm  | 1742 ++++++++++++++++++++++++++++++++
> extras/include/x86/x86util.asm |  705 +++++++++++++
> modules/common.am              |    5 +-
> 4 files changed, 2479 insertions(+), 1 deletion(-)
> create mode 100644 extras/include/x86/x86inc.asm
> create mode 100644 extras/include/x86/x86util.asm
>
>diff --git a/configure.ac b/configure.ac
>index a2b8ade789..96d13fa1d2 100644
>--- a/configure.ac
>+++ b/configure.ac
>@@ -95,6 +95,19 @@ HAVE_IOS="0"
> HAVE_OSX="0"
> HAVE_TVOS="0"
> 
>+dnl  Set x86 asm flags and defines
>+X86ASMFLAGS=""
>+case "${host_cpu}" in
>+    i?86)
>+    X86ASMFLAGS="-f elf32"
>+    X86ASMDEFS="-DARCH_X86_32=1 -DARCH_X86_64=0"
>+        ;;
>+    x86_64)
>+    X86ASMFLAGS="-f elf64"
>+    X86ASMDEFS="-DARCH_X86_32=0 -DARCH_X86_64=1"
>+        ;;
>+esac
>+
> case "${host_os}" in
>   "")
>     SYS=unknown
>@@ -132,6 +145,8 @@ case "${host_os}" in
>     case "${host_cpu}" in
>       i?86)
>         ARCH_flag="-arch i386"
>+        X86ASMFLAGS="-f macho32"
>+        X86ASMDEFS="${X86ASMDEFS} -DPREFIX"
>       ;;
>       ppc64*)
>         ARCH_flag="-arch ppc64"
>@@ -141,6 +156,8 @@ case "${host_os}" in
>       ;;
>       x86_64)
>         ARCH_flag="-arch x86_64"
>+        X86ASMFLAGS="-f macho64"
>+        X86ASMDEFS="${X86ASMDEFS} -DPREFIX"
>       ;;
>       arm*)
>         ac_cv_c_bigendian="no"
>@@ -259,10 +276,13 @@ case "${host_os}" in
>                 WINDOWS_ARCH="x64"
>                 PROGRAMFILES="PROGRAMFILES64"
>                 LDFLAGS="${LDFLAGS} -Wl,--high-entropy-va -Wl,--image-base,0x140000000"
>+                X86ASMFLAGS="-f win64"
>             ;;
>             *)
>                 WINDOWS_ARCH="x86"
>                 PROGRAMFILES="PROGRAMFILES"
>+                X86ASMFLAGS="-f win32"
>+                X86ASMDEFS="${X86ASMDEFS} -DPREFIX"
>             ;;
>         esac
>         AC_SUBST([WINDOWS_ARCH])
>@@ -332,6 +352,14 @@ AM_CONDITIONAL([HAVE_WIN64],   [test "${HAVE_WIN64}" = "1"]) dnl Only used for t
> AM_CONDITIONAL([HAVE_WINSTORE], [test "$vlc_winstore_app" = "1"])
> AM_CONDITIONAL([HAVE_WIN32_DESKTOP], [test "${SYS}" = "mingw32" -a "$vlc_winstore_app" = "0"])
> 
>+dnl Use nasm/yasm only on x86
>+AC_CHECK_PROGS(X86ASM, [nasm yasm])
>+AM_CONDITIONAL([HAVE_X86ASM], [test -n "${X86ASM}" && test -n "${X86ASMFLAGS}"])
>+AM_COND_IF([HAVE_X86ASM], [
>+    AC_DEFINE([HAVE_X86ASM], [1], [Use external asm on x86.]),
>+    AC_SUBST([X86ASMFLAGS]),
>+    AC_SUBST([X86ASMDEFS])])
>+
> dnl
> dnl Sadly autoconf does not think about testing foo.exe when ask to test
> dnl for program foo on win32
>diff --git a/extras/include/x86/x86inc.asm b/extras/include/x86/x86inc.asm
>new file mode 100644
>index 0000000000..b249f2a792
>--- /dev/null
>+++ b/extras/include/x86/x86inc.asm
>@@ -0,0 +1,1742 @@
>+;*****************************************************************************
>+;* x86inc.asm: x264asm abstraction layer
>+;*****************************************************************************
>+;* Copyright (C) 2005-2018 x264 project
>+;*
>+;* Authors: Loren Merritt <lorenm at u.washington.edu>
>+;*          Henrik Gramner <henrik at gramner.com>
>+;*          Anton Mitrofanov <BugMaster at narod.ru>
>+;*          Fiona Glaser <fiona at x264.com>
>+;*
>+;* Permission to use, copy, modify, and/or distribute this software for any
>+;* purpose with or without fee is hereby granted, provided that the above
>+;* copyright notice and this permission notice appear in all copies.
>+;*
>+;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
>+;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>+;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
>+;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
>+;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
>+;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
>+;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>+;*****************************************************************************
>+
>+; This is a header file for the x264ASM assembly language, which uses
>+; NASM/YASM syntax combined with a large number of macros to provide easy
>+; abstraction between different calling conventions (x86_32, win64, linux64).
>+; It also has various other useful features to simplify writing the kind of
>+; DSP functions that are most often used in x264.
>+
>+; Unlike the rest of x264, this file is available under an ISC license, as it
>+; has significant usefulness outside of x264 and we want it to be available
>+; to the largest audience possible.  Of course, if you modify it for your own
>+; purposes to add a new feature, we strongly encourage contributing a patch
>+; as this feature might be useful for others as well.  Send patches or ideas
>+; to x264-devel at videolan.org .
>+
>+%ifndef private_prefix
>+    %define private_prefix dav1d
>+%endif
>+
>+%ifndef public_prefix
>+    %define public_prefix private_prefix
>+%endif
>+
>+%ifndef STACK_ALIGNMENT
>+    %if ARCH_X86_64
>+        %define STACK_ALIGNMENT 16
>+    %else
>+        %define STACK_ALIGNMENT 4
>+    %endif
>+%endif
>+
>+%define WIN64  0
>+%define UNIX64 0
>+%if ARCH_X86_64
>+    %ifidn __OUTPUT_FORMAT__,win32
>+        %define WIN64  1
>+    %elifidn __OUTPUT_FORMAT__,win64
>+        %define WIN64  1
>+    %elifidn __OUTPUT_FORMAT__,x64
>+        %define WIN64  1
>+    %else
>+        %define UNIX64 1
>+    %endif
>+%endif
>+
>+%define FORMAT_ELF 0
>+%ifidn __OUTPUT_FORMAT__,elf
>+    %define FORMAT_ELF 1
>+%elifidn __OUTPUT_FORMAT__,elf32
>+    %define FORMAT_ELF 1
>+%elifidn __OUTPUT_FORMAT__,elf64
>+    %define FORMAT_ELF 1
>+%endif
>+
>+%ifdef PREFIX
>+    %define mangle(x) _ %+ x
>+%else
>+    %define mangle(x) x
>+%endif
>+
>+%macro SECTION_RODATA 0-1 16
>+    %ifidn __OUTPUT_FORMAT__,win32
>+        SECTION .rdata align=%1
>+    %elif WIN64
>+        SECTION .rdata align=%1
>+    %else
>+        SECTION .rodata align=%1
>+    %endif
>+%endmacro
>+
>+%if ARCH_X86_64
>+    %define PIC 1 ; always use PIC on x86-64
>+    default rel
>+%elifidn __OUTPUT_FORMAT__,win32
>+    %define PIC 0 ; PIC isn't used on 32-bit Windows
>+%elifndef PIC
>+    %define PIC 0
>+%endif
>+
>+%ifdef __NASM_VER__
>+    %use smartalign
>+%endif
>+
>+; Macros to eliminate most code duplication between x86_32 and x86_64:
>+; Currently this works only for leaf functions which load all their arguments
>+; into registers at the start, and make no other use of the stack. Luckily that
>+; covers most of x264's asm.
>+
>+; PROLOGUE:
>+; %1 = number of arguments. loads them from stack if needed.
>+; %2 = number of registers used. pushes callee-saved regs if needed.
>+; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
>+; %4 = (optional) stack size to be allocated. The stack will be aligned before
>+;      allocating the specified stack size. If the required stack alignment is
>+;      larger than the known stack alignment the stack will be manually aligned
>+;      and an extra register will be allocated to hold the original stack
>+;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
>+;      register as stack pointer, request a negative stack size.
>+; %4+/%5+ = list of names to define to registers
>+; PROLOGUE can also be invoked by adding the same options to cglobal
>+
>+; e.g.
>+; cglobal foo, 2,3,7,0x40, dst, src, tmp
>+; declares a function (foo) that automatically loads two arguments (dst and
>+; src) into registers, uses one additional register (tmp) plus 7 vector
>+; registers (m0-m6) and allocates 0x40 bytes of stack space.
>+
>+; TODO Some functions can use some args directly from the stack. If they're the
>+; last args then you can just not declare them, but if they're in the middle
>+; we need more flexible macro.
>+
>+; RET:
>+; Pops anything that was pushed by PROLOGUE, and returns.
>+
>+; REP_RET:
>+; Use this instead of RET if it's a branch target.
>+
>+; registers:
>+; rN and rNq are the native-size register holding function argument N
>+; rNd, rNw, rNb are dword, word, and byte size
>+; rNh is the high 8 bits of the word size
>+; rNm is the original location of arg N (a register or on the stack), dword
>+; rNmp is native size
>+
>+%macro DECLARE_REG 2-3
>+    %define r%1q %2
>+    %define r%1d %2d
>+    %define r%1w %2w
>+    %define r%1b %2b
>+    %define r%1h %2h
>+    %define %2q %2
>+    %if %0 == 2
>+        %define r%1m  %2d
>+        %define r%1mp %2
>+    %elif ARCH_X86_64 ; memory
>+        %define r%1m [rstk + stack_offset + %3]
>+        %define r%1mp qword r %+ %1 %+ m
>+    %else
>+        %define r%1m [rstk + stack_offset + %3]
>+        %define r%1mp dword r %+ %1 %+ m
>+    %endif
>+    %define r%1  %2
>+%endmacro
>+
>+%macro DECLARE_REG_SIZE 3
>+    %define r%1q r%1
>+    %define e%1q r%1
>+    %define r%1d e%1
>+    %define e%1d e%1
>+    %define r%1w %1
>+    %define e%1w %1
>+    %define r%1h %3
>+    %define e%1h %3
>+    %define r%1b %2
>+    %define e%1b %2
>+    %if ARCH_X86_64 == 0
>+        %define r%1 e%1
>+    %endif
>+%endmacro
>+
>+DECLARE_REG_SIZE ax, al, ah
>+DECLARE_REG_SIZE bx, bl, bh
>+DECLARE_REG_SIZE cx, cl, ch
>+DECLARE_REG_SIZE dx, dl, dh
>+DECLARE_REG_SIZE si, sil, null
>+DECLARE_REG_SIZE di, dil, null
>+DECLARE_REG_SIZE bp, bpl, null
>+
>+; t# defines for when per-arch register allocation is more complex than just function arguments
>+
>+%macro DECLARE_REG_TMP 1-*
>+    %assign %%i 0
>+    %rep %0
>+        CAT_XDEFINE t, %%i, r%1
>+        %assign %%i %%i+1
>+        %rotate 1
>+    %endrep
>+%endmacro
>+
>+%macro DECLARE_REG_TMP_SIZE 0-*
>+    %rep %0
>+        %define t%1q t%1 %+ q
>+        %define t%1d t%1 %+ d
>+        %define t%1w t%1 %+ w
>+        %define t%1h t%1 %+ h
>+        %define t%1b t%1 %+ b
>+        %rotate 1
>+    %endrep
>+%endmacro
>+
>+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
>+
>+%if ARCH_X86_64
>+    %define gprsize 8
>+%else
>+    %define gprsize 4
>+%endif
>+
>+%macro LEA 2
>+%if ARCH_X86_64
>+    lea %1, [%2]
>+%elif PIC
>+    call $+5 ; special-cased to not affect the RSB on most CPU:s
>+    pop %1
>+    add %1, (%2)-$+1
>+%else
>+    mov %1, %2
>+%endif
>+%endmacro
>+
>+%macro PUSH 1
>+    push %1
>+    %ifidn rstk, rsp
>+        %assign stack_offset stack_offset+gprsize
>+    %endif
>+%endmacro
>+
>+%macro POP 1
>+    pop %1
>+    %ifidn rstk, rsp
>+        %assign stack_offset stack_offset-gprsize
>+    %endif
>+%endmacro
>+
>+%macro PUSH_IF_USED 1-*
>+    %rep %0
>+        %if %1 < regs_used
>+            PUSH r%1
>+        %endif
>+        %rotate 1
>+    %endrep
>+%endmacro
>+
>+%macro POP_IF_USED 1-*
>+    %rep %0
>+        %if %1 < regs_used
>+            pop r%1
>+        %endif
>+        %rotate 1
>+    %endrep
>+%endmacro
>+
>+%macro LOAD_IF_USED 1-*
>+    %rep %0
>+        %if %1 < num_args
>+            mov r%1, r %+ %1 %+ mp
>+        %endif
>+        %rotate 1
>+    %endrep
>+%endmacro
>+
>+%macro SUB 2
>+    sub %1, %2
>+    %ifidn %1, rstk
>+        %assign stack_offset stack_offset+(%2)
>+    %endif
>+%endmacro
>+
>+%macro ADD 2
>+    add %1, %2
>+    %ifidn %1, rstk
>+        %assign stack_offset stack_offset-(%2)
>+    %endif
>+%endmacro
>+
>+%macro movifnidn 2
>+    %ifnidn %1, %2
>+        mov %1, %2
>+    %endif
>+%endmacro
>+
>+%if ARCH_X86_64 == 0
>+    %define movsxd movifnidn
>+%endif
>+
>+%macro movsxdifnidn 2
>+    %ifnidn %1, %2
>+        movsxd %1, %2
>+    %endif
>+%endmacro
>+
>+%macro ASSERT 1
>+    %if (%1) == 0
>+        %error assertion ``%1'' failed
>+    %endif
>+%endmacro
>+
>+%macro DEFINE_ARGS 0-*
>+    %ifdef n_arg_names
>+        %assign %%i 0
>+        %rep n_arg_names
>+            CAT_UNDEF arg_name %+ %%i, q
>+            CAT_UNDEF arg_name %+ %%i, d
>+            CAT_UNDEF arg_name %+ %%i, w
>+            CAT_UNDEF arg_name %+ %%i, h
>+            CAT_UNDEF arg_name %+ %%i, b
>+            CAT_UNDEF arg_name %+ %%i, m
>+            CAT_UNDEF arg_name %+ %%i, mp
>+            CAT_UNDEF arg_name, %%i
>+            %assign %%i %%i+1
>+        %endrep
>+    %endif
>+
>+    %xdefine %%stack_offset stack_offset
>+    %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
>+    %assign %%i 0
>+    %rep %0
>+        %xdefine %1q r %+ %%i %+ q
>+        %xdefine %1d r %+ %%i %+ d
>+        %xdefine %1w r %+ %%i %+ w
>+        %xdefine %1h r %+ %%i %+ h
>+        %xdefine %1b r %+ %%i %+ b
>+        %xdefine %1m r %+ %%i %+ m
>+        %xdefine %1mp r %+ %%i %+ mp
>+        CAT_XDEFINE arg_name, %%i, %1
>+        %assign %%i %%i+1
>+        %rotate 1
>+    %endrep
>+    %xdefine stack_offset %%stack_offset
>+    %assign n_arg_names %0
>+%endmacro
>+
>+%define required_stack_alignment ((mmsize + 15) & ~15)
>+%define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
>+%define high_mm_regs (16*cpuflag(avx512))
>+
>+%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
>+    %ifnum %1
>+        %if %1 != 0
>+            %assign %%pad 0
>+            %assign stack_size %1
>+            %if stack_size < 0
>+                %assign stack_size -stack_size
>+            %endif
>+            %if WIN64
>+                %assign %%pad %%pad + 32 ; shadow space
>+                %if mmsize != 8
>+                    %assign xmm_regs_used %2
>+                    %if xmm_regs_used > 8
>+                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
>+                    %endif
>+                %endif
>+            %endif
>+            %if required_stack_alignment <= STACK_ALIGNMENT
>+                ; maintain the current stack alignment
>+                %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
>+                SUB rsp, stack_size_padded
>+            %else
>+                %assign %%reg_num (regs_used - 1)
>+                %xdefine rstk r %+ %%reg_num
>+                ; align stack, and save original stack location directly above
>+                ; it, i.e. in [rsp+stack_size_padded], so we can restore the
>+                ; stack in a single instruction (i.e. mov rsp, rstk or mov
>+                ; rsp, [rsp+stack_size_padded])
>+                %if %1 < 0 ; need to store rsp on stack
>+                    %xdefine rstkm [rsp + stack_size + %%pad]
>+                    %assign %%pad %%pad + gprsize
>+                %else ; can keep rsp in rstk during whole function
>+                    %xdefine rstkm rstk
>+                %endif
>+                %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
>+                mov rstk, rsp
>+                and rsp, ~(required_stack_alignment-1)
>+                sub rsp, stack_size_padded
>+                movifnidn rstkm, rstk
>+            %endif
>+            WIN64_PUSH_XMM
>+        %endif
>+    %endif
>+%endmacro
>+
>+%macro SETUP_STACK_POINTER 1
>+    %ifnum %1
>+        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
>+            %if %1 > 0
>+                ; Reserve an additional register for storing the original stack pointer, but avoid using
>+                ; eax/rax for this purpose since it can potentially get overwritten as a return value.
>+                %assign regs_used (regs_used + 1)
>+                %if ARCH_X86_64 && regs_used == 7
>+                    %assign regs_used 8
>+                %elif ARCH_X86_64 == 0 && regs_used == 1
>+                    %assign regs_used 2
>+                %endif
>+            %endif
>+            %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
>+                ; Ensure that we don't clobber any registers containing arguments. For UNIX64 we also preserve r6 (rax)
>+                ; since it's used as a hidden argument in vararg functions to specify the number of vector registers used.
>+                %assign regs_used 5 + UNIX64 * 3
>+            %endif
>+        %endif
>+    %endif
>+%endmacro
>+
>+%macro DEFINE_ARGS_INTERNAL 3+
>+    %ifnum %2
>+        DEFINE_ARGS %3
>+    %elif %1 == 4
>+        DEFINE_ARGS %2
>+    %elif %1 > 4
>+        DEFINE_ARGS %2, %3
>+    %endif
>+%endmacro
>+
>+%if WIN64 ; Windows x64 ;=================================================
>+
>+DECLARE_REG 0,  rcx
>+DECLARE_REG 1,  rdx
>+DECLARE_REG 2,  R8
>+DECLARE_REG 3,  R9
>+DECLARE_REG 4,  R10, 40
>+DECLARE_REG 5,  R11, 48
>+DECLARE_REG 6,  rax, 56
>+DECLARE_REG 7,  rdi, 64
>+DECLARE_REG 8,  rsi, 72
>+DECLARE_REG 9,  rbx, 80
>+DECLARE_REG 10, rbp, 88
>+DECLARE_REG 11, R14, 96
>+DECLARE_REG 12, R15, 104
>+DECLARE_REG 13, R12, 112
>+DECLARE_REG 14, R13, 120
>+
>+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
>+    %assign num_args %1
>+    %assign regs_used %2
>+    ASSERT regs_used >= num_args
>+    SETUP_STACK_POINTER %4
>+    ASSERT regs_used <= 15
>+    PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
>+    ALLOC_STACK %4, %3
>+    %if mmsize != 8 && stack_size == 0
>+        WIN64_SPILL_XMM %3
>+    %endif
>+    LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
>+    DEFINE_ARGS_INTERNAL %0, %4, %5
>+%endmacro
>+
>+%macro WIN64_PUSH_XMM 0
>+    ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
>+    %if xmm_regs_used > 6 + high_mm_regs
>+        movaps [rstk + stack_offset +  8], xmm6
>+    %endif
>+    %if xmm_regs_used > 7 + high_mm_regs
>+        movaps [rstk + stack_offset + 24], xmm7
>+    %endif
>+    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
>+    %if %%xmm_regs_on_stack > 0
>+        %assign %%i 8
>+        %rep %%xmm_regs_on_stack
>+            movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
>+            %assign %%i %%i+1
>+        %endrep
>+    %endif
>+%endmacro
>+
>+%macro WIN64_SPILL_XMM 1
>+    %assign xmm_regs_used %1
>+    ASSERT xmm_regs_used <= 16 + high_mm_regs
>+    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
>+    %if %%xmm_regs_on_stack > 0
>+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
>+        %assign %%pad %%xmm_regs_on_stack*16 + 32
>+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
>+        SUB rsp, stack_size_padded
>+    %endif
>+    WIN64_PUSH_XMM
>+%endmacro
>+
>+%macro WIN64_RESTORE_XMM_INTERNAL 0
>+    %assign %%pad_size 0
>+    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
>+    %if %%xmm_regs_on_stack > 0
>+        %assign %%i xmm_regs_used - high_mm_regs
>+        %rep %%xmm_regs_on_stack
>+            %assign %%i %%i-1
>+            movaps xmm %+ %%i, [rsp + (%%i-8)*16 + stack_size + 32]
>+        %endrep
>+    %endif
>+    %if stack_size_padded > 0
>+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
>+            mov rsp, rstkm
>+        %else
>+            add rsp, stack_size_padded
>+            %assign %%pad_size stack_size_padded
>+        %endif
>+    %endif
>+    %if xmm_regs_used > 7 + high_mm_regs
>+        movaps xmm7, [rsp + stack_offset - %%pad_size + 24]
>+    %endif
>+    %if xmm_regs_used > 6 + high_mm_regs
>+        movaps xmm6, [rsp + stack_offset - %%pad_size +  8]
>+    %endif
>+%endmacro
>+
>+%macro WIN64_RESTORE_XMM 0
>+    WIN64_RESTORE_XMM_INTERNAL
>+    %assign stack_offset (stack_offset-stack_size_padded)
>+    %assign stack_size_padded 0
>+    %assign xmm_regs_used 0
>+%endmacro
>+
>+%define has_epilogue regs_used > 7 || stack_size > 0 || vzeroupper_required || xmm_regs_used > 6+high_mm_regs
>+
>+%macro RET 0
>+    WIN64_RESTORE_XMM_INTERNAL
>+    POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
>+    %if vzeroupper_required
>+        vzeroupper
>+    %endif
>+    AUTO_REP_RET
>+%endmacro
>+
>+%elif ARCH_X86_64 ; *nix x64 ;=============================================
>+
>+DECLARE_REG 0,  rdi
>+DECLARE_REG 1,  rsi
>+DECLARE_REG 2,  rdx
>+DECLARE_REG 3,  rcx
>+DECLARE_REG 4,  R8
>+DECLARE_REG 5,  R9
>+DECLARE_REG 6,  rax, 8
>+DECLARE_REG 7,  R10, 16
>+DECLARE_REG 8,  R11, 24
>+DECLARE_REG 9,  rbx, 32
>+DECLARE_REG 10, rbp, 40
>+DECLARE_REG 11, R14, 48
>+DECLARE_REG 12, R15, 56
>+DECLARE_REG 13, R12, 64
>+DECLARE_REG 14, R13, 72
>+
>+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
>+    %assign num_args %1
>+    %assign regs_used %2
>+    %assign xmm_regs_used %3
>+    ASSERT regs_used >= num_args
>+    SETUP_STACK_POINTER %4
>+    ASSERT regs_used <= 15
>+    PUSH_IF_USED 9, 10, 11, 12, 13, 14
>+    ALLOC_STACK %4
>+    LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
>+    DEFINE_ARGS_INTERNAL %0, %4, %5
>+%endmacro
>+
>+%define has_epilogue regs_used > 9 || stack_size > 0 || vzeroupper_required
>+
>+%macro RET 0
>+    %if stack_size_padded > 0
>+        %if required_stack_alignment > STACK_ALIGNMENT
>+            mov rsp, rstkm
>+        %else
>+            add rsp, stack_size_padded
>+        %endif
>+    %endif
>+    POP_IF_USED 14, 13, 12, 11, 10, 9
>+    %if vzeroupper_required
>+        vzeroupper
>+    %endif
>+    AUTO_REP_RET
>+%endmacro
>+
>+%else ; X86_32 ;==============================================================
>+
>+DECLARE_REG 0, eax, 4
>+DECLARE_REG 1, ecx, 8
>+DECLARE_REG 2, edx, 12
>+DECLARE_REG 3, ebx, 16
>+DECLARE_REG 4, esi, 20
>+DECLARE_REG 5, edi, 24
>+DECLARE_REG 6, ebp, 28
>+%define rsp esp
>+
>+%macro DECLARE_ARG 1-*
>+    %rep %0
>+        %define r%1m [rstk + stack_offset + 4*%1 + 4]
>+        %define r%1mp dword r%1m
>+        %rotate 1
>+    %endrep
>+%endmacro
>+
>+DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
>+
>+%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
>+    %assign num_args %1
>+    %assign regs_used %2
>+    ASSERT regs_used >= num_args
>+    %if num_args > 7
>+        %assign num_args 7
>+    %endif
>+    %if regs_used > 7
>+        %assign regs_used 7
>+    %endif
>+    SETUP_STACK_POINTER %4
>+    ASSERT regs_used <= 7
>+    PUSH_IF_USED 3, 4, 5, 6
>+    ALLOC_STACK %4
>+    LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
>+    DEFINE_ARGS_INTERNAL %0, %4, %5
>+%endmacro
>+
>+%define has_epilogue regs_used > 3 || stack_size > 0 || vzeroupper_required
>+
>+%macro RET 0
>+    %if stack_size_padded > 0
>+        %if required_stack_alignment > STACK_ALIGNMENT
>+            mov rsp, rstkm
>+        %else
>+            add rsp, stack_size_padded
>+        %endif
>+    %endif
>+    POP_IF_USED 6, 5, 4, 3
>+    %if vzeroupper_required
>+        vzeroupper
>+    %endif
>+    AUTO_REP_RET
>+%endmacro
>+
>+%endif ;======================================================================
>+
>+%if WIN64 == 0
>+    %macro WIN64_SPILL_XMM 1
>+    %endmacro
>+    %macro WIN64_RESTORE_XMM 0
>+    %endmacro
>+    %macro WIN64_PUSH_XMM 0
>+    %endmacro
>+%endif
>+
>+; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
>+; a branch or a branch target. So switch to a 2-byte form of ret in that case.
>+; We can automatically detect "follows a branch", but not a branch target.
>+; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)
>+%macro REP_RET 0
>+    %if has_epilogue || cpuflag(ssse3)
>+        RET
>+    %else
>+        rep ret
>+    %endif
>+    annotate_function_size
>+%endmacro
>+
>+%define last_branch_adr $$
>+%macro AUTO_REP_RET 0
>+    %if notcpuflag(ssse3)
>+        times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr.
>+    %endif
>+    ret
>+    annotate_function_size
>+%endmacro
>+
>+%macro BRANCH_INSTR 0-*
>+    %rep %0
>+        %macro %1 1-2 %1
>+            %2 %1
>+            %if notcpuflag(ssse3)
>+                %%branch_instr equ $
>+                %xdefine last_branch_adr %%branch_instr
>+            %endif
>+        %endmacro
>+        %rotate 1
>+    %endrep
>+%endmacro
>+
>+BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp
>+
>+%macro TAIL_CALL 1-2 1 ; callee, is_nonadjacent
>+    %if has_epilogue
>+        call %1
>+        RET
>+    %elif %2
>+        jmp %1
>+    %endif
>+    annotate_function_size
>+%endmacro
>+
>+;=============================================================================
>+; arch-independent part
>+;=============================================================================
>+
>+%assign function_align 16
>+
>+; B
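
For readers unfamiliar with x86inc: below is a minimal, untested sketch of how a module's .asm file could consume these macros once the build glue from the configure.ac hunk is wired up. The function name (copy_bytes), the vlc prefix and the calling code are made up for illustration; cglobal, INIT_XMM, mmsize and RET come from the full x86inc.asm, parts of which fall outside the excerpt quoted above.

%define private_prefix vlc  ; symbols become vlc_* instead of the default dav1d_*
%include "x86inc.asm"

SECTION .text

INIT_XMM sse2
; void vlc_copy_bytes_sse2(uint8_t *dst, const uint8_t *src, intptr_t len)
; sketch only: assumes len > 0 and a multiple of mmsize (16 bytes for xmm)
cglobal copy_bytes, 3, 4, 1, dst, src, len, off
    xor   offq, offq
.loop:
    movu  m0, [srcq+offq]            ; unaligned 16-byte load
    movu  [dstq+offq], m0            ; unaligned 16-byte store
    add   offq, mmsize
    cmp   offq, lenq
    jl    .loop
    RET

The X86ASMDEFS set in configure.ac (ARCH_X86_32/ARCH_X86_64 and, where needed, PREFIX) are exactly the symbols this header tests near the top of the excerpt.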

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.
