[x265] [PATCH] testbench: port x264 stack & registers check code

Steve Borho steve at borho.org
Tue Apr 22 17:06:40 CEST 2014


On Tue, Apr 22, 2014 at 1:18 AM, Min Chen <chenm003 at 163.com> wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1398147465 -28800
> # Node ID 156abee4f7d30caa1937561eea16c890403bcd6f
> # Parent  84315557c97ff2b10cf01910d6b131e28fce1781
> testbench: port x264 stack & registers check code

thanks Min, I hope this is enough for the team to finish this

> diff -r 84315557c97f -r 156abee4f7d3 source/common/CMakeLists.txt
> --- a/source/common/CMakeLists.txt      Mon Apr 21 22:23:48 2014 -0500
> +++ b/source/common/CMakeLists.txt      Tue Apr 22 14:17:45 2014 +0800
> @@ -104,7 +104,7 @@
>      set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
>      set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm ssd-a.asm mc-a.asm
>                 mc-a2.asm pixel-util8.asm blockcopy8.asm
> -               pixeladd8.asm dct8.asm)
> +               pixeladd8.asm dct8.asm checkasm-a.asm)

it's sub-optimal to link the checkasm functions into libx265, but it's
probably even more sub-optimal to have the MSVC/Xcode YASM-cmake
workaround in two places.  Yuck.

>      if(HIGH_BIT_DEPTH)
>          set(A_SRCS ${A_SRCS} sad16-a.asm intrapred16.asm ipfilter16.asm)
>      else()
> diff -r 84315557c97f -r 156abee4f7d3 source/common/x86/checkasm-a.asm
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/source/common/x86/checkasm-a.asm  Tue Apr 22 14:17:45 2014 +0800
> @@ -0,0 +1,219 @@
> +;*****************************************************************************
> +;* checkasm-a.asm: assembly check tool
> +;*****************************************************************************
> +;* Copyright (C) 2008-2014 x264 project
> +;*
> +;* Authors: Loren Merritt <lorenm at u.washington.edu>
> +;*          Henrik Gramner <henrik at gramner.com>
> +;*
> +;* This program is free software; you can redistribute it and/or modify
> +;* it under the terms of the GNU General Public License as published by
> +;* the Free Software Foundation; either version 2 of the License, or
> +;* (at your option) any later version.
> +;*
> +;* This program is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +;* GNU General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU General Public License
> +;* along with this program; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
> +;*
> +;* This program is also available under a commercial proprietary license.
> +;* For more information, contact us at licensing at x264.com.
> +;*****************************************************************************

I think we would need our own header here, but with the original
author attributions

> +
> +%include "x86inc.asm"
> +
> +SECTION_RODATA
> +
> +error_message: db "failed to preserve register", 0
> +
> +%if ARCH_X86_64
> +; just random numbers to reduce the chance of incidental match
> +ALIGN 16
> +x6:  ddq 0x79445c159ce790641a1b2550a612b48c
> +x7:  ddq 0x86b2536fcd8cf6362eed899d5a28ddcd
> +x8:  ddq 0x3f2bf84fc0fcca4eb0856806085e7943
> +x9:  ddq 0xd229e1f5b281303facbd382dcf5b8de2
> +x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9
> +x11: ddq 0x77d410d5c42c882d89b0c0765892729a
> +x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5
> +x13: ddq 0xdd7b8919edd427862e8ec680de14b47c
> +x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf
> +x15: ddq 0x6de8f4c914c334d5011ff554472a7a10
> +n7:   dq 0x21f86d66c8ca00ce
> +n8:   dq 0x75b6ba21077c48ad
> +n9:   dq 0xed56bb2dcb3c7736
> +n10:  dq 0x8bda43d3fd1a7e06
> +n11:  dq 0xb64a9c9e5d318408
> +n12:  dq 0xdf9a54b303f1d3a3
> +n13:  dq 0x4a75479abd64e097
> +n14:  dq 0x249214109d5d1c88
> +%endif
> +
> +SECTION .text
> +
> +cextern_naked puts
> +
> +; max number of args used by any x264 asm function.
> +; (max_args % 4) must equal 3 for stack alignment
> +%define max_args 15
> +
> +%if ARCH_X86_64
> +
> +;-----------------------------------------------------------------------------
> +; void x264_checkasm_stack_clobber( uint64_t clobber, ... )
> +;-----------------------------------------------------------------------------
> +cglobal checkasm_stack_clobber, 1,2
> +    ; Clobber the stack with junk below the stack pointer
> +    %define size (max_args+6)*8
> +    SUB  rsp, size
> +    mov   r1, size-8
> +.loop:
> +    mov [rsp+r1], r0
> +    sub   r1, 8
> +    jge .loop
> +    ADD  rsp, size
> +    RET
> +
> +%if WIN64
> +    %assign free_regs 7
> +%else
> +    %assign free_regs 9
> +%endif
> +
> +;-----------------------------------------------------------------------------
> +; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
> +;-----------------------------------------------------------------------------
> +INIT_XMM
> +cglobal checkasm_call, 2,15,16,max_args*8+8
> +    mov  r6, r0
> +    mov  [rsp+max_args*8], r1
> +
> +    ; All arguments have been pushed on the stack instead of registers in order to
> +    ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
> +    mov  r0, r6mp
> +    mov  r1, r7mp
> +    mov  r2, r8mp
> +    mov  r3, r9mp
> +%if UNIX64
> +    mov  r4, r10mp
> +    mov  r5, r11mp
> +    %assign i 6
> +    %rep max_args-6
> +        mov  r9, [rsp+stack_offset+(i+1)*8]
> +        mov  [rsp+(i-6)*8], r9
> +        %assign i i+1
> +    %endrep
> +%else
> +    %assign i 4
> +    %rep max_args-4
> +        mov  r9, [rsp+stack_offset+(i+7)*8]
> +        mov  [rsp+i*8], r9
> +        %assign i i+1
> +    %endrep
> +%endif
> +
> +%if WIN64
> +    %assign i 6
> +    %rep 16-6
> +        mova m %+ i, [x %+ i]
> +        %assign i i+1
> +    %endrep
> +%endif
> +
> +%assign i 14
> +%rep 15-free_regs
> +    mov  r %+ i, [n %+ i]
> +    %assign i i-1
> +%endrep
> +    call r6
> +%assign i 14
> +%rep 15-free_regs
> +    xor  r %+ i, [n %+ i]
> +    or  r14, r %+ i
> +    %assign i i-1
> +%endrep
> +
> +%if WIN64
> +    %assign i 6
> +    %rep 16-6
> +        pxor m %+ i, [x %+ i]
> +        por  m6, m %+ i
> +        %assign i i+1
> +    %endrep
> +    packsswb m6, m6
> +    movq r5, m6
> +    or  r14, r5
> +%endif
> +
> +    jz .ok
> +    mov  r9, rax
> +    lea  r0, [error_message]
> +    call puts
> +    mov  r1, [rsp+max_args*8]
> +    mov  dword [r1], 0
> +    mov  rax, r9
> +.ok:
> +    RET
> +
> +%else
> +
> +; just random numbers to reduce the chance of incidental match
> +%define n3 dword 0x6549315c
> +%define n4 dword 0xe02f3e23
> +%define n5 dword 0xb78d0d1d
> +%define n6 dword 0x33627ba7
> +
> +;-----------------------------------------------------------------------------
> +; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
> +;-----------------------------------------------------------------------------
> +cglobal checkasm_call, 1,7
> +    mov  r3, n3
> +    mov  r4, n4
> +    mov  r5, n5
> +    mov  r6, n6
> +%rep max_args
> +    push dword [esp+24+max_args*4]
> +%endrep
> +    call r0
> +    add  esp, max_args*4
> +    xor  r3, n3
> +    xor  r4, n4
> +    xor  r5, n5
> +    xor  r6, n6
> +    or   r3, r4
> +    or   r5, r6
> +    or   r3, r5
> +    jz .ok
> +    mov  r3, eax
> +    lea  r1, [error_message]
> +    push r1
> +    call puts
> +    add  esp, 4
> +    mov  r1, r1m
> +    mov  dword [r1], 0
> +    mov  eax, r3
> +.ok:
> +    REP_RET
> +
> +%endif ; ARCH_X86_64
> +
> +;-----------------------------------------------------------------------------
> +; int x264_stack_pagealign( int (*func)(), int align )
> +;-----------------------------------------------------------------------------
> +cglobal stack_pagealign, 2,2
> +    movsxdifnidn r1, r1d
> +    push rbp
> +    mov  rbp, rsp
> +%if WIN64
> +    sub  rsp, 32 ; shadow space
> +%endif
> +    and  rsp, ~0xfff
> +    sub  rsp, r1
> +    call r0
> +    leave
> +    RET
> +
> diff -r 84315557c97f -r 156abee4f7d3 source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp      Mon Apr 21 22:23:48 2014 -0500
> +++ b/source/test/pixelharness.cpp      Tue Apr 22 14:17:45 2014 +0800
> @@ -504,6 +504,7 @@
>
>  bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt)
>  {
> +    int ok;
>      ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
>      ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
>
> @@ -513,10 +514,12 @@
>          int shift = (rand() % 7 + 1);
>
>          int index = i % TEST_CASES;
> -        opt(opt_dest, int_test_buff[index] + j, STRIDE, shift, STRIDE);
> -        ref(ref_dest, int_test_buff[index] + j, STRIDE, shift, STRIDE);
> +        ok = 1;
> +        call_a1(opt, opt_dest, int_test_buff[index] + j, STRIDE, shift, STRIDE);
> +        assert(ok == 1);
> +        call_c1(ref, ref_dest, int_test_buff[index] + j, STRIDE, shift, STRIDE);
>
> -        if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
> +        if (!ok || memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
>              return false;

We probably want to macro-ize this so it can be done for all the
check_* functions with minimal pain.

>
>          j += INCR;
> diff -r 84315557c97f -r 156abee4f7d3 source/test/testharness.h
> --- a/source/test/testharness.h Mon Apr 21 22:23:48 2014 -0500
> +++ b/source/test/testharness.h Tue Apr 22 14:17:45 2014 +0800
> @@ -107,4 +107,37 @@
>          printf("\t %-8.2lf \t %-8.2lf\n", optperf, refperf); \
>      }
>
> +#if ARCH_X86 || ARCH_X86_64

ARCH_X86_64 will only be set if ARCH_X86 is 1, so this is a redundant check
These two assembly entry points need to be extern "C", and since they
are being built with our headers, they need x265_ prefixes

> +int x264_stack_pagealign( int (*func)(), int align );
> +
> +/* detect when callee-saved regs aren't saved
> + * needs an explicit asm check because it only sometimes crashes in normal use. */
> +intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
> +#else
> +#define x264_stack_pagealign( func, align ) func()
> +#endif
> +
> +#define call_c1(func,...) func(__VA_ARGS__)
> +
> +#if ARCH_X86_64
> +/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
> + * This is done by clobbering the stack with junk around the stack pointer and calling the
> + * assembly function through x264_checkasm_call with added dummy arguments which forces all
> + * real arguments to be passed on the stack and not in registers. For 32-bit argument the
> + * upper half of the 64-bit register location on the stack will now contain junk. Note that
> + * this is dependant on compiler behaviour and that interrupts etc. at the wrong time may
> + * overwrite the junk written to the stack so there's no guarantee that it will always
> + * detect all functions that assumes zero-extension.
> + */
> +void x264_checkasm_stack_clobber( uint64_t clobber, ... );
> +#define call_a1(func,...) ({ \
> +    uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
> +    x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
> +    x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
> +#elif ARCH_X86
> +#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
> +#else
> +#define call_a1 call_c1
> +#endif
> +
>  #endif // ifndef _TESTHARNESS_H_
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



-- 
Steve Borho


More information about the x265-devel mailing list