[x265] [PATCH] testbench: port x264 stack & registers check code
Min Chen
chenm003 at 163.com
Tue Apr 22 08:18:11 CEST 2014
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1398147465 -28800
# Node ID 156abee4f7d30caa1937561eea16c890403bcd6f
# Parent 84315557c97ff2b10cf01910d6b131e28fce1781
testbench: port x264 stack & registers check code
diff -r 84315557c97f -r 156abee4f7d3 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Mon Apr 21 22:23:48 2014 -0500
+++ b/source/common/CMakeLists.txt Tue Apr 22 14:17:45 2014 +0800
@@ -104,7 +104,7 @@
set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm ssd-a.asm mc-a.asm
mc-a2.asm pixel-util8.asm blockcopy8.asm
- pixeladd8.asm dct8.asm)
+ pixeladd8.asm dct8.asm checkasm-a.asm)
if(HIGH_BIT_DEPTH)
set(A_SRCS ${A_SRCS} sad16-a.asm intrapred16.asm ipfilter16.asm)
else()
diff -r 84315557c97f -r 156abee4f7d3 source/common/x86/checkasm-a.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/x86/checkasm-a.asm Tue Apr 22 14:17:45 2014 +0800
@@ -0,0 +1,219 @@
+;*****************************************************************************
+;* checkasm-a.asm: assembly check tool
+;*****************************************************************************
+;* Copyright (C) 2008-2014 x264 project
+;*
+;* Authors: Loren Merritt <lorenm at u.washington.edu>
+;* Henrik Gramner <henrik at gramner.com>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+;*
+;* This program is also available under a commercial proprietary license.
+;* For more information, contact us at licensing at x264.com.
+;*****************************************************************************
+
+%include "x86inc.asm"
+
+SECTION_RODATA
+
+error_message: db "failed to preserve register", 0
+
+%if ARCH_X86_64
+; just random numbers to reduce the chance of incidental match
+ALIGN 16
+x6: ddq 0x79445c159ce790641a1b2550a612b48c
+x7: ddq 0x86b2536fcd8cf6362eed899d5a28ddcd
+x8: ddq 0x3f2bf84fc0fcca4eb0856806085e7943
+x9: ddq 0xd229e1f5b281303facbd382dcf5b8de2
+x10: ddq 0xab63e2e11fa38ed971aeaff20b095fd9
+x11: ddq 0x77d410d5c42c882d89b0c0765892729a
+x12: ddq 0x24b3c1d2a024048bc45ea11a955d8dd5
+x13: ddq 0xdd7b8919edd427862e8ec680de14b47c
+x14: ddq 0x11e53e2b2ac655ef135ce6888fa02cbf
+x15: ddq 0x6de8f4c914c334d5011ff554472a7a10
+n7: dq 0x21f86d66c8ca00ce
+n8: dq 0x75b6ba21077c48ad
+n9: dq 0xed56bb2dcb3c7736
+n10: dq 0x8bda43d3fd1a7e06
+n11: dq 0xb64a9c9e5d318408
+n12: dq 0xdf9a54b303f1d3a3
+n13: dq 0x4a75479abd64e097
+n14: dq 0x249214109d5d1c88
+%endif
+
+SECTION .text
+
+cextern_naked puts
+
+; max number of args used by any x264 asm function.
+; (max_args % 4) must equal 3 for stack alignment
+%define max_args 15
+
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; void x264_checkasm_stack_clobber( uint64_t clobber, ... )
+;-----------------------------------------------------------------------------
+cglobal checkasm_stack_clobber, 1,2
+ ; Clobber the stack with junk below the stack pointer
+ %define size (max_args+6)*8
+ SUB rsp, size
+ mov r1, size-8
+.loop:
+ mov [rsp+r1], r0
+ sub r1, 8
+ jge .loop
+ ADD rsp, size
+ RET
+
+%if WIN64
+ %assign free_regs 7
+%else
+ %assign free_regs 9
+%endif
+
+;-----------------------------------------------------------------------------
+; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
+;-----------------------------------------------------------------------------
+INIT_XMM
+cglobal checkasm_call, 2,15,16,max_args*8+8
+ mov r6, r0
+ mov [rsp+max_args*8], r1
+
+ ; All arguments have been pushed on the stack instead of registers in order to
+ ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+ mov r0, r6mp
+ mov r1, r7mp
+ mov r2, r8mp
+ mov r3, r9mp
+%if UNIX64
+ mov r4, r10mp
+ mov r5, r11mp
+ %assign i 6
+ %rep max_args-6
+ mov r9, [rsp+stack_offset+(i+1)*8]
+ mov [rsp+(i-6)*8], r9
+ %assign i i+1
+ %endrep
+%else
+ %assign i 4
+ %rep max_args-4
+ mov r9, [rsp+stack_offset+(i+7)*8]
+ mov [rsp+i*8], r9
+ %assign i i+1
+ %endrep
+%endif
+
+%if WIN64
+ %assign i 6
+ %rep 16-6
+ mova m %+ i, [x %+ i]
+ %assign i i+1
+ %endrep
+%endif
+
+%assign i 14
+%rep 15-free_regs
+ mov r %+ i, [n %+ i]
+ %assign i i-1
+%endrep
+ call r6
+%assign i 14
+%rep 15-free_regs
+ xor r %+ i, [n %+ i]
+ or r14, r %+ i
+ %assign i i-1
+%endrep
+
+%if WIN64
+ %assign i 6
+ %rep 16-6
+ pxor m %+ i, [x %+ i]
+ por m6, m %+ i
+ %assign i i+1
+ %endrep
+ packsswb m6, m6
+ movq r5, m6
+ or r14, r5
+%endif
+
+ jz .ok
+ mov r9, rax
+ lea r0, [error_message]
+ call puts
+ mov r1, [rsp+max_args*8]
+ mov dword [r1], 0
+ mov rax, r9
+.ok:
+ RET
+
+%else
+
+; just random numbers to reduce the chance of incidental match
+%define n3 dword 0x6549315c
+%define n4 dword 0xe02f3e23
+%define n5 dword 0xb78d0d1d
+%define n6 dword 0x33627ba7
+
+;-----------------------------------------------------------------------------
+; intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... )
+;-----------------------------------------------------------------------------
+cglobal checkasm_call, 1,7
+ mov r3, n3
+ mov r4, n4
+ mov r5, n5
+ mov r6, n6
+%rep max_args
+ push dword [esp+24+max_args*4]
+%endrep
+ call r0
+ add esp, max_args*4
+ xor r3, n3
+ xor r4, n4
+ xor r5, n5
+ xor r6, n6
+ or r3, r4
+ or r5, r6
+ or r3, r5
+ jz .ok
+ mov r3, eax
+ lea r1, [error_message]
+ push r1
+ call puts
+ add esp, 4
+ mov r1, r1m
+ mov dword [r1], 0
+ mov eax, r3
+.ok:
+ REP_RET
+
+%endif ; ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; int x264_stack_pagealign( int (*func)(), int align )
+;-----------------------------------------------------------------------------
+cglobal stack_pagealign, 2,2
+ movsxdifnidn r1, r1d
+ push rbp
+ mov rbp, rsp
+%if WIN64
+ sub rsp, 32 ; shadow space
+%endif
+ and rsp, ~0xfff
+ sub rsp, r1
+ call r0
+ leave
+ RET
+
diff -r 84315557c97f -r 156abee4f7d3 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Mon Apr 21 22:23:48 2014 -0500
+++ b/source/test/pixelharness.cpp Tue Apr 22 14:17:45 2014 +0800
@@ -504,6 +504,7 @@
bool PixelHarness::check_cvt32to16_shr_t(cvt32to16_shr_t ref, cvt32to16_shr_t opt)
{
+ int ok;
ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
@@ -513,10 +514,12 @@
int shift = (rand() % 7 + 1);
int index = i % TEST_CASES;
- opt(opt_dest, int_test_buff[index] + j, STRIDE, shift, STRIDE);
- ref(ref_dest, int_test_buff[index] + j, STRIDE, shift, STRIDE);
+ ok = 1;
+ call_a1(opt, opt_dest, int_test_buff[index] + j, STRIDE, shift, STRIDE);
+ assert(ok == 1);
+ call_c1(ref, ref_dest, int_test_buff[index] + j, STRIDE, shift, STRIDE);
- if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
+ if (!ok || memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(int16_t)))
return false;
j += INCR;
diff -r 84315557c97f -r 156abee4f7d3 source/test/testharness.h
--- a/source/test/testharness.h Mon Apr 21 22:23:48 2014 -0500
+++ b/source/test/testharness.h Tue Apr 22 14:17:45 2014 +0800
@@ -107,4 +107,37 @@
printf("\t %-8.2lf \t %-8.2lf\n", optperf, refperf); \
}
+#if ARCH_X86 || ARCH_X86_64
+int x264_stack_pagealign( int (*func)(), int align );
+
+/* detect when callee-saved regs aren't saved
+ * needs an explicit asm check because it only sometimes crashes in normal use. */
+intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
+#else
+#define x264_stack_pagealign( func, align ) func()
+#endif
+
+#define call_c1(func,...) func(__VA_ARGS__)
+
+#if ARCH_X86_64
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+ * This is done by clobbering the stack with junk around the stack pointer and calling the
+ * assembly function through x264_checkasm_call with added dummy arguments which forces all
+ * real arguments to be passed on the stack and not in registers. For 32-bit argument the
+ * upper half of the 64-bit register location on the stack will now contain junk. Note that
+ * this is dependant on compiler behaviour and that interrupts etc. at the wrong time may
+ * overwrite the junk written to the stack so there's no guarantee that it will always
+ * detect all functions that assumes zero-extension.
+ */
+void x264_checkasm_stack_clobber( uint64_t clobber, ... );
+#define call_a1(func,...) ({ \
+ uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
+ x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
+ x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
+#elif ARCH_X86
+#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
+#else
+#define call_a1 call_c1
+#endif
+
#endif // ifndef _TESTHARNESS_H_
More information about the x265-devel
mailing list