[x264-devel] x264 (64bit) crashes in x264_intra_sad_x9_8x8_ssse3

Edward Richards edward at edwardandalison.com
Fri May 4 23:58:30 CEST 2012


Hi all,

I have built x264 using the mingw64/msys32 cross-compiler.

On line "analyse.c#879":
       i_best = h->pixf.intra_mbcmp_x9_8x8( p_src_by,
                                            p_dst_by,
                                            edge,
                                            cost_i4x4_mode-i_pred_mode,
                                            a->i_satd_i8x8_dir[idx] );

...it calls the associated SSSE3 function:  x264_intra_sad_x9_8x8_ssse3()

It then crashes with a SIGSEGV (segmentation fault) at offset +510 in that
function. (I don't know why I don't have debug info - can YASM not generate
debug symbols for macros?)

       x264_intra_sad_x9_8x8_ssse3+510: movd   %xmm1,0x6(%r10)
	       (r10 = 0x4faad58)
       
Here is the call stack.

	x264_intra_sad_x9_8x8_ssse3(	p_src_by = 0x3e880e8,
						p_dst_by = 0x3e884a8,
						edge = 0x4e7a240,
						cost_i4x4_mode-i_pred_mode = 0x707f50,
						a->i_satd_i8x8_dir[idx=3] = 0x4e7ad58 );
	x264_mb_analyse_intra (h=0x3fb1e00, a=0x4faac90, i_satd_inter=268435456)
	x264_macroblock_analyse (h=0x3fb1e00)
	x264_slice_write (h=0x3fb1e00)
	x264_slices_write (h=0x3fb1e00)
	x264_threadpool_thread (pool=0x17bf410)
	x264_win32thread_worker (arg=0x17bf5c0)

Do the argument values look OK? I noticed the pointers are on 8-byte
alignments.

The target cpu is an Intel Core2 Quad Q6700
	x264 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 Cache64

What can I do to help work out what is causing the crash? I'm glad to help,
but I am very weak in YASM syntax and don't know the alignment requirements,
etc., of the code path.

Thanks,
Rallymax.

Here are the registers at the time of the segfault:

rdi	0xbb
rbp	0x3e884a8
rsp	0x4e79ee0
r8	0x4e7a240
r9	0x707f50
r10	0x4e7ad58
r11	0x4e79fe0
r12	0x0
r13	0x20
r14	0x4e7ac90
r15	0x3e81e00
rip	0x4b98be
eflags	0x202
cs	0x33
ss	0x202002b
ds	0x0
es	0x0
fs	0x0
gs	0x2b0000
st0	0x8000000000000000
st1	0x8000000000000000
st2	0x8000000000000000
st3	0x8000000000000000
st4	0x8000000000000000
st5	0x8000000000000000
st6	0x8000000000000000
st7	0x8000000000000000
fctrl	0x27f
fstat	0xff0000
ftag	0xff
fiseg	0x0
fioff	0x0
foseg	0x0
fooff	0x0
fop	0x0
xmm0	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x20, 0x2a, 0x29, 0x29, 0x29, 0x2b, 0x2d, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
0x2f, 0x2f, 0x2f, 0x2f}, v8_int16 = {0x2a20, 0x2929, 0x2b29, 0x2f2d, 0x2f2f,
0x2f2f, 0x2f2f, 0x2f2f}, v4_int32 = {0x29292a20, 0x2f2d2b29, 0x2f2f2f2f,
0x2f2f2f2f}, v2_int64 = {0x2f2d2b2929292a20, 0x2f2f2f2f2f2f2f2f}, uint128 =
0x2f2f2f2f2f2f2f2f2f2d2b2929292a20}
xmm1	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x63, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xed, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0}, v8_int16 = {0x163, 0x0, 0x0, 0x0, 0xed, 0x0, 0x0, 0x0}, v4_int32
= {0x163, 0x0, 0xed, 0x0}, v2_int64 = {0x163, 0xed}, uint128 =
0x00000000000000ed0000000000000163}
xmm2	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0xbd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0}, v8_int16 = {0xbd, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0}, v4_int32 =
{0xbd, 0x0, 0x30, 0x0}, v2_int64 = {0xbd, 0x30}, uint128 =
0x000000000000003000000000000000bd}
xmm3	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x2b, 0x2a, 0x29, 0x29, 0x2a, 0x2c, 0x2e, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
0x2f, 0x2f, 0x2f, 0x2f}, v8_int16 = {0x2a2b, 0x2929, 0x2c2a, 0x2f2e, 0x2f2f,
0x2f2f, 0x2f2f, 0x2f2f}, v4_int32 = {0x29292a2b, 0x2f2e2c2a, 0x2f2f2f2f,
0x2f2f2f2f}, v2_int64 = {0x2f2e2c2a29292a2b, 0x2f2f2f2f2f2f2f2f}, uint128 =
0x2f2f2f2f2f2f2f2f2f2e2c2a29292a2b}
xmm4	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x29, 0x29, 0x28, 0x27, 0x26, 0x25, 0x28, 0x29, 0x29, 0x29, 0x29, 0x29,
0x28, 0x26, 0x25, 0x25}, v8_int16 = {0x2929, 0x2728, 0x2526, 0x2928, 0x2929,
0x2929, 0x2628, 0x2525}, v4_int32 = {0x27282929, 0x29282526, 0x29292929,
0x25252628}, v2_int64 = {0x2928252627282929, 0x2525262829292929}, uint128 =
0x25252628292929292928252627282929}
xmm5	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x29, 0x29, 0x29, 0x28, 0x26, 0x26, 0x27, 0x27, 0x29, 0x29, 0x29, 0x2a,
0x2a, 0x29, 0x26, 0x25}, v8_int16 = {0x2929, 0x2829, 0x2626, 0x2727, 0x2929,
0x2a29, 0x292a, 0x2526}, v4_int32 = {0x28292929, 0x27272626, 0x2a292929,
0x2526292a}, v2_int64 = {0x2727262628292929, 0x2526292a2a292929}, uint128 =
0x2526292a2a2929292727262628292929}
xmm6	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x25, 0x28, 0x2b, 0x2c, 0x2a, 0x2a, 0x28, 0x29, 0x23, 0x23, 0x26, 0x29,
0x29, 0x2a, 0x2b, 0x2b}, v8_int16 = {0x2825, 0x2c2b, 0x2a2a, 0x2928, 0x2323,
0x2926, 0x2a29, 0x2b2b}, v4_int32 = {0x2c2b2825, 0x29282a2a, 0x29262323,
0x2b2b2a29}, v2_int64 = {0x29282a2a2c2b2825, 0x2b2b2a2929262323}, uint128 =
0x2b2b2a292926232329282a2a2c2b2825}
xmm7	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x23, 0x23, 0x26, 0x29, 0x29, 0x29, 0x29, 0x2a, 0x23, 0x28, 0x2b, 0x2c,
0x2b, 0x29, 0x29, 0x29}, v8_int16 = {0x2323, 0x2926, 0x2929, 0x2a29, 0x2823,
0x2c2b, 0x292b, 0x2929}, v4_int32 = {0x29262323, 0x2a292929, 0x2c2b2823,
0x2929292b}, v2_int64 = {0x2a29292929262323, 0x2929292b2c2b2823}, uint128 =
0x2929292b2c2b28232a29292929262323}
xmm8	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x15, 0x2a, 0x29, 0x29, 0x29, 0x2b, 0x2d, 0x2e, 0x2f, 0x2f, 0x2f, 0x2f,
0x2f, 0x2f, 0x2f, 0x2f}, v8_int16 = {0x2a15, 0x2929, 0x2b29, 0x2e2d, 0x2f2f,
0x2f2f, 0x2f2f, 0x2f2f}, v4_int32 = {0x29292a15, 0x2e2d2b29, 0x2f2f2f2f,
0x2f2f2f2f}, v2_int64 = {0x2e2d2b2929292a15, 0x2f2f2f2f2f2f2f2f}, uint128 =
0x2f2f2f2f2f2f2f2f2e2d2b2929292a15}
xmm9	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm10	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm11	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm12	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm13	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm14	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm15	{v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
mxcsr	0x1fa0
al	0xc0
bl	0xa0
cl	0x10
dl	0xa8
sil	0xe8
dil	0xbb
bpl	0xa8
spl	0xe0
r8l	0x40
r9l	0x50
r10l	0x58
r11l	0xe0
r12l	0x0
r13l	0x20
r14l	0x90
r15l	0x0
ah	0x96
bh	0x84
ch	0x63
dh	0x84
ax	0x96c0
bx	0x84a0
cx	0x6310
dx	0x84a8
si	0x3e8
di	0xbb
bp	0x84a8
r8w	0xa240
r9w	0x7f50
r10w	0xad58
r11w	0x9fe0
r12w	0x0
r13w	0x20
r14w	0xac90
r15w	0x1e00
eax	0x4b96c0
ebx	0x3e884a0
ecx	0x6e6310
edx	0x3e884a8
esi	0x6d03e8
edi	0xbb
ebp	0x3e884a8
esp	0x4e79ee0
r8d	0x4e7a240
r9d	0x707f50
r10d	0x4e7ad58
r11d	0x4e79fe0
r12d	0x0
r13d	0x20
r14d	0x4e7ac90
r15d	0x3e81e00



More information about the x264-devel mailing list