[x264-devel] x264 (64bit) crashes in x264_intra_sad_x9_8x8_ssse3
Edward Richards
edward at edwardandalison.com
Fri May 4 23:58:30 CEST 2012
Hi all,
I have built x264 using the mingw64/msys32 cross-compiler.
At line 879 of analyse.c:
i_best = h->pixf.intra_mbcmp_x9_8x8( p_src_by,
p_dst_by,
edge,
cost_i4x4_mode-i_pred_mode,
a->i_satd_i8x8_dir[idx] );
...it calls the associated SSSE3 function: x264_intra_sad_x9_8x8_ssse3()
It then crashes with a SIGSEGV (segmentation fault) at offset +510 in that
function, at the instruction shown below. (I don't know why I don't have
debug info - can YASM not generate debug symbols for macros?)
x264_intra_sad_x9_8x8_ssse3+510: movd %xmm1,0x6(%r10)
(r10 = 0x4faad58)
Here is the call stack.
x264_intra_sad_x9_8x8_ssse3( p_src_by = 0x3e880e8,
p_dst_by = 0x3e884a8,
edge = 0x4e7a240,
cost_i4x4_mode-i_pred_mode =
0x707f50,
a->i_satd_i8x8_dir[idx=3] =
0x4e7ad58 );
x264_mb_analyse_intra (h=0x3fb1e00, a=0x4faac90,
i_satd_inter=268435456)
x264_macroblock_analyse (h=0x3fb1e00)
x264_slice_write (h=0x3fb1e00)
x264_slices_write (h=0x3fb1e00)
x264_threadpool_thread (pool=0x17bf410)
x264_win32thread_worker (arg=0x17bf5c0)
Do the argument values look OK? I noticed that some of the pointers are only
8-byte aligned.
The target cpu is an Intel Core2 Quad Q6700
x264 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 Cache64
What can I do to help work out what is causing the crash? I'm glad to help,
but I am not very familiar with YASM syntax and don't know the alignment
requirements, etc., of this code path.
Thanks,
Rallymax.
Here are the register values at the time of the segfault:
rdi 0xbb
rbp 0x3e884a8
rsp 0x4e79ee0
r8 0x4e7a240
r9 0x707f50
r10 0x4e7ad58
r11 0x4e79fe0
r12 0x0
r13 0x20
r14 0x4e7ac90
r15 0x3e81e00
rip 0x4b98be
eflags 0x202
cs 0x33
ss 0x202002b
ds 0x0
es 0x0
fs 0x0
gs 0x2b0000
st0 0x8000000000000000
st1 0x8000000000000000
st2 0x8000000000000000
st3 0x8000000000000000
st4 0x8000000000000000
st5 0x8000000000000000
st6 0x8000000000000000
st7 0x8000000000000000
fctrl 0x27f
fstat 0xff0000
ftag 0xff
fiseg 0x0
fioff 0x0
foseg 0x0
fooff 0x0
fop 0x0
xmm0 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x20, 0x2a, 0x29, 0x29, 0x29, 0x2b, 0x2d, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
0x2f, 0x2f, 0x2f, 0x2f}, v8_int16 = {0x2a20, 0x2929, 0x2b29, 0x2f2d, 0x2f2f,
0x2f2f, 0x2f2f, 0x2f2f}, v4_int32 = {0x29292a20, 0x2f2d2b29, 0x2f2f2f2f,
0x2f2f2f2f}, v2_int64 = {0x2f2d2b2929292a20, 0x2f2f2f2f2f2f2f2f}, uint128 =
0x2f2f2f2f2f2f2f2f2f2d2b2929292a20}
xmm1 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x63, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xed, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0}, v8_int16 = {0x163, 0x0, 0x0, 0x0, 0xed, 0x0, 0x0, 0x0}, v4_int32
= {0x163, 0x0, 0xed, 0x0}, v2_int64 = {0x163, 0xed}, uint128 =
0x00000000000000ed0000000000000163}
xmm2 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0xbd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0}, v8_int16 = {0xbd, 0x0, 0x0, 0x0, 0x30, 0x0, 0x0, 0x0}, v4_int32 =
{0xbd, 0x0, 0x30, 0x0}, v2_int64 = {0xbd, 0x30}, uint128 =
0x000000000000003000000000000000bd}
xmm3 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x2b, 0x2a, 0x29, 0x29, 0x2a, 0x2c, 0x2e, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
0x2f, 0x2f, 0x2f, 0x2f}, v8_int16 = {0x2a2b, 0x2929, 0x2c2a, 0x2f2e, 0x2f2f,
0x2f2f, 0x2f2f, 0x2f2f}, v4_int32 = {0x29292a2b, 0x2f2e2c2a, 0x2f2f2f2f,
0x2f2f2f2f}, v2_int64 = {0x2f2e2c2a29292a2b, 0x2f2f2f2f2f2f2f2f}, uint128 =
0x2f2f2f2f2f2f2f2f2f2e2c2a29292a2b}
xmm4 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x29, 0x29, 0x28, 0x27, 0x26, 0x25, 0x28, 0x29, 0x29, 0x29, 0x29, 0x29,
0x28, 0x26, 0x25, 0x25}, v8_int16 = {0x2929, 0x2728, 0x2526, 0x2928, 0x2929,
0x2929, 0x2628, 0x2525}, v4_int32 = {0x27282929, 0x29282526, 0x29292929,
0x25252628}, v2_int64 = {0x2928252627282929, 0x2525262829292929}, uint128 =
0x25252628292929292928252627282929}
xmm5 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x29, 0x29, 0x29, 0x28, 0x26, 0x26, 0x27, 0x27, 0x29, 0x29, 0x29, 0x2a,
0x2a, 0x29, 0x26, 0x25}, v8_int16 = {0x2929, 0x2829, 0x2626, 0x2727, 0x2929,
0x2a29, 0x292a, 0x2526}, v4_int32 = {0x28292929, 0x27272626, 0x2a292929,
0x2526292a}, v2_int64 = {0x2727262628292929, 0x2526292a2a292929}, uint128 =
0x2526292a2a2929292727262628292929}
xmm6 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x25, 0x28, 0x2b, 0x2c, 0x2a, 0x2a, 0x28, 0x29, 0x23, 0x23, 0x26, 0x29,
0x29, 0x2a, 0x2b, 0x2b}, v8_int16 = {0x2825, 0x2c2b, 0x2a2a, 0x2928, 0x2323,
0x2926, 0x2a29, 0x2b2b}, v4_int32 = {0x2c2b2825, 0x29282a2a, 0x29262323,
0x2b2b2a29}, v2_int64 = {0x29282a2a2c2b2825, 0x2b2b2a2929262323}, uint128 =
0x2b2b2a292926232329282a2a2c2b2825}
xmm7 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x23, 0x23, 0x26, 0x29, 0x29, 0x29, 0x29, 0x2a, 0x23, 0x28, 0x2b, 0x2c,
0x2b, 0x29, 0x29, 0x29}, v8_int16 = {0x2323, 0x2926, 0x2929, 0x2a29, 0x2823,
0x2c2b, 0x292b, 0x2929}, v4_int32 = {0x29262323, 0x2a292929, 0x2c2b2823,
0x2929292b}, v2_int64 = {0x2a29292929262323, 0x2929292b2c2b2823}, uint128 =
0x2929292b2c2b28232a29292929262323}
xmm8 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x15, 0x2a, 0x29, 0x29, 0x29, 0x2b, 0x2d, 0x2e, 0x2f, 0x2f, 0x2f, 0x2f,
0x2f, 0x2f, 0x2f, 0x2f}, v8_int16 = {0x2a15, 0x2929, 0x2b29, 0x2e2d, 0x2f2f,
0x2f2f, 0x2f2f, 0x2f2f}, v4_int32 = {0x29292a15, 0x2e2d2b29, 0x2f2f2f2f,
0x2f2f2f2f}, v2_int64 = {0x2e2d2b2929292a15, 0x2f2f2f2f2f2f2f2f}, uint128 =
0x2f2f2f2f2f2f2f2f2e2d2b2929292a15}
xmm9 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm10 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm11 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm12 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm13 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm14 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
xmm15 {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0, 0x0}, v16_int8 =
{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0}, v8_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0,
0x0, 0x0, 0x0}, v2_int64 = {0x0, 0x0}, uint128 =
0x00000000000000000000000000000000}
mxcsr 0x1fa0
al 0xc0
bl 0xa0
cl 0x10
dl 0xa8
sil 0xe8
dil 0xbb
bpl 0xa8
spl 0xe0
r8l 0x40
r9l 0x50
r10l 0x58
r11l 0xe0
r12l 0x0
r13l 0x20
r14l 0x90
r15l 0x0
ah 0x96
bh 0x84
ch 0x63
dh 0x84
ax 0x96c0
bx 0x84a0
cx 0x6310
dx 0x84a8
si 0x3e8
di 0xbb
bp 0x84a8
r8w 0xa240
r9w 0x7f50
r10w 0xad58
r11w 0x9fe0
r12w 0x0
r13w 0x20
r14w 0xac90
r15w 0x1e00
eax 0x4b96c0
ebx 0x3e884a0
ecx 0x6e6310
edx 0x3e884a8
esi 0x6d03e8
edi 0xbb
ebp 0x3e884a8
esp 0x4e79ee0
r8d 0x4e7a240
r9d 0x707f50
r10d 0x4e7ad58
r11d 0x4e79fe0
r12d 0x0
r13d 0x20
r14d 0x4e7ac90
r15d 0x3e81e00
More information about the x264-devel
mailing list