[x264-devel] x264 crashes on hyperthreaded Intel processors

Fri Sep 15 13:56:58 CEST 2006

Recent SVN versions of x264 crash on hyperthreaded Intel processors.
This is revision 564 (a few previous revisions crashed too)
on Intel(R) Pentium(R) 4 CPU 2.80GHz
Linux kernel Linux 2.6.17.13-skas3-v9-pre9 #1 SMP PREEMPT
Debian 3.1 "sarge"

I tested on several processors; it failed only on the Pentium 4 with HT:
Intel(R) Pentium(R) 4 CPU 2.80GHz (hyperthreaded): SIGSEGV
Celeron (Coppermine): OK
AMD Athlon(tm) XP 2500+ : OK
Intel(R) Pentium(R) III Mobile CPU       800MHz : OK

Backtraces and stuff:

(gdb) run -v -o Z.mkv stream.dump 320x240
Starting program: /home/nvy/build/x264-SVN/x264 -v -o Z.mkv stream.dump 320x240
[Thread debugging using libthread_db enabled]
[New Thread -1210244992 (LWP 3775)]
x264 [info]: using cpu capabilities MMX MMXEXT SSE SSE2

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread -1210244992 (LWP 3775)]
0x080b980c in x264_pixel_ssim_end4_sse2 ()
(gdb) bt
#0  0x080b980c in x264_pixel_ssim_end4_sse2 ()
#1  0x08059403 in x264_pixel_ssim_wxh (pf=0x83d104c,
    pix1=0x8460812 "\020\016\021\v\n", '\f' <repeats 11 times>,
"\r\016\016\017\017\016\f\f\017\020\020\n\n\020\020\v\v\020\021\n\n\021\f\f\r\r\016\016\016\016\r\r\f\016\016\017\v\t\017\v\v\017\017\017\017\017\021\r\v\016\r\r\f\f\r\r\r\017\020\021\r\v\020\a\a\020\020\a\a\020\n\n\n\v\t\020\020\t\v\020\020\v\023\021\r\f\021\n\n\021\021\n\n\021\022\v\v\022\f\r\020",
'\021' <repeats 83 times>..., stride1=384,
    pix2=0x83da3c2 "\f\f\021\v\v\f", '\v' <repeats 12 times>,
"\r\021\021\021\n\n\021\021\021\n\n\021\021\n\n\021\021\n\n\021\n\n\021\n\n\021\021\n\n\021\n\n\021\021\n\n\021\n\n\021\021\021\021\021\021\n\n\n\n\021\n\n\021\021\021\021\021\021\n\n\021\n\n\021\021\n\n\021\n\n\021\n\n\021\021\n\n\021\021\021\021\021\n\n\021\n\n\021\021\n\n\021\021\n\n\021\n\n",
'\021' <repeats 84 times>..., stride2=384, width=79, height=59) at
common/pixel.c:396
#2  0x08053864 in x264_encoder_encode (h=0x83cd3b0, pp_nal=0xbf8ef648,
pi_nal=0xbf8ef64c, pic_in=0x0, pic_out=0xbf8ef650)
    at encoder/encoder.c:1624
#3  0x0804a2f6 in Encode_frame (h=0xbf8eef74, hout=0x83cd008,
pic=0xbf8eef74) at x264.c:686
#4  0x0804a57a in Encode (param=0xbf8ef7d0, opt=0xbf8ef7b0) at x264.c:772
#5  0x0804926f in main (argc=-1081151628, argv=0xbf8eef74) at x264.c:110

(gdb) disass $pc-32 $pc+32
Dump of assembler code from 0x80b97ec to 0x80b982c:
0x080b97ec <x264_pixel_ssim_4x4x2_core_sse2+332>:       or     %ah,0xf(%esi)
0x080b97ef <x264_pixel_ssim_4x4x2_core_sse2+335>:       jae
0x80b97ca <x264_pixel_ssim_4x4x2_core_sse2+298>
0x080b97f1 <x264_pixel_ssim_4x4x2_core_sse2+337>:       or     %ah,0xf(%esi)
0x080b97f4 <x264_pixel_ssim_4x4x2_core_sse2+340>:       (bad)
0x080b97f5 <x264_pixel_ssim_4x4x2_core_sse2+341>:       dec    %eax
0x080b97f6 <x264_pixel_ssim_4x4x2_core_sse2+342>:       adc    %ah,0xf(%esi)
0x080b97f9 <x264_pixel_ssim_4x4x2_core_sse2+345>:       (bad)
0x080b97fa <x264_pixel_ssim_4x4x2_core_sse2+346>:       push   $0x90c35b18
0x080b97ff <x264_pixel_ssim_4x4x2_core_sse2+351>:       nop
0x080b9800 <x264_pixel_ssim_end4_sse2+0>:       mov    0x4(%esp),%eax
0x080b9804 <x264_pixel_ssim_end4_sse2+4>:       mov    0x8(%esp),%ecx
0x080b9808 <x264_pixel_ssim_end4_sse2+8>:       mov    0xc(%esp),%edx
0x080b980c <x264_pixel_ssim_end4_sse2+12>:      movdqa (%eax),%xmm0
0x080b9810 <x264_pixel_ssim_end4_sse2+16>:      movdqa 0x10(%eax),%xmm1
0x080b9815 <x264_pixel_ssim_end4_sse2+21>:      movdqa 0x20(%eax),%xmm2
0x080b981a <x264_pixel_ssim_end4_sse2+26>:      movdqa 0x30(%eax),%xmm3
0x080b981f <x264_pixel_ssim_end4_sse2+31>:      movdqa 0x40(%eax),%xmm4
0x080b9824 <x264_pixel_ssim_end4_sse2+36>:      paddd  (%ecx),%xmm0
0x080b9828 <x264_pixel_ssim_end4_sse2+40>:      paddd  0x10(%ecx),%xmm1
End of assembler dump.

(gdb) info all-registers
eax            0xbf8eef74       -1081151628
ecx            0xbf8eea54       -1081152940
edx            0x4      4
ebx            0x0      0
esp            0xbf8eea3c       0xbf8eea3c
ebp            0xbf8ef4b8       0xbf8ef4b8
esi            0x4f     79
edi            0xbf8eef74       -1081151628
eip            0x80b980c        0x80b980c
eflags         0x10282  66178
cs             0x73     115
ss             0x7b     123
ds             0x7b     123
es             0x7b     123
fs             0x0      0
gs             0x33     51
st0            -nan(0x1600000056)       (raw 0xffff0000001600000056)
st1            -nan(0x000000016)        (raw 0xffff0000000000000016)
st2            -nan(0x200000002)        (raw 0xffff0000000200000002)
st3            -nan(0xb0000000b)        (raw 0xffff0000000b0000000b)
st4            9        (raw 0x40029000000000000000)
st5            9        (raw 0x40029000000000000000)
st6            47.7092742919921875      (raw 0x4004bed64c0000000000)
st7            0        (raw 0x00000000000000000000)
fctrl          0x37f    895
fstat          0x20     32
ftag           0xffff   65535
fiseg          0x73     115
fioff          0x80592f7        134583031
foseg          0x7b     123
fooff          0xbf8ef4a0       -1081150304
fop            0x1e2    482

xmm0           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0,
0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm1           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0,
0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm2           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0,
0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm3           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0,
0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm4           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0,
0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm5           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0,
0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm6           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0,
0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
xmm7           {v4_float = {0x0, 0x0, 0x0, 0x0}, v2_double = {0x0,
0x0}, v16_int8 = {0x0 <repeats 16 times>}, v8_int16 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int32 = {0x0, 0x0,
0x0, 0x0}, v2_int64 = {0x0, 0x0},
  uint128 = 0x00000000000000000000000000000000}
---Type <return> to continue, or q <return> to quit---
mxcsr          0x1f80   8064
mm0            {uint64 = 0x1600000056, v2_int32 = {0x56, 0x16},
v4_int16 = {0x56, 0x0, 0x16, 0x0}, v8_int8 = {0x56, 0x0,
    0x0, 0x0, 0x16, 0x0, 0x0, 0x0}}
mm1            {uint64 = 0x16, v2_int32 = {0x16, 0x0}, v4_int16 =
{0x16, 0x0, 0x0, 0x0}, v8_int8 = {0x16, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0, 0x0}}
mm2            {uint64 = 0x200000002, v2_int32 = {0x2, 0x2}, v4_int16
= {0x2, 0x0, 0x2, 0x0}, v8_int8 = {0x2, 0x0, 0x0, 0x0,
    0x2, 0x0, 0x0, 0x0}}
mm3            {uint64 = 0xb0000000b, v2_int32 = {0xb, 0xb}, v4_int16
= {0xb, 0x0, 0xb, 0x0}, v8_int8 = {0xb, 0x0, 0x0, 0x0,
    0xb, 0x0, 0x0, 0x0}}
mm4            {uint64 = 0x9000000000000000, v2_int32 = {0x0,
0x90000000}, v4_int16 = {0x0, 0x0, 0x0, 0x9000}, v8_int8 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x90}}
mm5            {uint64 = 0x9000000000000000, v2_int32 = {0x0,
0x90000000}, v4_int16 = {0x0, 0x0, 0x0, 0x9000}, v8_int8 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x90}}
mm6            {uint64 = 0xbed64c0000000000, v2_int32 = {0x0,
0xbed64c00}, v4_int16 = {0x0, 0x0, 0x4c00, 0xbed6}, v8_int8 = {
    0x0, 0x0, 0x0, 0x0, 0x0, 0x4c, 0xd6, 0xbe}}
mm7            {uint64 = 0x0, v2_int32 = {0x0, 0x0}, v4_int16 = {0x0,
0x0, 0x0, 0x0}, v8_int8 = {0x0, 0x0, 0x0, 0x0, 0x0,
    0x0, 0x0, 0x0}}

-- 
This is the x264-devel mailing-list
To unsubscribe, go to: http://developers.videolan.org/lists.html