<pre>On Sat, 13 Oct 2007, ½¯Ð˲ý wrote:<br><br>&gt;<i>      I&#39;m a newbie of x264 and tried to encode yuv sequences on multi-core<br></i>&gt;<i> platforms.<br></i>
And x264&#39;s builtin threading doesn&#39;t suffice?
---------------------------------------------<div>Yes, when I tried 4—16 threads (and I also changed the maximum number of threads defined in common.h), the CPUs are</div><div>all at less than 50% usage. Maybe the slice/frame-level parallelism is not enough.
<br><br>&gt;<i>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; First I pinned a GOP task to each logical CPU, and then read a GOP<br></i>&gt;<i> for every GOP task. When the encoding process is done, the main thread<br></i>&gt;<i> re-orders the out-of-order data encoded by the GOP tasks.
<br><br></i>&gt;<i>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; the result is fine, but when I playback the bitstream, there will be<br></i>&gt;<i> some error information displayed on the information window of videolan<br></i>&gt;<i> client:<br></i>&gt;<i><br>
</i>
&gt;<i> ffmpeg warning: error while decoding MB 14 13, bytestream (-7)<br></i>&gt;<i> (<a href="http://mailman.videolan.org/listinfo/x264-devel" target="_blank" onclick="return top.js.OpenExtLink(window,event,this)">h264 at 00CEB3C0
</a>)<br></i>&gt;<i> ffmpeg debug: concealing 145 DC, 145 AC, 145 MV errors<br><br></i>&gt;<i> (<a href="http://mailman.videolan.org/listinfo/x264-devel" target="_blank" onclick="return top.js.OpenExtLink(window,event,this)">
h264 at 00CEB3C0</a>)<br></i>&gt;<i><br></i>&gt;<i>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; any suggestion will be appreciated.<br></i><br>There&#39;s nothing in the bitstream format that would prevent arbitrary <br><br>concatenation of GOPs. So it&#39;s a bug in your code. And since you haven&#39;t 
<br>posted your code, no one can possibly help.</div><div>---------------------------------------------------</div><div>I only changed the <br>x264.c &amp; encoder/encoder.c:</div>/*x264.c*/<br>#define CPUNUM 3 <br>#define GOPSIZE 125 
<br>#define BUFSIZE 10<br>#define INVGOP BUFSIZE<br>x264_picture_t* g_Pic[CPUNUM];<br>int g_count[CPUNUM];<br>int g_progress;<br>sem_t sem[CPUNUM];<br>sem_t resem[CPUNUM];<br>sem_t g_sem;<br>int g_flush;<br><br>typedef struct para
<br>{<br>        int cpuno;<br>        x264_param_t *param;<br>}Para;<br><br>typedef struct data<br>{<br>        unsigned char * pdata;<br>        int length;<br>}Data;<br><br>Data * result[BUFSIZE];<br>int g_ready;<br>int g_curgop[CPUNUM];<br>int complete=0;
<br>x264_t *encoder[CPUNUM];<br><br>static int Encode_frame( x264_t *h, unsigned char **pdata, x264_picture_t *pic,int* gop)<br>{<br>    x264_picture_t pic_out;<br>    x264_nal_t *nal;<br>    int i_nal, i;<br>    int i_file = 0;
<br><br>    *pdata = (unsigned char *)malloc(1024*1024);<br>   <br>    if( x264_encoder_encode( h, &amp;nal, &amp;i_nal, pic, &amp;pic_out,gop) &lt; 0 )<br>    {<br>        fprintf( stderr, &quot;x264_encoder_encode failed\n&quot; );
<br>    }<br><br>    for( i = 0; i &lt; i_nal; i++ )<br>    {<br>        int i_size;<br>        int i_data;<br><br>        i_data = DATA_MAX;<br>        if( ( i_size = x264_nal_encode( data, &amp;i_data, 1, &amp;nal[i] ) ) &gt; 0 )
<br>        {<br>            memcpy(*pdata+i_file,data,i_size);<br>            i_file += i_size;<br>        }<br>        else if( i_size &lt; 0 )<br>        {<br>            fprintf( stderr, &quot;need to increase buffer size (size=%d)\n&quot;, -i_size );
<br>        }<br>    }<br>    return i_file;<br>}<br><br>void * GOP_Thread(void * gop_data)<br>{<br>            x264_picture_t *pics;<br>            Para * para = (Para *)gop_data;<br>            sem_t *cursem = &amp;sem[para-&gt;cpuno-1];<br>
        sem_t *curresem = &amp;resem[para-&gt;cpuno-1];<br>        int ret;<br>        pid_t p = 0;<br>        unsigned long new_mask=1&lt;&lt;para-&gt;cpuno;<br>        unsigned long cur_mask;<br>        int i;<br>        x264_t * h;<br>        int count=0;<br>        int i_file=0;<br>
        int gop=0;<br>        int cpu = para-&gt;cpuno;<br>        Data * dest;<br>        <br>        ret = sched_getaffinity(p,sizeof(new_mask),&amp;cur_mask);<br>        ret = sched_setaffinity(p,sizeof(new_mask),&amp;new_mask);<br><br>        h = encoder[para-&gt;cpuno-1];
<br><br>        while(1)<br>        {<br>            if(complete)<br>                return NULL;<br><br>            if(!(ret=sem_trywait(cursem)))<br>            {<br>                unsigned char * ret;<br>                int length;<br>                int start_frame;<br>                <br>                i_file = 0;
<br>                count = g_count[cpu-1];<br>                gop = g_curgop[cpu-1];<br>                dest = (Data *)malloc(sizeof(Data));<br>                dest-&gt;pdata = (unsigned char *)malloc(5*1024*1024);<br>                        <br>                h-&gt;frames.i_last_idr
 = (gop-1)*GOPSIZE;<br>                h-&gt;frames.i_input = gop*GOPSIZE;<br>                h-&gt;i_frame_num = 0;<br>                h-&gt;frames.last_nonb = NULL;<br>                h-&gt;i_frame = gop*GOPSIZE;<br>                h-&gt;i_idr_pic_id = gop;<br>                start_frame = gop * GOPSIZE;
<br>                <br>                for(i=0;i&lt;count;i++)<br>                {<br>                    pics = g_Pic[cpu-1]+i;<br>                    length = Encode_frame( h, &amp;ret, pics,&amp;start_frame);<br>                    if(length&gt;0)<br>                    {<br>                         memcpy(dest-&gt;pdata+i_file,ret,length);
<br>                        i_file += length;<br>                        }<br>                    free(ret);<br>                }<br>                        <br>                do{<br>                    length=Encode_frame(h, &amp;ret, NULL,NULL);<br>                    if(length&gt;0)<br>                    {<br>                        memcpy(dest-&gt;pdata+i_file,ret,length);
<br>                        i_file += length;<br>                    }<br>                    free(ret);<br>                }while(length);<br>                        <br>                dest-&gt;length=i_file;<br>                result[gop%BUFSIZE]=dest;<br>                g_ready ++;<br>                g_progress --;<br>                sem_post(curresem);<br>            }
<br>        }<br>}<br><br><br>/*****************************************************************************<br> * Encode:<br> *****************************************************************************/<br>static int  Encode( x264_param_t *param, cli_opt_t *opt )
<br>{<br>    x264_picture_t *pic;<br>    int     i_frame, i_frame_total;<br>    int64_t i_start, i_end;<br>    int64_t i_file;<br>    int     i_progress;<br>    int ret;<br>    unsigned long new_mask=1;<br>    unsigned long cur_mask;
<br>    int g_flush=0;<br>    int gop_number=0;<br>    int i,j;<br>    Para para[CPUNUM];<br>    pid_t p=0;<br>    pthread_t handles[CPUNUM];<br>    <br>    sched_getaffinity(p,sizeof(new_mask),&amp;cur_mask);<br>    ret = sched_setaffinity(p,sizeof(new_mask),&amp;new_mask);
<br><br>    g_progress=0;<br>    g_ready=0;<br>    <br>    for(i=0;i&lt;CPUNUM;i++)<br>    {<br>        if( ( encoder[i] = x264_encoder_open( param ) ) == NULL )<br>        {<br>                fprintf( stderr, &quot;x264_encoder_open failed\n&quot; );
<br>                return -1;        <br>        }<br>    }<br><br>    for(i=0;i&lt;CPUNUM;i++)<br>    {<br>         para[i].cpuno = i+1;<br>         para[i].param = param;<br>         sem_init(&amp;sem[i],0,0);<br>         sem_init(&amp;resem[i],0,1);<br>         g_count[i]=0;
<br>         g_curgop[i]=INVGOP;<br>         pthread_create(&amp;handles[i],NULL,GOP_Thread,&amp;para[i]);<br>    }<br><br>    sem_init(&amp;g_sem,0,1);<br><br>    for(i=0;i&lt;BUFSIZE;i++)<br>         result[i] = NULL;<br><br>    i_frame_total = p_get_frame_total( opt-&gt;hin, param-&gt;i_width, param-&gt;i_height );
<br><br>    for(i=0;i&lt;CPUNUM;i++)<br>    {<br>        g_Pic[i]=(x264_picture_t *)malloc(GOPSIZE*sizeof(x264_picture_t));<br>        for(j=0;j&lt;GOPSIZE;j++)<br>                x264_picture_alloc( g_Pic[i]+j, X264_CSP_I420, param-&gt;i_width, param-&gt;i_height );
<br>    }<br><br>    i_start = x264_mdate();<br>    i_frame_total -= opt-&gt;i_seek;<br>    if( opt-&gt;i_maxframes &gt; 0 &amp;&amp; opt-&gt;i_maxframes &lt; i_frame_total )<br>        i_frame_total = opt-&gt;i_maxframes;
<br>   <br>    if( set_param_bsf( opt-&gt;hout, param ) )<br>    {<br>        fprintf( stderr, &quot;x264 [error]: can&#39;t set outfile param\n&quot; );<br>        close_file_yuv( opt-&gt;hin );<br>        close_file_bsf( opt-&gt;hout );<br>
        return -1;<br>    }<br><br>    for(i_frame=0,i_file=0,i_progress=0; b_ctrl_c==0&amp;&amp;(i_frame&lt;i_frame_total||i_frame_total==0);)<br>    {<br>        complete=0;<br>        for(i=0;i&lt;CPUNUM;i++)<br>        {<br>            if((sem_trywait(&amp;resem[i])==0)&amp;&amp;(!complete))
<br>                break;<br>            else <br>            {<br>                if(i&gt;=CPUNUM-1)<br>                {<br>                    if(result[g_flush])<br>                    {<br>                        i_file += write_nalu_bsf( opt-&gt;hout, result[g_flush]-&gt;pdata, result[g_flush]-&gt;length );<br>                        g_ready --;
<br>                        free(result[g_flush]-&gt;pdata);<br>                        free(result[g_flush]);<br>                        result[g_flush]=NULL;<br>                        g_flush=(g_flush+1)%BUFSIZE;<br>                    }<br><br>                    i=-1;<br>                     continue;<br>                }<br>            }<br>        }<br>        <br>        j=0;<br>
        while((j&lt;GOPSIZE)&amp;&amp;(!p_read_frame(g_Pic[i]+j,opt-&gt;hin,i_frame+opt-&gt;i_seek,param-&gt;i_width,param-&gt;i_height))&amp;&amp;(i_frame&lt;i_frame_total))<br>        {<br>            pic = g_Pic[i]+j;<br>            pic-&gt;i_pts = i_frame * param-&gt;i_fps_den;
<br>            pic-&gt;i_type = X264_TYPE_AUTO;<br>            pic-&gt;i_qpplus1 = 0;<br>            j++;<br>            i_frame++;<br>        }<br><br>        g_count[i]=j;<br>        g_curgop[i]=gop_number;<br>        gop_number=gop_number+1;<br>        g_progress ++;<br>        sem_post(&amp;sem[i]);
<br>    }<br><br>    while(g_progress||g_ready)<br>    {<br>        if(result[g_flush])<br>        {<br>            i_file += write_nalu_bsf( opt-&gt;hout, result[g_flush]-&gt;pdata, result[g_flush]-&gt;length );<br>            g_ready --;<br>            free(result[g_flush]-&gt;pdata);
<br>            free(result[g_flush]);<br>            result[g_flush]=NULL;<br>            g_flush=(g_flush+1)%BUFSIZE;<br>       }<br>    }<br><br>    complete=1;<br>    <br>    for(i=0;i&lt;CPUNUM;i++)<br>            pthread_join(handles[i],NULL);
<br><br>    <br>    for(i=0;i&lt;CPUNUM;i++)<br>        x264_encoder_close(encoder[i]);<br><br>    i_end = x264_mdate();<br>    fprintf( stderr, &quot;\n&quot; );<br><br>    if( b_ctrl_c )<br>        fprintf( stderr, &quot;aborted at input frame %d\n&quot;, opt-&gt;i_seek + i_frame );
<br><br>    p_close_infile( opt-&gt;hin );<br>    p_close_outfile( opt-&gt;hout );<br><br>    if( i_frame &gt; 0 )<br>    {<br>        double fps = (double)i_frame * (double)1000000 /<br>                     (double)( i_end - i_start );
<br><br>        fprintf( stderr, &quot;encoded %d frames, %.2f fps, %.2f kb/s\n&quot;, i_frame, fps,<br>                 (double) i_file * 8 * param-&gt;i_fps_num / ( param-&gt;i_fps_den * i_frame * 1000 ) );<br>    }<br>
<br>    return 0;<div><p>}</p><p>/*encoder/encoder.c, I only added the start frame number parameter to x264_encoder_encode function*/ <br></p><p>int     x264_encoder_encode( x264_t *h,<br>                             x264_nal_t **pp_nal, int *pi_nal,
<br>                             x264_picture_t *pic_in,<br>                             x264_picture_t *pic_out,<br>                             int* gop)<br>{<br>    x264_frame_t   *frame_psnr = h-&gt;fdec; /* just to keep the current decoded frame for psnr calculation */
<br>    int     i_nal_type;<br>    int     i_nal_ref_idc;<br>    int     i_slice_type;<br>    int     i_frame_size;<br><br>    int i;<br><br>    int   i_global_qp;<br><br>    char psz_message[80];<br><br>    /* no data out */
<br>    *pi_nal = 0;<br>    *pp_nal = NULL;<br><br><br>    /* ------------------- Setup new frame from picture -------------------- */<br>    TIMER_START( i_mtime_encode_frame );<br>    if( pic_in != NULL )<br>    {<br>        /* 1: Copy the picture to a frame and move it to a buffer */
<br>        x264_frame_t *fenc = x264_frame_get( h-&gt;frames.unused );<br><br>        x264_frame_copy_picture( h, fenc, pic_in );<br><br>        fenc-&gt;i_frame = h-&gt;frames.i_input++;<br><br>        x264_frame_put( h-&gt;
frames.next, fenc );<br><br>        x264_frame_init_lowres( h-&gt;param.cpu, fenc );<br><br>        if( h-&gt;frames.i_input &lt;= h-&gt;frames.i_delay + *gop)<br>        {<br>            /* Nothing yet to encode */<br>            /* waiting for filling bframe buffer */
<br>            pic_out-&gt;i_type = X264_TYPE_AUTO;<br>            return 0;<br>        }<br>    }</p><p>...........</p><br><p><br></p><p>Thanks!<br></p></div></pre>