[x264-devel] [about gop parallelism]

蒋兴昌 jiangxingchang at gmail.com
Mon Oct 15 04:29:15 CEST 2007


On Sat, 13 Oct 2007, ½¯Ð˲ý wrote:

>*      I'm a newbie of x264 and tried to encode yuv sequences on multi-core
*>* platforms.
*
And x264's builtin threading doesn't suffice?
---------------------------------------------yes, when I tried 4—16
threads(and I also changed the maxium number of threads defined in
common.h),the cpus are
all less than 50% usage. maybe the slice/frame level parallelisms are
not enouth.

>*      first I pinned some GOP task on each logical CPUs, and then read a gop
*>* for every gop task, when the encoding process is done, the main thread
*>* re-order the out-of-order data encoded by the GOP tasks.

*>*      the result is fine, but when I playback the bitstream, there will be
*>* some error information displayed on the information window of videolan
*>* client:
*>*
*
>* ffmpeg warning: error while decoding MB 14 13, bytestream (-7)
*>* (h264 at 00CEB3C0 <http://mailman.videolan.org/listinfo/x264-devel>)
*>* ffmpeg debug: concealing 145 DC, 145 AC, 145 MV errors

*>* (h264 at 00CEB3C0 <http://mailman.videolan.org/listinfo/x264-devel>)
*>*
*>*      any sugestion will be appreciated.
*
There's nothing in the bitstream format that would prevent arbitrary

concatenation of GOPs. So it's a bug in your code. And since you haven't
posted your code, no one can possibly help.
---------------------------------------------------
I only changed the
x264.c & encoder/encoder.c:
/*x264.c*/
#define CPUNUM 3
#define GOPSIZE 125
#define BUFSIZE 10
#define INVGOP BUFSIZE
x264_picture_t* g_Pic[CPUNUM];
int g_count[CPUNUM];
int g_progress;
sem_t sem[CPUNUM];
sem_t resem[CPUNUM];
sem_t g_sem;
int g_flush;

typedef struct para
{
	int cpuno;
	x264_param_t *param;
}Para;

typedef struct data
{
	unsigned char * pdata;
	int length;
}Data;

Data * result[BUFSIZE];
int g_ready;
int g_curgop[CPUNUM];
int complete=0;
x264_t *encoder[CPUNUM];

static int Encode_frame( x264_t *h, unsigned char **pdata,
x264_picture_t *pic,int* gop)
{
    x264_picture_t pic_out;
    x264_nal_t *nal;
    int i_nal, i;
    int i_file = 0;

    *pdata = (unsigned char *)malloc(1024*1024);

    if( x264_encoder_encode( h, &nal, &i_nal, pic, &pic_out,gop) < 0 )
    {
        fprintf( stderr, "x264_encoder_encode failed\n" );
    }

    for( i = 0; i < i_nal; i++ )
    {
        int i_size;
        int i_data;

        i_data = DATA_MAX;
        if( ( i_size = x264_nal_encode( data, &i_data, 1, &nal[i] ) ) > 0 )
        {
	    memcpy(*pdata+i_file,data,i_size);
	    i_file += i_size;
        }
        else if( i_size < 0 )
        {
            fprintf( stderr, "need to increase buffer size
(size=%d)\n", -i_size );
        }
    }
    return i_file;
}

void * GOP_Thread(void * gop_data)
{
    	x264_picture_t *pics;
    	Para * para = (Para *)gop_data;
    	sem_t *cursem = &sem[para->cpuno-1];
	sem_t *curresem = &resem[para->cpuno-1];
	int ret;
	pid_t p = 0;
	unsigned long new_mask=1<<para->cpuno;
	unsigned long cur_mask;
	int i;
	x264_t * h;
	int count=0;
	int i_file=0;
	int gop=0;
	int cpu = para->cpuno;
	Data * dest;

	ret = sched_getaffinity(p,sizeof(new_mask),&cur_mask);
	ret = sched_setaffinity(p,sizeof(new_mask),&new_mask);

	h = encoder[para->cpuno-1];

	while(1)
	{
	    if(complete)
	        return NULL;

	    if(!(ret=sem_trywait(cursem)))
	    {
	        unsigned char * ret;
	        int length;
	        int start_frame;
		
	        i_file = 0;
	        count = g_count[cpu-1];
	        gop = g_curgop[cpu-1];
	        dest = (Data *)malloc(sizeof(Data));
	        dest->pdata = (unsigned char *)malloc(5*1024*1024);
			
                h->frames.i_last_idr = (gop-1)*GOPSIZE;
	        h->frames.i_input = gop*GOPSIZE;
		h->i_frame_num = 0;
		h->frames.last_nonb = NULL;
		h->i_frame = gop*GOPSIZE;
		h->i_idr_pic_id = gop;
		start_frame = gop * GOPSIZE;
		
		for(i=0;i<count;i++)
		{
		    pics = g_Pic[cpu-1]+i;
		    length = Encode_frame( h, &ret, pics,&start_frame);
		    if(length>0)
		    {
	 	        memcpy(dest->pdata+i_file,ret,length);
		        i_file += length;
    	            }
	            free(ret);
	        }
			
		do{
		    length=Encode_frame(h, &ret, NULL,NULL);
		    if(length>0)
		    {
		        memcpy(dest->pdata+i_file,ret,length);
		        i_file += length;
		    }
		    free(ret);
		}while(length);
			
		dest->length=i_file;
		result[gop%BUFSIZE]=dest;
		g_ready ++;
		g_progress --;
		sem_post(curresem);
	    }
	}
}


/*****************************************************************************
 * Encode:
 *****************************************************************************/
static int  Encode( x264_param_t *param, cli_opt_t *opt )
{
    x264_picture_t *pic;
    int     i_frame, i_frame_total;
    int64_t i_start, i_end;
    int64_t i_file;
    int     i_progress;
    int ret;
    unsigned long new_mask=1;
    unsigned long cur_mask;
    int g_flush=0;
    int gop_number=0;
    int i,j;
    Para para[CPUNUM];
    pid_t p=0;
    pthread_t handles[CPUNUM];

    sched_getaffinity(p,sizeof(new_mask),&cur_mask);
    ret = sched_setaffinity(p,sizeof(new_mask),&new_mask);

    g_progress=0;
    g_ready=0;

    for(i=0;i<CPUNUM;i++)
    {
        if( ( encoder[i] = x264_encoder_open( param ) ) == NULL )
        {
    	    fprintf( stderr, "x264_encoder_open failed\n" );
    	    return -1;	
        }
    }

    for(i=0;i<CPUNUM;i++)
    {
	 para[i].cpuno = i+1;
	 para[i].param = param;
	 sem_init(&sem[i],0,0);
	 sem_init(&resem[i],0,1);
	 g_count[i]=0;
	 g_curgop[i]=INVGOP;
	 pthread_create(&handles[i],NULL,GOP_Thread,&para[i]);
    }

    sem_init(&g_sem,0,1);

    for(i=0;i<BUFSIZE;i++)
	 result[i] = NULL;

    i_frame_total = p_get_frame_total( opt->hin, param->i_width,
param->i_height );

    for(i=0;i<CPUNUM;i++)
    {
	g_Pic[i]=(x264_picture_t *)malloc(GOPSIZE*sizeof(x264_picture_t));
	for(j=0;j<GOPSIZE;j++)
    	    x264_picture_alloc( g_Pic[i]+j, X264_CSP_I420,
param->i_width, param->i_height );
    }

    i_start = x264_mdate();
    i_frame_total -= opt->i_seek;
    if( opt->i_maxframes > 0 && opt->i_maxframes < i_frame_total )
        i_frame_total = opt->i_maxframes;

    if( set_param_bsf( opt->hout, param ) )
    {
	fprintf( stderr, "x264 [error]: can't set outfile param\n" );
	close_file_yuv( opt->hin );
	close_file_bsf( opt->hout );
	return -1;
    }

    for(i_frame=0,i_file=0,i_progress=0;
b_ctrl_c==0&&(i_frame<i_frame_total||i_frame_total==0);)
    {
	complete=0;
	for(i=0;i<CPUNUM;i++)
	{
	    if((sem_trywait(&resem[i])==0)&&(!complete))
		break;
	    else
	    {
		if(i>=CPUNUM-1)
		{
		    if(result[g_flush])
		    {
			i_file += write_nalu_bsf( opt->hout, result[g_flush]->pdata,
result[g_flush]->length );
			g_ready --;
			free(result[g_flush]->pdata);
			free(result[g_flush]);
			result[g_flush]=NULL;
			g_flush=(g_flush+1)%BUFSIZE;
		    }

		    i=-1;
	 	    continue;
		}
	    }
	}
	
	j=0;
        while((j<GOPSIZE)&&(!p_read_frame(g_Pic[i]+j,opt->hin,i_frame+opt->i_seek,param->i_width,param->i_height))&&(i_frame<i_frame_total))
	{
	    pic = g_Pic[i]+j;
            pic->i_pts = i_frame * param->i_fps_den;
	    pic->i_type = X264_TYPE_AUTO;
	    pic->i_qpplus1 = 0;
	    j++;
	    i_frame++;
	}

	g_count[i]=j;
	g_curgop[i]=gop_number;
	gop_number=gop_number+1;
	g_progress ++;
	sem_post(&sem[i]);
    }

    while(g_progress||g_ready)
    {
	if(result[g_flush])
	{
	    i_file += write_nalu_bsf( opt->hout, result[g_flush]->pdata,
result[g_flush]->length );
	    g_ready --;
	    free(result[g_flush]->pdata);
	    free(result[g_flush]);
            result[g_flush]=NULL;
            g_flush=(g_flush+1)%BUFSIZE;
       }
    }

    complete=1;

    for(i=0;i<CPUNUM;i++)
    	pthread_join(handles[i],NULL);


    for(i=0;i<CPUNUM;i++)
        x264_encoder_close(encoder[i]);

    i_end = x264_mdate();
    fprintf( stderr, "\n" );

    if( b_ctrl_c )
        fprintf( stderr, "aborted at input frame %d\n", opt->i_seek + i_frame );

    p_close_infile( opt->hin );
    p_close_outfile( opt->hout );

    if( i_frame > 0 )
    {
        double fps = (double)i_frame * (double)1000000 /
                     (double)( i_end - i_start );

        fprintf( stderr, "encoded %d frames, %.2f fps, %.2f kb/s\n",
i_frame, fps,
                 (double) i_file * 8 * param->i_fps_num / (
param->i_fps_den * i_frame * 1000 ) );
    }

    return 0;

}

/*encoder/encoder.c, I only added the start frame number parameter to
x264_encoder_encode function*/

int     x264_encoder_encode( x264_t *h,
                             x264_nal_t **pp_nal, int *pi_nal,
                             x264_picture_t *pic_in,
                             x264_picture_t *pic_out,
			     int* gop)
{
    x264_frame_t   *frame_psnr = h->fdec; /* just to keep the current
decoded frame for psnr calculation */
    int     i_nal_type;
    int     i_nal_ref_idc;
    int     i_slice_type;
    int     i_frame_size;

    int i;

    int   i_global_qp;

    char psz_message[80];

    /* no data out */
    *pi_nal = 0;
    *pp_nal = NULL;


    /* ------------------- Setup new frame from picture -------------------- */
    TIMER_START( i_mtime_encode_frame );
    if( pic_in != NULL )
    {
        /* 1: Copy the picture to a frame and move it to a buffer */
        x264_frame_t *fenc = x264_frame_get( h->frames.unused );

        x264_frame_copy_picture( h, fenc, pic_in );

        fenc->i_frame = h->frames.i_input++;

        x264_frame_put( h->frames.next, fenc );

        x264_frame_init_lowres( h->param.cpu, fenc );

        if( h->frames.i_input <= h->frames.i_delay + *gop)
        {
	    /* Nothing yet to encode */
            /* waiting for filling bframe buffer */
            pic_out->i_type = X264_TYPE_AUTO;
            return 0;
        }
    }

...........


Thanks!
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mailman.videolan.org/pipermail/x264-devel/attachments/20071015/aa448328/attachment.htm 


More information about the x264-devel mailing list