[x264-devel] [about gop parallelism]
蒋兴昌
jiangxingchang at gmail.com
Mon Oct 15 04:29:15 CEST 2007
On Sat, 13 Oct 2007, ½¯Ð˲ý wrote:
>* I'm a newbie of x264 and tried to encode yuv sequences on multi-core
*>* platforms.
*
And x264's builtin threading doesn't suffice?
---------------------------------------------yes, when I tried 4—16
threads(and I also changed the maxium number of threads defined in
common.h),the cpus are
all less than 50% usage. maybe the slice/frame level parallelisms are
not enouth.
>* first I pinned some GOP task on each logical CPUs, and then read a gop
*>* for every gop task, when the encoding process is done, the main thread
*>* re-order the out-of-order data encoded by the GOP tasks.
*>* the result is fine, but when I playback the bitstream, there will be
*>* some error information displayed on the information window of videolan
*>* client:
*>*
*
>* ffmpeg warning: error while decoding MB 14 13, bytestream (-7)
*>* (h264 at 00CEB3C0 <http://mailman.videolan.org/listinfo/x264-devel>)
*>* ffmpeg debug: concealing 145 DC, 145 AC, 145 MV errors
*>* (h264 at 00CEB3C0 <http://mailman.videolan.org/listinfo/x264-devel>)
*>*
*>* any sugestion will be appreciated.
*
There's nothing in the bitstream format that would prevent arbitrary
concatenation of GOPs. So it's a bug in your code. And since you haven't
posted your code, no one can possibly help.
---------------------------------------------------
I only changed the
x264.c & encoder/encoder.c:
/*x264.c*/
#define CPUNUM 3
#define GOPSIZE 125
#define BUFSIZE 10
#define INVGOP BUFSIZE
x264_picture_t* g_Pic[CPUNUM];
int g_count[CPUNUM];
int g_progress;
sem_t sem[CPUNUM];
sem_t resem[CPUNUM];
sem_t g_sem;
int g_flush;
typedef struct para
{
int cpuno;
x264_param_t *param;
}Para;
typedef struct data
{
unsigned char * pdata;
int length;
}Data;
Data * result[BUFSIZE];
int g_ready;
int g_curgop[CPUNUM];
int complete=0;
x264_t *encoder[CPUNUM];
static int Encode_frame( x264_t *h, unsigned char **pdata,
x264_picture_t *pic,int* gop)
{
x264_picture_t pic_out;
x264_nal_t *nal;
int i_nal, i;
int i_file = 0;
*pdata = (unsigned char *)malloc(1024*1024);
if( x264_encoder_encode( h, &nal, &i_nal, pic, &pic_out,gop) < 0 )
{
fprintf( stderr, "x264_encoder_encode failed\n" );
}
for( i = 0; i < i_nal; i++ )
{
int i_size;
int i_data;
i_data = DATA_MAX;
if( ( i_size = x264_nal_encode( data, &i_data, 1, &nal[i] ) ) > 0 )
{
memcpy(*pdata+i_file,data,i_size);
i_file += i_size;
}
else if( i_size < 0 )
{
fprintf( stderr, "need to increase buffer size
(size=%d)\n", -i_size );
}
}
return i_file;
}
void * GOP_Thread(void * gop_data)
{
x264_picture_t *pics;
Para * para = (Para *)gop_data;
sem_t *cursem = &sem[para->cpuno-1];
sem_t *curresem = &resem[para->cpuno-1];
int ret;
pid_t p = 0;
unsigned long new_mask=1<<para->cpuno;
unsigned long cur_mask;
int i;
x264_t * h;
int count=0;
int i_file=0;
int gop=0;
int cpu = para->cpuno;
Data * dest;
ret = sched_getaffinity(p,sizeof(new_mask),&cur_mask);
ret = sched_setaffinity(p,sizeof(new_mask),&new_mask);
h = encoder[para->cpuno-1];
while(1)
{
if(complete)
return NULL;
if(!(ret=sem_trywait(cursem)))
{
unsigned char * ret;
int length;
int start_frame;
i_file = 0;
count = g_count[cpu-1];
gop = g_curgop[cpu-1];
dest = (Data *)malloc(sizeof(Data));
dest->pdata = (unsigned char *)malloc(5*1024*1024);
h->frames.i_last_idr = (gop-1)*GOPSIZE;
h->frames.i_input = gop*GOPSIZE;
h->i_frame_num = 0;
h->frames.last_nonb = NULL;
h->i_frame = gop*GOPSIZE;
h->i_idr_pic_id = gop;
start_frame = gop * GOPSIZE;
for(i=0;i<count;i++)
{
pics = g_Pic[cpu-1]+i;
length = Encode_frame( h, &ret, pics,&start_frame);
if(length>0)
{
memcpy(dest->pdata+i_file,ret,length);
i_file += length;
}
free(ret);
}
do{
length=Encode_frame(h, &ret, NULL,NULL);
if(length>0)
{
memcpy(dest->pdata+i_file,ret,length);
i_file += length;
}
free(ret);
}while(length);
dest->length=i_file;
result[gop%BUFSIZE]=dest;
g_ready ++;
g_progress --;
sem_post(curresem);
}
}
}
/*****************************************************************************
* Encode:
*****************************************************************************/
static int Encode( x264_param_t *param, cli_opt_t *opt )
{
x264_picture_t *pic;
int i_frame, i_frame_total;
int64_t i_start, i_end;
int64_t i_file;
int i_progress;
int ret;
unsigned long new_mask=1;
unsigned long cur_mask;
int g_flush=0;
int gop_number=0;
int i,j;
Para para[CPUNUM];
pid_t p=0;
pthread_t handles[CPUNUM];
sched_getaffinity(p,sizeof(new_mask),&cur_mask);
ret = sched_setaffinity(p,sizeof(new_mask),&new_mask);
g_progress=0;
g_ready=0;
for(i=0;i<CPUNUM;i++)
{
if( ( encoder[i] = x264_encoder_open( param ) ) == NULL )
{
fprintf( stderr, "x264_encoder_open failed\n" );
return -1;
}
}
for(i=0;i<CPUNUM;i++)
{
para[i].cpuno = i+1;
para[i].param = param;
sem_init(&sem[i],0,0);
sem_init(&resem[i],0,1);
g_count[i]=0;
g_curgop[i]=INVGOP;
pthread_create(&handles[i],NULL,GOP_Thread,¶[i]);
}
sem_init(&g_sem,0,1);
for(i=0;i<BUFSIZE;i++)
result[i] = NULL;
i_frame_total = p_get_frame_total( opt->hin, param->i_width,
param->i_height );
for(i=0;i<CPUNUM;i++)
{
g_Pic[i]=(x264_picture_t *)malloc(GOPSIZE*sizeof(x264_picture_t));
for(j=0;j<GOPSIZE;j++)
x264_picture_alloc( g_Pic[i]+j, X264_CSP_I420,
param->i_width, param->i_height );
}
i_start = x264_mdate();
i_frame_total -= opt->i_seek;
if( opt->i_maxframes > 0 && opt->i_maxframes < i_frame_total )
i_frame_total = opt->i_maxframes;
if( set_param_bsf( opt->hout, param ) )
{
fprintf( stderr, "x264 [error]: can't set outfile param\n" );
close_file_yuv( opt->hin );
close_file_bsf( opt->hout );
return -1;
}
for(i_frame=0,i_file=0,i_progress=0;
b_ctrl_c==0&&(i_frame<i_frame_total||i_frame_total==0);)
{
complete=0;
for(i=0;i<CPUNUM;i++)
{
if((sem_trywait(&resem[i])==0)&&(!complete))
break;
else
{
if(i>=CPUNUM-1)
{
if(result[g_flush])
{
i_file += write_nalu_bsf( opt->hout, result[g_flush]->pdata,
result[g_flush]->length );
g_ready --;
free(result[g_flush]->pdata);
free(result[g_flush]);
result[g_flush]=NULL;
g_flush=(g_flush+1)%BUFSIZE;
}
i=-1;
continue;
}
}
}
j=0;
while((j<GOPSIZE)&&(!p_read_frame(g_Pic[i]+j,opt->hin,i_frame+opt->i_seek,param->i_width,param->i_height))&&(i_frame<i_frame_total))
{
pic = g_Pic[i]+j;
pic->i_pts = i_frame * param->i_fps_den;
pic->i_type = X264_TYPE_AUTO;
pic->i_qpplus1 = 0;
j++;
i_frame++;
}
g_count[i]=j;
g_curgop[i]=gop_number;
gop_number=gop_number+1;
g_progress ++;
sem_post(&sem[i]);
}
while(g_progress||g_ready)
{
if(result[g_flush])
{
i_file += write_nalu_bsf( opt->hout, result[g_flush]->pdata,
result[g_flush]->length );
g_ready --;
free(result[g_flush]->pdata);
free(result[g_flush]);
result[g_flush]=NULL;
g_flush=(g_flush+1)%BUFSIZE;
}
}
complete=1;
for(i=0;i<CPUNUM;i++)
pthread_join(handles[i],NULL);
for(i=0;i<CPUNUM;i++)
x264_encoder_close(encoder[i]);
i_end = x264_mdate();
fprintf( stderr, "\n" );
if( b_ctrl_c )
fprintf( stderr, "aborted at input frame %d\n", opt->i_seek + i_frame );
p_close_infile( opt->hin );
p_close_outfile( opt->hout );
if( i_frame > 0 )
{
double fps = (double)i_frame * (double)1000000 /
(double)( i_end - i_start );
fprintf( stderr, "encoded %d frames, %.2f fps, %.2f kb/s\n",
i_frame, fps,
(double) i_file * 8 * param->i_fps_num / (
param->i_fps_den * i_frame * 1000 ) );
}
return 0;
}
/*encoder/encoder.c, I only added the start frame number parameter to
x264_encoder_encode function*/
int x264_encoder_encode( x264_t *h,
x264_nal_t **pp_nal, int *pi_nal,
x264_picture_t *pic_in,
x264_picture_t *pic_out,
int* gop)
{
x264_frame_t *frame_psnr = h->fdec; /* just to keep the current
decoded frame for psnr calculation */
int i_nal_type;
int i_nal_ref_idc;
int i_slice_type;
int i_frame_size;
int i;
int i_global_qp;
char psz_message[80];
/* no data out */
*pi_nal = 0;
*pp_nal = NULL;
/* ------------------- Setup new frame from picture -------------------- */
TIMER_START( i_mtime_encode_frame );
if( pic_in != NULL )
{
/* 1: Copy the picture to a frame and move it to a buffer */
x264_frame_t *fenc = x264_frame_get( h->frames.unused );
x264_frame_copy_picture( h, fenc, pic_in );
fenc->i_frame = h->frames.i_input++;
x264_frame_put( h->frames.next, fenc );
x264_frame_init_lowres( h->param.cpu, fenc );
if( h->frames.i_input <= h->frames.i_delay + *gop)
{
/* Nothing yet to encode */
/* waiting for filling bframe buffer */
pic_out->i_type = X264_TYPE_AUTO;
return 0;
}
}
...........
Thanks!
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mailman.videolan.org/pipermail/x264-devel/attachments/20071015/aa448328/attachment.htm
More information about the x264-devel
mailing list