[x264-devel] commit: interleave psnr/ssim computation with reference frame filtering, to improve cache coherency (Loren Merritt)
git version control
git at videolan.org
Wed Jun 18 15:47:18 CEST 2008
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Thu Jun 12 01:39:22 2008 -0600| [22d3c0409deec7601292c56c7cd0a23427dbc107]
interleave psnr/ssim computation with reference frame filtering, to improve cache coherency
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=22d3c0409deec7601292c56c7cd0a23427dbc107
---
common/common.h | 3 +++
common/pixel.c | 2 +-
encoder/encoder.c | 51 +++++++++++++++++++++++++++++++++++----------------
tools/checkasm.c | 2 +-
4 files changed, 40 insertions(+), 18 deletions(-)
diff --git a/common/common.h b/common/common.h
index 04f5243..0636394 100644
--- a/common/common.h
+++ b/common/common.h
@@ -544,6 +544,9 @@ struct x264_t
int i_mbs_analysed;
/* Adaptive direct mv pred */
int i_direct_score[2];
+ /* Metrics */
+ int64_t i_ssd[3];
+ double f_ssim;
} frame;
/* Cumulated stats */
diff --git a/common/pixel.c b/common/pixel.c
index 11d74a0..64a410e 100644
--- a/common/pixel.c
+++ b/common/pixel.c
@@ -441,7 +441,7 @@ float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
}
x264_free(sum0);
x264_free(sum1);
- return ssim / ((width-1) * (height-1));
+ return ssim;
}
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 533e8a8..f2710ab 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -893,6 +893,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y )
int b_deblock = !h->sh.i_disable_deblocking_filter_idc;
int b_end = mb_y == h->sps->i_mb_height;
int min_y = mb_y - (1 << h->sh.b_mbaff);
+ int max_y = b_end ? h->sps->i_mb_height : mb_y;
b_deblock &= b_hpel || h->param.psz_dump_yuv;
if( mb_y & h->sh.b_mbaff )
return;
@@ -913,7 +914,6 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y )
if( b_deblock )
{
- int max_y = b_end ? h->sps->i_mb_height : mb_y;
int y;
for( y = min_y; y < max_y; y += (1 << h->sh.b_mbaff) )
x264_frame_deblock_row( h, y );
@@ -930,6 +930,33 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y )
{
x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) );
}
+
+ min_y = X264_MAX( min_y*16-8, 0 );
+ max_y = b_end ? h->param.i_height : mb_y*16-8;
+
+ if( h->param.analyse.b_psnr )
+ {
+ int i;
+ for( i=0; i<3; i++ )
+ h->stat.frame.i_ssd[i] +=
+ x264_pixel_ssd_wxh( &h->pixf,
+ h->fdec->plane[i] + (min_y>>!!i) * h->fdec->i_stride[i], h->fdec->i_stride[i],
+ h->fenc->plane[i] + (min_y>>!!i) * h->fenc->i_stride[i], h->fenc->i_stride[i],
+ h->param.i_width >> !!i, (max_y-min_y) >> !!i );
+ }
+
+ if( h->param.analyse.b_ssim )
+ {
+ x264_emms();
+ /* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks,
+ * and overlap by 4 */
+ min_y += min_y == 0 ? 2 : -6;
+ h->stat.frame.f_ssim +=
+ x264_pixel_ssim_wxh( &h->pixf,
+ h->fdec->plane[0] + 2+min_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
+ h->fenc->plane[0] + 2+min_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
+ h->param.i_width-2, max_y-min_y );
+ }
}
static inline void x264_reference_update( x264_t *h )
@@ -1659,16 +1686,11 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
psz_message[0] = '\0';
if( h->param.analyse.b_psnr )
{
- int64_t sqe[3];
-
- for( i=0; i<3; i++ )
- {
- sqe[i] = x264_pixel_ssd_wxh( &h->pixf,
- h->fdec->plane[i], h->fdec->i_stride[i],
- h->fenc->plane[i], h->fenc->i_stride[i],
- h->param.i_width >> !!i, h->param.i_height >> !!i );
- }
- x264_emms();
+ int64_t sqe[3] = {
+ h->stat.frame.i_ssd[0],
+ h->stat.frame.i_ssd[1],
+ h->stat.frame.i_ssd[2],
+ };
h->stat.i_sqe_global[h->sh.i_type] += sqe[0] + sqe[1] + sqe[2];
h->stat.f_psnr_average[h->sh.i_type] += x264_psnr( sqe[0] + sqe[1] + sqe[2], 3 * h->param.i_width * h->param.i_height / 2 );
@@ -1684,11 +1706,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
if( h->param.analyse.b_ssim )
{
- // offset by 2 pixels to avoid alignment of ssim blocks with dct blocks
- float ssim_y = x264_pixel_ssim_wxh( &h->pixf,
- h->fdec->plane[0] + 2+2*h->fdec->i_stride[0], h->fdec->i_stride[0],
- h->fenc->plane[0] + 2+2*h->fenc->i_stride[0], h->fenc->i_stride[0],
- h->param.i_width-2, h->param.i_height-2 );
+ double ssim_y = h->stat.frame.f_ssim
+ / (((h->param.i_width-6)>>2) * ((h->param.i_height-6)>>2));
h->stat.f_ssim_mean_y[h->sh.i_type] += ssim_y;
snprintf( psz_message + strlen(psz_message), 80 - strlen(psz_message),
" SSIM Y:%.5f", ssim_y );
diff --git a/tools/checkasm.c b/tools/checkasm.c
index c2c1661..b9e3205 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -314,7 +314,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
x264_emms();
res_c = x264_pixel_ssim_wxh( &pixel_c, buf1+2, 32, buf2+2, 32, 32, 28 );
res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );
- if( fabs(res_c - res_a) > 1e-7 )
+ if( fabs(res_c - res_a) > 1e-6 )
{
ok = 0;
fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );
More information about the x264-devel mailing list