[x264-devel] commit: Fix a nondeterminism with threads and subme>7 ( Jason Garrett-Glaser )
git version control
git at videolan.org
Sun Jul 26 10:46:44 CEST 2009
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Sat Jul 25 22:31:06 2009 -0700| [4074956df13e058421fb5ba89b872be143742ffd] | committer: Jason Garrett-Glaser
Fix a nondeterminism with threads and subme>7
Also add a few more checks to eliminate the need for spel_border.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=4074956df13e058421fb5ba89b872be143742ffd
---
encoder/analyse.c | 5 ++---
encoder/me.c | 34 +++++++++++++---------------------
2 files changed, 15 insertions(+), 24 deletions(-)
diff --git a/encoder/analyse.c b/encoder/analyse.c
index ac85347..c4e70b1 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -309,8 +309,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
int i_fmv_range = 4 * h->param.analyse.i_mv_range;
// limit motion search to a slightly smaller range than the theoretical limit,
// since the search may go a few iterations past its given range
- int i_fpel_border = 5; // umh unconditional radius
- int i_spel_border = 8; // 1.5 for subpel_satd, 1.5 for subpel_rd, 2 for bime, round up
+ int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel
/* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
@@ -348,7 +347,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
- h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], X264_MAX(4*(-512+i_spel_border), -i_fmv_range), i_fmv_range );
+ h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
diff --git a/encoder/me.c b/encoder/me.c
index fa14dab..7a8d619 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -535,7 +535,7 @@ me_hex2:
}
}
} while( ++i <= i_me_range/4 );
- if( bmy <= mv_y_max )
+ if( bmy <= mv_y_max && bmy >= mv_y_min )
goto me_hex2;
break;
}
@@ -718,8 +718,6 @@ me_hex2:
int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
}
- else if( m->mv[1] > h->mb.mv_max_spel[1] )
- m->mv[1] = h->mb.mv_max_spel[1];
}
#undef COST_MV
@@ -790,8 +788,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
/* try the subpel component of the predicted mv */
if( hpel_iters && h->mb.i_subpel_refine < 3 )
{
- int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
- int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
+ int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0]+2, h->mb.mv_max_spel[0]-2 );
+ int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1]+2, h->mb.mv_max_spel[1]-2 );
if( (mx-bmx)|(my-bmy) )
COST_MV_SAD( mx, my );
}
@@ -818,9 +816,6 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
if( !b_refine_qpel )
{
- /* check for mvrange */
- if( bmy > h->mb.mv_max_spel[1] )
- bmy = h->mb.mv_max_spel[1];
bcost = COST_MAX;
COST_MV_SATD( bmx, bmy, -1 );
}
@@ -844,6 +839,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
bdir = -1;
for( i = qpel_iters; i > 0; i-- )
{
+ if( bmy <= h->mb.mv_min_spel[1] || bmy >= h->mb.mv_max_spel[1] )
+ break;
odir = bdir;
omx = bmx;
omy = bmy;
@@ -855,14 +852,6 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
break;
}
- /* check for mvrange */
- if( bmy > h->mb.mv_max_spel[1] )
- {
- bmy = h->mb.mv_max_spel[1];
- bcost = COST_MAX;
- COST_MV_SATD( bmx, bmy, -1 );
- }
-
m->cost = bcost;
m->mv[0] = bmx;
m->mv[1] = bmy;
@@ -970,8 +959,8 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
/* each byte of visited represents 8 possible m1y positions, so a 4D array isn't needed */
DECLARE_ALIGNED_16( uint8_t visited[8][8][8] );
- if( bm0y > h->mb.mv_max_spel[1] - 8 ||
- bm1y > h->mb.mv_max_spel[1] - 8 )
+ if( bm0y < h->mb.mv_min_spel[1] + 8 || bm1y < h->mb.mv_min_spel[1] + 8 ||
+ bm0y > h->mb.mv_max_spel[1] - 8 || bm1y > h->mb.mv_max_spel[1] - 8 )
return;
h->mc.memzero_aligned( visited, sizeof(visited) );
@@ -1096,6 +1085,10 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
}
}
+ if( bmy < h->mb.mv_min_spel[1] + 3 ||
+ bmy > h->mb.mv_max_spel[1] - 3 )
+ return;
+
/* subpel hex search, same pattern as ME HEX. */
dir = -2;
omx = bmx;
@@ -1109,8 +1102,8 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
for( i = 1; i < 10; i++ )
{
const int odir = mod6m1[dir+1];
- if( bmy > h->mb.mv_max_spel[1] - 2 ||
- bmy < h->mb.mv_min_spel[1] - 2 )
+ if( bmy < h->mb.mv_min_spel[1] + 3 ||
+ bmy > h->mb.mv_max_spel[1] - 3 )
break;
dir = -2;
omx = bmx;
@@ -1128,7 +1121,6 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i+1][0], omy + square1[i+1][1], satds[i], 1 );
for( i=0; i<8; i++ ) COST_MV_RD ( omx + square1[i+1][0], omy + square1[i+1][1], satds[i], 0,0 );
- bmy = x264_clip3( bmy, h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
m->cost = bcost;
m->mv[0] = bmx;
m->mv[1] = bmy;
More information about the x264-devel mailing list