[x264-devel] commit: Faster fullpel predictor checking (Jason Garrett-Glaser )
git at videolan.org
git at videolan.org
Sat Apr 24 00:40:10 CEST 2010
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Tue Apr 20 16:50:13 2010 -0700| [f6114408330c1f1c5bb897ab74258f9972cc9bf9] | committer: Jason Garrett-Glaser
Faster fullpel predictor checking
Also shave a few instructions off dia/hex motion estimation loops.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=f6114408330c1f1c5bb897ab74258f9972cc9bf9
---
encoder/me.c | 35 ++++++++++++++++++++++-------------
1 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/encoder/me.c b/encoder/me.c
index be16a04..1424616 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -219,7 +219,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
COST_MV_HPEL( bmx, bmy );
for( int i = 0; i < i_mvc; i++ )
{
- if( M32( mvc[i] ) && (pmv - M32( mvc[i] )) )
+ if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )
{
int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );
int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );
@@ -243,16 +243,27 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
* biasing against use of the predicted motion vector. */
bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );
pmv = pack16to32_mask( bmx, bmy );
- if( i_mvc )
- x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
- for( int i = 0; i < i_mvc; i++ )
+ if( i_mvc > 0 )
{
- if( M32( mvc[i] ) && (pmv - M32( mvc[i] )) )
+ x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+ bcost <<= 4;
+ for( int i = 1; i <= i_mvc; i++ )
+ {
+ if( M32( mvc[i-1] ) && (pmv != M32( mvc[i-1] )) )
+ {
+ int mx = mvc[i-1][0];
+ int my = mvc[i-1][1];
+ int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
+ cost = (cost << 4) + i;
+ COPY1_IF_LT( bcost, cost );
+ }
+ }
+ if( bcost&15 )
{
- int mx = mvc[i][0];
- int my = mvc[i][1];
- COST_MV( mx, my );
+ bmx = mvc[(bcost&15)-1][0];
+ bmy = mvc[(bcost&15)-1][1];
}
+ bcost >>= 4;
}
}
@@ -265,7 +276,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
{
/* diamond search, radius 1 */
bcost <<= 4;
- int i = 0;
+ int i = i_me_range;
do
{
COST_MV_X4_DIR( 0,-1, 0,1, -1,0, 1,0, costs );
@@ -278,9 +289,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
bmx -= (bcost<<28)>>30;
bmy -= (bcost<<30)>>30;
bcost &= ~15;
- if( !CHECK_MVRANGE(bmx, bmy) )
- break;
- } while( ++i < i_me_range );
+ } while( --i && CHECK_MVRANGE(bmx, bmy) );
bcost >>= 4;
break;
}
@@ -325,7 +334,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
bmy += hex2[dir+1][1];
/* half hexagon, not overlapping the previous iteration */
- for( int i = 1; i < i_me_range>>1 && CHECK_MVRANGE(bmx, bmy); i++ )
+ for( int i = (i_me_range>>1) - 1; i > 0 && CHECK_MVRANGE(bmx, bmy); i-- )
{
COST_MV_X3_DIR( hex2[dir+0][0], hex2[dir+0][1],
hex2[dir+1][0], hex2[dir+1][1],
More information about the x264-devel mailing list