[x264-devel] Faster predictor checking with subme<3
Jason Garrett-Glaser
git at videolan.org
Tue Jul 17 21:21:42 CEST 2012
x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Tue Jul 10 14:10:44 2012 -0700| [5f615f7f93d830e55e6fe4f04d214b93d8cb4b53] | committer: Jason Garrett-Glaser
Faster predictor checking with subme<3
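Fix a typo that made an early-skip less effective: the predictor loop compared pmv against the raw candidate in mvc[] instead of its roundclipped counterpart in mvc_fpel[].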
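Avoid a relatively unpredictable branch: pmv is now stored in mvc_fpel[1] ahead of the roundclipped candidates, so the best-candidate lookup after the loop no longer needs the `if( bcost&15 )` test.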
Slightly changed output due to the typo-fix.
~50 cycles faster on Core i7.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=5f615f7f93d830e55e6fe4f04d214b93d8cb4b53
---
encoder/me.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/encoder/me.c b/encoder/me.c
index 22098b2..81cbebd 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -250,24 +250,22 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
if( i_mvc > 0 )
{
ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
- x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+ x264_predictor_roundclip( mvc_fpel+2, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+ M32( mvc_fpel[1] ) = pmv;
bcost <<= 4;
for( int i = 1; i <= i_mvc; i++ )
{
- if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
+ if( M32( mvc_fpel[i+1] ) && (pmv != M32( mvc_fpel[i+1] )) )
{
- int mx = mvc_fpel[i-1][0];
- int my = mvc_fpel[i-1][1];
+ int mx = mvc_fpel[i+1][0];
+ int my = mvc_fpel[i+1][1];
int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
cost = (cost << 4) + i;
COPY1_IF_LT( bcost, cost );
}
}
- if( bcost&15 )
- {
- bmx = mvc_fpel[(bcost&15)-1][0];
- bmy = mvc_fpel[(bcost&15)-1][1];
- }
+ bmx = mvc_fpel[(bcost&15)+1][0];
+ bmy = mvc_fpel[(bcost&15)+1][1];
bcost >>= 4;
}
}
More information about the x264-devel
mailing list