[x264-devel] commit: Faster qpel-RD (Jason Garrett-Glaser)
git version control
git at videolan.org
Fri Oct 31 16:57:51 CET 2008
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Wed Oct 22 02:20:06 2008 -0700| [d0add77f5f084253202747266f85daa65f7fc9cc] | committer: Jason Garrett-Glaser
Faster qpel-RD
3-4% faster qpel-RD; avoid re-checking bmv/pmv during the hex search.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=d0add77f5f084253202747266f85daa65f7fc9cc
---
encoder/me.c | 39 ++++++++++++++++++++++++++-------------
1 files changed, 26 insertions(+), 13 deletions(-)
diff --git a/encoder/me.c b/encoder/me.c
index 25e1195..12c20b0 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -942,18 +942,23 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
}
#undef COST_MV_SATD
-#define COST_MV_SATD( mx, my, dst ) \
+#define COST_MV_SATD( mx, my, dst, avoid_mvp ) \
{ \
- int stride = 16; \
- uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4 ); \
- dst = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
- + p_cost_mvx[mx] + p_cost_mvy[my]; \
- COPY1_IF_LT( bsatd, dst ); \
+ if( !avoid_mvp || !(mx == pmx && my == pmy) ) \
+ { \
+ int stride = 16; \
+ uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4 ); \
+ dst = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ + p_cost_mvx[mx] + p_cost_mvy[my]; \
+ COPY1_IF_LT( bsatd, dst ); \
+ } \
+ else \
+ dst = COST_MAX; \
}
#define COST_MV_RD( mx, my, satd, do_dir, mdir ) \
{ \
- if( satd <= bsatd * SATD_THRESH )\
+ if( satd <= bsatd * SATD_THRESH ) \
{ \
uint64_t cost; \
*(uint32_t*)cache_mv = *(uint32_t*)cache_mv2 = pack16to32_mask(mx,my); \
@@ -991,7 +996,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
pmy = m->mvp[1];
p_cost_mvx = m->p_cost_mv - pmx;
p_cost_mvy = m->p_cost_mv - pmy;
- COST_MV_SATD( bmx, bmy, bsatd );
+ COST_MV_SATD( bmx, bmy, bsatd, 0 );
COST_MV_RD( bmx, bmy, 0, 0, 0 );
/* check the predicted mv */
@@ -999,16 +1004,24 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
&& pmx >= h->mb.mv_min_spel[0] && pmx <= h->mb.mv_max_spel[0]
&& pmy >= h->mb.mv_min_spel[1] && pmy <= h->mb.mv_max_spel[1] )
{
- COST_MV_SATD( pmx, pmy, satd );
+ COST_MV_SATD( pmx, pmy, satd, 0 );
COST_MV_RD( pmx, pmy, satd, 0,0 );
+ /* The hex motion search is guaranteed to not repeat the center candidate,
+ * so if pmv is chosen, set the "MV to avoid checking" to bmv instead. */
+ if( bmx == pmx && bmy == pmy )
+ {
+ pmx = m->mv[0];
+ pmy = m->mv[1];
+ }
}
/* subpel hex search, same pattern as ME HEX. */
dir = -2;
omx = bmx;
omy = bmy;
- for( j=0; j<6; j++ ) COST_MV_SATD( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j] );
+ for( j=0; j<6; j++ ) COST_MV_SATD( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j], 1 );
for( j=0; j<6; j++ ) COST_MV_RD ( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j], 1,j );
+
if( dir != -2 )
{
/* half hexagon, not overlapping the previous iteration */
@@ -1021,7 +1034,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
dir = -2;
omx = bmx;
omy = bmy;
- for( j=0; j<3; j++ ) COST_MV_SATD( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j] );
+ for( j=0; j<3; j++ ) COST_MV_SATD( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j], 1 );
for( j=0; j<3; j++ ) COST_MV_RD ( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j], 1, odir-1+j );
if( dir == -2 )
break;
@@ -1031,7 +1044,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
/* square refine, same as pattern as ME HEX. */
omx = bmx;
omy = bmy;
- for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i][0], omy + square1[i][1], satds[i] );
+ for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i][0], omy + square1[i][1], satds[i], 1 );
for( i=0; i<8; i++ ) COST_MV_RD ( omx + square1[i][0], omy + square1[i][1], satds[i], 0,0 );
bmy = x264_clip3( bmy, h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
@@ -1039,6 +1052,6 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
m->mv[0] = bmx;
m->mv[1] = bmy;
x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, i_list, pack16to32_mask(bmx, bmy) );
- x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, i_list, pack16to32_mask(bmx - pmx, bmy - pmy) );
+ x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, i_list, pack16to32_mask(bmx - m->mvp[0], bmy - m->mvp[1]) );
}
More information about the x264-devel mailing list