[x264-devel] commit: Special case in qpel refine for subme=1 (Jason Garrett-Glaser)
git at videolan.org
git at videolan.org
Sun Apr 11 06:50:32 CEST 2010
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Sat Apr 10 00:35:50 2010 -0700| [f21ff8a3e7e1292c542e84842d2738adc72fcd59] | committer: Jason Garrett-Glaser
Special case in qpel refine for subme=1
~15-20% faster qpel refine with subme=1.
Some minor cleanups in refine_subpel.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=f21ff8a3e7e1292c542e84842d2738adc72fcd59
---
encoder/me.c | 49 +++++++++++++++++++++++++++++++------------------
1 files changed, 31 insertions(+), 18 deletions(-)
diff --git a/encoder/me.c b/encoder/me.c
index 7cbeec3..115295c 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -788,13 +788,7 @@ if( b_refine_qpel || (dir^1) != odir ) \
cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], FENC_STRIDE, pix[0], 8 ); \
} \
} \
- if( cost < bcost ) \
- { \
- bcost = cost; \
- bmx = mx; \
- bmy = my; \
- bdir = dir; \
- } \
+ COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, bdir, dir ); \
}
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel )
@@ -865,19 +859,38 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
}
/* quarterpel diamond search */
- bdir = -1;
- for( int i = qpel_iters; i > 0; i-- )
+ if( h->mb.i_subpel_refine > 1 )
{
- if( bmy <= h->mb.mv_min_spel[1] || bmy >= h->mb.mv_max_spel[1] || bmx <= h->mb.mv_min_spel[0] || bmx >= h->mb.mv_max_spel[0] )
- break;
- odir = bdir;
+ bdir = -1;
+ for( int i = qpel_iters; i > 0; i-- )
+ {
+ if( bmy <= h->mb.mv_min_spel[1] || bmy >= h->mb.mv_max_spel[1] || bmx <= h->mb.mv_min_spel[0] || bmx >= h->mb.mv_max_spel[0] )
+ break;
+ odir = bdir;
+ int omx = bmx, omy = bmy;
+ COST_MV_SATD( omx, omy - 1, 0 );
+ COST_MV_SATD( omx, omy + 1, 1 );
+ COST_MV_SATD( omx - 1, omy, 2 );
+ COST_MV_SATD( omx + 1, omy, 3 );
+ if( (bmx == omx) & (bmy == omy) )
+ break;
+ }
+ }
+ /* Special simplified case for subme=1 */
+ else if( bmy > h->mb.mv_min_spel[1] && bmy < h->mb.mv_max_spel[1] && bmx > h->mb.mv_min_spel[0] && bmx < h->mb.mv_max_spel[0] )
+ {
+ int costs[4];
int omx = bmx, omy = bmy;
- COST_MV_SATD( omx, omy - 1, 0 );
- COST_MV_SATD( omx, omy + 1, 1 );
- COST_MV_SATD( omx - 1, omy, 2 );
- COST_MV_SATD( omx + 1, omy, 3 );
- if( bmx == omx && bmy == omy )
- break;
+ /* We have to use mc_luma because all strides must be the same to use fpelcmp_x4 */
+ h->mc.mc_luma( pix[0] , 32, m->p_fref, m->i_stride[0], omx, omy-1, bw, bh, &m->weight[0] );
+ h->mc.mc_luma( pix[0]+16, 32, m->p_fref, m->i_stride[0], omx, omy+1, bw, bh, &m->weight[0] );
+ h->mc.mc_luma( pix[1] , 32, m->p_fref, m->i_stride[0], omx-1, omy, bw, bh, &m->weight[0] );
+ h->mc.mc_luma( pix[1]+16, 32, m->p_fref, m->i_stride[0], omx+1, omy, bw, bh, &m->weight[0] );
+ h->pixf.fpelcmp_x4[i_pixel]( m->p_fenc[0], pix[0], pix[0]+16, pix[1], pix[1]+16, 32, costs );
+ COPY2_IF_LT( bcost, costs[0] + p_cost_mvx[omx ] + p_cost_mvy[omy-1], bmy, omy-1 );
+ COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx ] + p_cost_mvy[omy+1], bmy, omy+1 );
+ COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-1] + p_cost_mvy[omy ], bmx, omx-1, bmy, omy );
+ COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+1] + p_cost_mvy[omy ], bmx, omx+1, bmy, omy );
}
m->cost = bcost;
More information about the x264-devel mailing list.