[x264-devel] commit: Significantly faster qpel-RD (Jason Garrett-Glaser)

Sat Dec 5 10:52:22 CET 2009

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Tue Dec  1 16:15:15 2009 -0800| [4bf27ffa3e5b40d49e201e4e39c4f3b57a84c737] | committer: Jason Garrett-Glaser 

Significantly faster qpel-RD
Cache the results of MC, like in bidir-RD.
Slightly changes output due to the necessary reordering of satd/RD calls.
5-10% faster qpel-RD.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=4bf27ffa3e5b40d49e201e4e39c4f3b57a84c737
---

 encoder/macroblock.c |    6 +-----
 encoder/me.c         |    2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 4abbc2e..e4edb8a 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -1181,13 +1181,9 @@ void x264_macroblock_encode_p4x4( x264_t *h, int i4 )
     int i_qp = h->mb.i_qp;
     uint8_t *p_fenc = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[i4]];
     uint8_t *p_fdec = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i4]];
-    const int i_ref = h->mb.cache.ref[0][x264_scan8[i4]];
-    const int mvx   = x264_clip3( h->mb.cache.mv[0][x264_scan8[i4]][0], h->mb.mv_min[0], h->mb.mv_max[0] );
-    const int mvy   = x264_clip3( h->mb.cache.mv[0][x264_scan8[i4]][1], h->mb.mv_min[1], h->mb.mv_max[1] );
     int nz;
 
-    h->mc.mc_luma( p_fdec, FDEC_STRIDE, h->mb.pic.p_fref[0][i_ref], h->mb.pic.i_stride[0],
-                   mvx + 4*4*block_idx_x[i4], mvy + 4*4*block_idx_y[i4], 4, 4, &h->sh.weight[i_ref][0] );
+    /* Don't need motion compensation as this function is only used in qpel-RD, which caches pixel data. */
 
     if( h->mb.b_lossless )
     {
diff --git a/encoder/me.c b/encoder/me.c
index 707f68c..80fdb64 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -1027,7 +1027,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
     { \
         int stride = 16; \
         uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4, &m->weight[0] ); \
-        dst = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+        dst = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
             + p_cost_mvx[mx] + p_cost_mvy[my]; \
         COPY1_IF_LT( bsatd, dst ); \
     } \