[x264-devel] commit: Faster fullpel predictor checking (Jason Garrett-Glaser )

Sat Apr 24 00:40:10 CEST 2010

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Tue Apr 20 16:50:13 2010 -0700| [f6114408330c1f1c5bb897ab74258f9972cc9bf9] | committer: Jason Garrett-Glaser 

Faster fullpel predictor checking
Also shave a few instructions off dia/hex motion estimation loops.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=f6114408330c1f1c5bb897ab74258f9972cc9bf9
---

 encoder/me.c |   35 ++++++++++++++++++++++-------------
 1 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/encoder/me.c b/encoder/me.c
index be16a04..1424616 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -219,7 +219,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
             COST_MV_HPEL( bmx, bmy );
         for( int i = 0; i < i_mvc; i++ )
         {
-            if( M32( mvc[i] ) && (pmv - M32( mvc[i] )) )
+            if( M32( mvc[i] ) && (pmv != M32( mvc[i] )) )
             {
                 int mx = x264_clip3( mvc[i][0], mv_x_min_qpel, mv_x_max_qpel );
                 int my = x264_clip3( mvc[i][1], mv_y_min_qpel, mv_y_max_qpel );
@@ -243,16 +243,27 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
          * biasing against use of the predicted motion vector. */
         bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );
         pmv = pack16to32_mask( bmx, bmy );
-        if( i_mvc )
-            x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
-        for( int i = 0; i < i_mvc; i++ )
+        if( i_mvc > 0 )
         {
-            if( M32( mvc[i] ) && (pmv - M32( mvc[i] )) )
+            x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+            bcost <<= 4;
+            for( int i = 1; i <= i_mvc; i++ )
+            {
+                if( M32( mvc[i-1] ) && (pmv != M32( mvc[i-1] )) )
+                {
+                    int mx = mvc[i-1][0];
+                    int my = mvc[i-1][1];
+                    int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
+                    cost = (cost << 4) + i;
+                    COPY1_IF_LT( bcost, cost );
+                }
+            }
+            if( bcost&15 )
             {
-                int mx = mvc[i][0];
-                int my = mvc[i][1];
-                COST_MV( mx, my );
+                bmx = mvc[(bcost&15)-1][0];
+                bmy = mvc[(bcost&15)-1][1];
             }
+            bcost >>= 4;
         }
     }
 
@@ -265,7 +276,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
         {
             /* diamond search, radius 1 */
             bcost <<= 4;
-            int i = 0;
+            int i = i_me_range;
             do
             {
                 COST_MV_X4_DIR( 0,-1, 0,1, -1,0, 1,0, costs );
@@ -278,9 +289,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                 bmx -= (bcost<<28)>>30;
                 bmy -= (bcost<<30)>>30;
                 bcost &= ~15;
-                if( !CHECK_MVRANGE(bmx, bmy) )
-                    break;
-            } while( ++i < i_me_range );
+            } while( --i && CHECK_MVRANGE(bmx, bmy) );
             bcost >>= 4;
             break;
         }
@@ -325,7 +334,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
                 bmy += hex2[dir+1][1];
 
                 /* half hexagon, not overlapping the previous iteration */
-                for( int i = 1; i < i_me_range>>1 && CHECK_MVRANGE(bmx, bmy); i++ )
+                for( int i = (i_me_range>>1) - 1; i > 0 && CHECK_MVRANGE(bmx, bmy); i-- )
                 {
                     COST_MV_X3_DIR( hex2[dir+0][0], hex2[dir+0][1],
                                     hex2[dir+1][0], hex2[dir+1][1],