[x264-devel] Faster predictor checking with subme<3

Jason Garrett-Glaser git at videolan.org
Tue Jul 17 21:21:42 CEST 2012


x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Tue Jul 10 14:10:44 2012 -0700| [5f615f7f93d830e55e6fe4f04d214b93d8cb4b53] | committer: Jason Garrett-Glaser

Faster predictor checking with subme<3
Fix a typo that made an early-skip less effective.
Avoid a relatively unpredictable branch.
Output changes slightly due to the typo fix.
~50 cycles faster on Core i7.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=5f615f7f93d830e55e6fe4f04d214b93d8cb4b53
---

 encoder/me.c |   16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/encoder/me.c b/encoder/me.c
index 22098b2..81cbebd 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -250,24 +250,22 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
         if( i_mvc > 0 )
         {
             ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
-            x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+            x264_predictor_roundclip( mvc_fpel+2, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+            M32( mvc_fpel[1] ) = pmv;
             bcost <<= 4;
             for( int i = 1; i <= i_mvc; i++ )
             {
-                if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
+                if( M32( mvc_fpel[i+1] ) && (pmv != M32( mvc_fpel[i+1] )) )
                 {
-                    int mx = mvc_fpel[i-1][0];
-                    int my = mvc_fpel[i-1][1];
+                    int mx = mvc_fpel[i+1][0];
+                    int my = mvc_fpel[i+1][1];
                     int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
                     cost = (cost << 4) + i;
                     COPY1_IF_LT( bcost, cost );
                 }
             }
-            if( bcost&15 )
-            {
-                bmx = mvc_fpel[(bcost&15)-1][0];
-                bmy = mvc_fpel[(bcost&15)-1][1];
-            }
+            bmx = mvc_fpel[(bcost&15)+1][0];
+            bmy = mvc_fpel[(bcost&15)+1][1];
             bcost >>= 4;
         }
     }



More information about the x264-devel mailing list