[x264-devel] commit: omit redundant mc after non-rdo dct size decision, and in b-direct rdo (Jason Garrett-Glaser)

Wed Jun 4 00:53:47 CEST 2008

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Fri May 23 21:22:29 2008 -0600| [f88d09450264d597e7b08f83993bed95ba1f7cfa]

omit redundant mc after non-rdo dct size decision, and in b-direct rdo

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=f88d09450264d597e7b08f83993bed95ba1f7cfa
---

 common/common.h      |    2 +-
 common/macroblock.c  |    2 +-
 encoder/analyse.c    |   24 +++++++++++++++++-------
 encoder/macroblock.c |   11 ++++++-----
 encoder/me.c         |    2 +-
 5 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/common/common.h b/common/common.h
index 425fc8f..bb72957 100644
--- a/common/common.h
+++ b/common/common.h
@@ -429,7 +429,7 @@ struct x264_t
         int i_skip_intra;
         /* skip flag for P/B-skip motion compensation */
         /* if we've already done skip MC, we don't need to do it again */
-        int b_skip_pbskip_mc;
+        int b_skip_mc;
 
         struct
         {
diff --git a/common/macroblock.c b/common/macroblock.c
index 7c8b939..d640664 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -308,7 +308,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
 static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
 {
     int ref[2];
-    DECLARE_ALIGNED_4( int16_t mv[2][2] );
+    DECLARE_ALIGNED_8( int16_t mv[2][2] );
     int i_list;
     int i8, i4;
     int b8x8;
diff --git a/encoder/analyse.c b/encoder/analyse.c
index c0c44b3..24d594a 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -340,7 +340,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
                 a->b_fast_intra = 1;
             }
         }
-        h->mb.b_skip_pbskip_mc = 0;
+        h->mb.b_skip_mc = 0;
     }
 }
 
@@ -1916,8 +1916,12 @@ static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_i
     if( a->b_direct_available && a->i_rd16x16direct == COST_MAX )
     {
         h->mb.i_type = B_DIRECT;
+        /* Assumes direct/skip MC is still in fdec */
+        /* Requires b-rdo to be done before intra analysis */
+        h->mb.b_skip_mc = 1;
         x264_analyse_update_cache( h, a );
         a->i_rd16x16direct = x264_rd_cost_mb( h, a->i_lambda2 );
+        h->mb.b_skip_mc = 0;
     }
 
     //FIXME not all the update_cache calls are needed
@@ -2009,7 +2013,7 @@ static inline void x264_mb_analyse_transform( x264_t *h )
     if( x264_mb_transform_8x8_allowed( h ) && h->param.analyse.b_transform_8x8 )
     {
         int i_cost4, i_cost8;
-        /* FIXME only luma mc is needed */
+        /* Only luma MC is really needed, but the full MC is re-used in macroblock_encode. */
         x264_mb_mc( h );
 
         i_cost8 = h->pixf.sa8d[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
@@ -2018,6 +2022,7 @@ static inline void x264_mb_analyse_transform( x264_t *h )
                                              h->mb.pic.p_fdec[0], FDEC_STRIDE );
 
         h->mb.b_transform_8x8 = i_cost8 < i_cost4;
+        h->mb.b_skip_mc = 1;
     }
 }
 
@@ -2387,7 +2392,7 @@ void x264_macroblock_analyse( x264_t *h )
                 {
                     h->mb.i_type = B_SKIP;
                     x264_analyse_update_cache( h, &analysis );
-                    h->mb.b_skip_pbskip_mc = 1;
+                    h->mb.b_skip_mc = 1;
                     return;
                 }
             }
@@ -2404,7 +2409,8 @@ void x264_macroblock_analyse( x264_t *h )
             const unsigned int flags = h->param.analyse.inter;
             int i_type;
             int i_partition;
-            h->mb.b_skip_pbskip_mc = 0;
+            int i_satd_inter = 0; // shut up uninitialized warning
+            h->mb.b_skip_mc = 0;
 
             x264_mb_analyse_load_costs( h, &analysis );
 
@@ -2547,11 +2553,9 @@ void x264_macroblock_analyse( x264_t *h )
                 }
             }
 
-            x264_mb_analyse_intra( h, &analysis, i_cost );
-
             if( analysis.b_mbrd )
             {
-                int i_satd_inter = i_cost;
+                i_satd_inter = i_cost;
                 x264_mb_analyse_b_rd( h, &analysis, i_satd_inter );
                 i_type = B_SKIP;
                 i_cost = i_bskip_cost;
@@ -2566,6 +2570,12 @@ void x264_macroblock_analyse( x264_t *h )
 
                 h->mb.i_type = i_type;
                 h->mb.i_partition = i_partition;
+            }
+            
+            x264_mb_analyse_intra( h, &analysis, i_satd_inter );
+
+            if( analysis.b_mbrd )
+            {
                 x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost );
                 x264_intra_rd( h, &analysis, i_satd_inter * 17/16 );
             }
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 74c9413..7ac93b1 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -292,7 +292,7 @@ void x264_macroblock_encode_pskip( x264_t *h )
                                 h->mb.mv_min[1], h->mb.mv_max[1] );
 
     /* don't do pskip motion compensation if it was already done in macroblock_analyse */
-    if( !h->mb.b_skip_pbskip_mc )
+    if( !h->mb.b_skip_mc )
     {
         h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                        h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
@@ -348,7 +348,7 @@ void x264_macroblock_encode( x264_t *h )
     if( h->mb.i_type == B_SKIP )
     {
         /* don't do bskip motion compensation if it was already done in macroblock_analyse */
-        if( !h->mb.b_skip_pbskip_mc )
+        if( !h->mb.b_skip_mc )
             x264_mb_mc( h );
         x264_macroblock_encode_skip( h );
         return;
@@ -417,8 +417,9 @@ void x264_macroblock_encode( x264_t *h )
         int i8x8, i4x4;
         int i_decimate_mb = 0;
 
-        /* Motion compensation */
-        x264_mb_mc( h );
+        /* Don't repeat motion compensation if it was already done in non-RD transform analysis */
+        if( !h->mb.b_skip_mc )
+            x264_mb_mc( h );
 
         if( h->mb.b_lossless )
         {
@@ -690,7 +691,7 @@ int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
         }
     }
 
-    h->mb.b_skip_pbskip_mc = 1;
+    h->mb.b_skip_mc = 1;
     return 1;
 }
 
diff --git a/encoder/me.c b/encoder/me.c
index 7598b76..0dd6378 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -786,7 +786,7 @@ if( pass == 0 || !visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] ) \
     int i0 = 4 + 3*(m0x-om0x) + (m0y-om0y); \
     int i1 = 4 + 3*(m1x-om1x) + (m1y-om1y); \
     visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] = 1; \
-    memcpy( pix, pix0[i0], bs ); \
+    h->mc.memcpy_aligned( pix, pix0[i0], bs ); \
     if( i_weight == 32 ) \
         h->mc.avg[i_pixel]( pix, bw, pix1[i1], bw ); \
     else \