[x264-devel] commit: MB-tree fixes: (Loren Merritt)

git version control git at videolan.org
Sun Aug 9 11:48:42 CEST 2009


x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Sat Aug  8 14:53:27 2009 +0000| [886d1e9878a6f2424bd005a9cb16843ca8e8d1df] | committer: Loren Merritt 

MB-tree fixes:
AQ was applied inconsistently: some AQ-adjusted costs were compared against costs that had no AQ applied. Strangely enough, fixing this increases SSIM on some sources but decreases it on others; more investigation is needed.
Account for weighted bipred.
Reduce memory, increase precision, simplify, and early terminate.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=886d1e9878a6f2424bd005a9cb16843ca8e8d1df
---

 common/frame.c      |    2 +-
 common/frame.h      |    2 +-
 encoder/slicetype.c |   65 ++++++++++++++++++++++----------------------------
 3 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/common/frame.c b/common/frame.c
index d028ce5..1f89275 100644
--- a/common/frame.c
+++ b/common/frame.c
@@ -95,7 +95,7 @@ x264_frame_t *x264_frame_new( x264_t *h )
             }
         CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
         memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) );
-        CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
+        CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint16_t) );
         for( j = 0; j <= h->param.i_bframe+1; j++ )
             for( i = 0; i <= h->param.i_bframe+1; i++ )
             {
diff --git a/common/frame.h b/common/frame.h
index a3da4e4..f70d38a 100644
--- a/common/frame.h
+++ b/common/frame.h
@@ -85,7 +85,7 @@ typedef struct
     float   *f_qp_offset;
     int     b_intra_calculated;
     uint16_t *i_intra_cost;
-    uint32_t *i_propagate_cost;
+    uint16_t *i_propagate_cost;
     uint16_t *i_inv_qscale_factor;
 
     /* threading */
diff --git a/encoder/slicetype.c b/encoder/slicetype.c
index e4754b6..f0fe2b4 100644
--- a/encoder/slicetype.c
+++ b/encoder/slicetype.c
@@ -403,41 +403,32 @@ static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_mb_analysis_t
 static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, int p0, int p1, int b )
 {
     x264_frame_t *refs[2] = {frames[p0],frames[p1]};
-    int dist_scale_factor = p1 != p0 ? 128 : ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
+    int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
     int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
+    int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1], frames[b]->lowres_mvs[1][p1-b-1] };
 
     for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
     {
-        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
+        int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
+        for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++, mb_index++ )
         {
-            int mb_index = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
             int inter_cost = frames[b]->lowres_costs[b-p0][p1-b][mb_index];
-            int intra_cost = (frames[b]->i_intra_cost[mb_index] * frames[b]->i_inv_qscale_factor[mb_index]+128)>>8;
-            int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
-            /* The approximate amount of data that this block contains. */
-            int propagate_amount = intra_cost + frames[b]->i_propagate_cost[mb_index];
-
-            /* Divide by 64 for per-pixel summing. */
-            propagate_amount = (((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost + 32) >> 6;
+            int intra_cost = frames[b]->i_intra_cost[mb_index];
 
             /* Don't propagate for an intra block. */
             if( inter_cost < intra_cost )
             {
-                int mv[2][2], list;
-                mv[0][0] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][0];
-                mv[0][1] = frames[b]->lowres_mvs[0][b-p0-1][mb_index][1];
-                if( b != p1 )
-                {
-                    mv[1][0] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][0];
-                    mv[1][1] = frames[b]->lowres_mvs[1][p1-b-1][mb_index][1];
-                }
-
+                int lists_used = frames[b]->lowres_inter_types[b-p0][p1-b][mb_index];
+                /* The approximate amount of data that this block contains. */
+                int propagate_amount = frames[b]->i_propagate_cost[mb_index] + ((intra_cost * frames[b]->i_inv_qscale_factor[mb_index] + 128)>>8);
+                propagate_amount = ((uint64_t)propagate_amount*(intra_cost-inter_cost)) / intra_cost;
+                int list;
                 /* Follow the MVs to the previous frame(s). */
                 for( list = 0; list < 2; list++ )
                     if( (lists_used >> list)&1 )
                     {
-                        int x = mv[list][0];
-                        int y = mv[list][1];
+                        int x = mvs[list][mb_index][0];
+                        int y = mvs[list][mb_index][1];
                         int listamount = propagate_amount;
                         int mbx = (x>>5)+h->mb.i_mb_x;
                         int mby = ((y>>5)+h->mb.i_mb_y);
@@ -445,10 +436,12 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
                         int idx1 = idx0 + 1;
                         int idx2 = idx0 + h->mb.i_mb_stride;
                         int idx3 = idx0 + h->mb.i_mb_stride + 1;
-                        int idx0weight = (32-(y&31))*(32-(x&31));
-                        int idx1weight = (32-(y&31))*(x&31);
-                        int idx2weight = (y&31)*(32-(x&31));
-                        int idx3weight = (y&31)*(x&31);
+                        x &= 31;
+                        y &= 31;
+                        int idx0weight = (32-y)*(32-x);
+                        int idx1weight = (32-y)*x;
+                        int idx2weight = y*(32-x);
+                        int idx3weight = y*x;
 
                         /* Apply bipred weighting. */
                         if( lists_used == 3 )
@@ -460,21 +453,21 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
                          * be counted. */
                         if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
                         {
-                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
-                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
-                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
-                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+512)>>10 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+512)>>10 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+512)>>10 );
+                            CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+512)>>10 );
                         }
                         else /* Check offsets individually */
                         {
                             if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
-                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+8)>>4 );
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx0], (listamount*idx0weight+512)>>10 );
                             if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
-                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+8)>>4 );
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx1], (listamount*idx1weight+512)>>10 );
                             if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+8)>>4 );
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx2], (listamount*idx2weight+512)>>10 );
                             if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
-                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+8)>>4 );
+                                CLIP_ADD( refs[list]->i_propagate_cost[idx3], (listamount*idx3weight+512)>>10 );
                         }
                     }
             }
@@ -497,7 +490,7 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
     if( last_nonb < 0 )
         return;
 
-    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+    memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
     while( i-- > idx )
     {
         cur_nonb = i;
@@ -506,12 +499,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
         if( cur_nonb < idx )
             break;
         x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb, 0 );
-        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+        memset( frames[cur_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
         x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, last_nonb );
         while( frames[i]->i_type == X264_TYPE_B && i > 0 )
         {
             x264_slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, i, 0 );
-            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint32_t) );
+            memset( frames[i]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
             x264_macroblock_tree_propagate( h, frames, cur_nonb, last_nonb, i );
             i--;
         }



More information about the x264-devel mailing list