[x264-devel] commit: Faster chroma CBP handling (Henrik Gramner)

Sat Apr 24 00:40:02 CEST 2010

x264 | branch: master | Henrik Gramner <hengar-6 at student.ltu.se> | Fri Apr 16 22:39:45 2010 +0200| [5dab1efd428735f10a9670637dddffe763286dc4] | committer: Jason Garrett-Glaser 

Faster chroma CBP handling

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=5dab1efd428735f10a9670637dddffe763286dc4
---

 common/common.h      |    4 ++--
 encoder/cabac.c      |    4 ++--
 encoder/encoder.c    |   14 +++++++-------
 encoder/macroblock.c |   26 ++++++++++----------------
 4 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/common/common.h b/common/common.h
index 0d7dd99..e75f743 100644
--- a/common/common.h
+++ b/common/common.h
@@ -347,7 +347,7 @@ static const int x264_scan8[16+2*4+3] =
     4+5*8,
 
     /* Chroma DC */
-    5+5*8, 6+5*8
+    6+5*8, 7+5*8
 };
 /*
    0 1 2 3 4 5 6 7
@@ -356,7 +356,7 @@ static const int x264_scan8[16+2*4+3] =
  2   B B   L L L L
  3         L L L L
  4   R R   L L L L
- 5   R R   DyDuDv
+ 5   R R   Dy  DuDv
 */
 
 typedef struct x264_ratecontrol_t   x264_ratecontrol_t;
diff --git a/encoder/cabac.c b/encoder/cabac.c
index a0dcff2..82ced42 100644
--- a/encoder/cabac.c
+++ b/encoder/cabac.c
@@ -237,7 +237,7 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h, x264_cabac_t *cb )
         ctx = 4;
         if( cbp_a == 0x20 ) ctx++;
         if( cbp_b == 0x20 ) ctx += 2;
-        x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma > 1 );
+        x264_cabac_encode_decision_noup( cb, 77 + ctx, h->mb.i_cbp_chroma >> 1 );
     }
 }
 
@@ -960,7 +960,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
                     block_residual_write_cabac_cbf( h, cb, DCT_LUMA_4x4, i, h->dct.luma4x4[i], b_intra );
         }
 
-        if( h->mb.i_cbp_chroma&0x03 )    /* Chroma DC residual present */
+        if( h->mb.i_cbp_chroma ) /* Chroma DC residual present */
         {
             block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], b_intra );
             block_residual_write_cabac_cbf( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], b_intra );
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 06eb843..da4565b 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -1902,7 +1902,8 @@ static int x264_slice_write( x264_t *h )
         /* accumulate mb stats */
         h->stat.frame.i_mb_count[h->mb.i_type]++;
 
-        if( !IS_INTRA(h->mb.i_type) && !IS_SKIP(h->mb.i_type) && !IS_DIRECT(h->mb.i_type) )
+        int b_intra = IS_INTRA( h->mb.i_type );
+        if( !b_intra && !IS_SKIP( h->mb.i_type ) && !IS_DIRECT( h->mb.i_type ) )
         {
             if( h->mb.i_partition != D_8x8 )
                     h->stat.frame.i_mb_partition[h->mb.i_partition] += 4;
@@ -1921,21 +1922,20 @@ static int x264_slice_write( x264_t *h )
 
         if( h->param.i_log_level >= X264_LOG_INFO )
         {
-            if( h->mb.i_cbp_luma || h->mb.i_cbp_chroma )
+            if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
             {
                 int cbpsum = (h->mb.i_cbp_luma&1) + ((h->mb.i_cbp_luma>>1)&1)
                            + ((h->mb.i_cbp_luma>>2)&1) + (h->mb.i_cbp_luma>>3);
-                int b_intra = IS_INTRA(h->mb.i_type);
                 h->stat.frame.i_mb_cbp[!b_intra + 0] += cbpsum;
-                h->stat.frame.i_mb_cbp[!b_intra + 2] += h->mb.i_cbp_chroma >= 1;
-                h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma == 2;
+                h->stat.frame.i_mb_cbp[!b_intra + 2] += !!h->mb.i_cbp_chroma;
+                h->stat.frame.i_mb_cbp[!b_intra + 4] += h->mb.i_cbp_chroma >> 1;
             }
-            if( h->mb.i_cbp_luma && !IS_INTRA(h->mb.i_type) )
+            if( h->mb.i_cbp_luma && !b_intra )
             {
                 h->stat.frame.i_mb_count_8x8dct[0] ++;
                 h->stat.frame.i_mb_count_8x8dct[1] += h->mb.b_transform_8x8;
             }
-            if( IS_INTRA(h->mb.i_type) && h->mb.i_type != I_PCM )
+            if( b_intra && h->mb.i_type != I_PCM )
             {
                 if( h->mb.i_type == I_16x16 )
                     h->stat.frame.i_mb_pred_mode[0][h->mb.i_intra16x16_pred_mode]++;
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 50f939a..fefbcb2 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -343,8 +343,8 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
             h->mb.cache.non_zero_count[x264_scan8[21]] = 0;
             h->mb.cache.non_zero_count[x264_scan8[22]] = 0;
             h->mb.cache.non_zero_count[x264_scan8[23]] = 0;
-            h->mb.cache.non_zero_count[x264_scan8[25]] = 0;
-            h->mb.cache.non_zero_count[x264_scan8[26]] = 0;
+            M16( &h->mb.cache.non_zero_count[x264_scan8[25]] ) = 0;
+
             for( int ch = 0; ch < 2; ch++ )
             {
                 if( ssd[ch] > thresh )
@@ -452,11 +452,8 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
         }
     }
 
-    if( h->mb.i_cbp_chroma )
-        h->mb.i_cbp_chroma = 2;    /* dc+ac (we can't do only ac) */
-    else if( h->mb.cache.non_zero_count[x264_scan8[25]] |
-             h->mb.cache.non_zero_count[x264_scan8[26]] )
-        h->mb.i_cbp_chroma = 1;    /* dc only */
+    /* 0 = none, 1 = DC only, 2 = DC+AC */
+    h->mb.i_cbp_chroma = ((!!M16( &h->mb.cache.non_zero_count[x264_scan8[25]] )) | h->mb.i_cbp_chroma) + h->mb.i_cbp_chroma;
 }
 
 static void x264_macroblock_encode_skip( x264_t *h )
@@ -581,7 +578,6 @@ void x264_predict_lossless_16x16( x264_t *h, int i_mode )
  *****************************************************************************/
 void x264_macroblock_encode( x264_t *h )
 {
-    int i_cbp_dc = 0;
     int i_qp = h->mb.i_qp;
     int b_decimate = h->mb.b_dct_decimate;
     int b_force_no_skip = 0;
@@ -865,15 +861,13 @@ void x264_macroblock_encode( x264_t *h )
     /* encode the 8x8 blocks */
     x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
 
-    if( h->param.b_cabac )
-    {
-        i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
-                 | h->mb.cache.non_zero_count[x264_scan8[25]] << 1
-                 | h->mb.cache.non_zero_count[x264_scan8[26]] << 2;
-    }
-
     /* store cbp */
-    h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
+    int cbp = h->mb.i_cbp_chroma << 4 | h->mb.i_cbp_luma;
+    if( h->param.b_cabac )
+        cbp |= h->mb.cache.non_zero_count[x264_scan8[24]] << 8
+            |  h->mb.cache.non_zero_count[x264_scan8[25]] << 9
+            |  h->mb.cache.non_zero_count[x264_scan8[26]] << 10;
+    h->mb.cbp[h->mb.i_mb_xy] = cbp;
 
     /* Check for P_SKIP
      * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account