[x264-devel] commit: Some deblocking-related optimizations (Henrik Gramner)

git at videolan.org git at videolan.org
Wed Jun 2 07:38:29 CEST 2010


x264 | branch: master | Henrik Gramner <hengar-6 at student.ltu.se> | Sun May 30 22:45:14 2010 +0200| [0c7cf0bfb1d30ee8e7f1b355fef5aa9e2db929d2] | committer: Jason Garrett-Glaser 

Some deblocking-related optimizations

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=0c7cf0bfb1d30ee8e7f1b355fef5aa9e2db929d2
---

 common/deblock.c    |    8 ++++----
 common/macroblock.c |   43 +++++++++++++++++++++++--------------------
 2 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/common/deblock.c b/common/deblock.c
index 27c73ae..3296dbf 100644
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -299,7 +299,7 @@ static inline void deblock_edge_intra( x264_t *h, uint8_t *pix1, uint8_t *pix2,
 void x264_frame_deblock_row( x264_t *h, int mb_y )
 {
     int b_interlaced = h->sh.b_mbaff;
-    int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
+    int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->param.analyse.i_chroma_qp_offset );
     int stridey   = h->fdec->i_stride[0];
     int stride2y  = stridey << b_interlaced;
     int strideuv  = h->fdec->i_stride[1];
@@ -318,7 +318,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
         uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
         uint8_t *pixu = h->fdec->plane[1] +  8*mb_y*strideuv +  8*mb_x;
         uint8_t *pixv = h->fdec->plane[2] +  8*mb_y*strideuv +  8*mb_x;
-        if( b_interlaced && (mb_y&1) )
+        if( mb_y & b_interlaced )
         {
             pixy -= 15*stridey;
             pixu -=  7*strideuv;
@@ -366,12 +366,12 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
             int qp_top = (qp + qpt + 1) >> 1;
             int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
             int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
-            if( !b_interlaced && (intra_cur || intra_top) )
+            if( ~b_interlaced & (intra_cur | intra_top) )
                 FILTER( _intra, 1, 0, qp_top, qpc_top );
             else
             {
                 if( intra_top )
-                    memset( bs[1][0], 3, sizeof(bs[1][0]) );
+                    M32( bs[1][0] ) = 0x03030303;
                 FILTER(       , 1, 0, qp_top, qpc_top );
             }
         }
diff --git a/common/macroblock.c b/common/macroblock.c
index 01c90d2..26f63f5 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -400,9 +400,27 @@ void x264_macroblock_slice_init( x264_t *h )
                 }
         }
     }
-    if( h->sh.i_type == SLICE_TYPE_P )
+    else if( h->sh.i_type == SLICE_TYPE_P )
+    {
         memset( h->mb.cache.skip, 0, sizeof( h->mb.cache.skip ) );
 
+        if( h->sh.i_disable_deblocking_filter_idc != 1 && h->param.analyse.i_weighted_pred )
+        {
+            deblock_ref_table(-2) = -2;
+            deblock_ref_table(-1) = -1;
+            for( int i = 0; i < h->i_ref0 << h->sh.b_mbaff; i++ )
+            {
+                /* Mask off high bits to avoid frame num collisions with -1/-2.
+                 * In current x264 frame num values don't cover a range of more
+                 * than 32, so 6 bits is enough for uniqueness. */
+                if( !h->mb.b_interlaced )
+                    deblock_ref_table(i) = h->fref0[i]->i_frame_num&63;
+                else
+                    deblock_ref_table(i) = ((h->fref0[i>>1]->i_frame_num&63)<<1) + (i&1);
+            }
+        }
+    }
+
     /* init with not available (for top right idx=7,15) */
     memset( h->mb.cache.ref, -2, sizeof( h->mb.cache.ref ) );
 
@@ -418,19 +436,6 @@ void x264_macroblock_slice_init( x264_t *h )
             h->fdec->inv_ref_poc[field] = (256 + delta/2) / delta;
         }
 
-    deblock_ref_table(-2) = -2;
-    deblock_ref_table(-1) = -1;
-    for( int i = 0; i < h->i_ref0 << h->sh.b_mbaff; i++ )
-    {
-        /* Mask off high bits to avoid frame num collisions with -1/-2.
-         * In current x264 frame num values don't cover a range of more
-         * than 32, so 6 bits is enough for uniqueness. */
-        if( !h->mb.b_interlaced )
-            deblock_ref_table(i) = h->fref0[i]->i_frame_num&63;
-        else
-            deblock_ref_table(i) = ((h->fref0[i>>1]->i_frame_num&63)<<1) + (i&1);
-    }
-
     h->mb.i_neighbour4[6] =
     h->mb.i_neighbour4[9] =
     h->mb.i_neighbour4[12] =
@@ -894,7 +899,6 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
 void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_y )
 {
     int deblock_on_slice_edges = h->sh.i_disable_deblocking_filter_idc != 2;
-    int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
 
     h->mb.i_neighbour = 0;
     h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
@@ -906,9 +910,9 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
             h->mb.i_neighbour |= MB_LEFT;
     }
 
-    if( top >= 0 )
+    if( mb_y > h->mb.b_interlaced )
     {
-        h->mb.i_mb_top_xy = top;
+        h->mb.i_mb_top_xy = h->mb.i_mb_xy - (h->mb.i_mb_stride << h->mb.b_interlaced);
         if( deblock_on_slice_edges || h->mb.slice_table[h->mb.i_mb_top_xy] == h->mb.slice_table[h->mb.i_mb_xy] )
             h->mb.i_neighbour |= MB_TOP;
     }
@@ -930,8 +934,6 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
         h->mb.i_neighbour &= ~old_neighbour;
         if( h->mb.i_neighbour )
         {
-            int left = h->mb.i_mb_left_xy;
-            int top  = h->mb.i_mb_top_xy;
             int top_y = mb_y - (1 << h->mb.b_interlaced);
             int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*mb_x;
             int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*mb_x;
@@ -941,10 +943,11 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
             uint8_t (*nnz)[24] = h->mb.non_zero_count;
 
             if( h->mb.i_neighbour & MB_TOP )
-                CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[top][12] );
+                CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
 
             if( h->mb.i_neighbour & MB_LEFT )
             {
+                int left = h->mb.i_mb_left_xy;
                 h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left][3];
                 h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left][7];
                 h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left][11];



More information about the x264-devel mailing list