[x264-devel] commit: Fix cavlc+deblock+8x8dct (regression in r1612) ( Jason Garrett-Glaser )

Wed Jun 2 07:38:25 CEST 2010

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Mon May 31 11:14:22 2010 -0700| [2bcbac357b714f468e0138f022e584ffdb42f6d2] | committer: Jason Garrett-Glaser 

Fix cavlc+deblock+8x8dct (regression in r1612)
Add cavlc+8x8dct munging to new deblock system.
The regression may have caused minor visual artifacts.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=2bcbac357b714f468e0138f022e584ffdb42f6d2
---

 common/deblock.c    |   47 -----------------------------------------------
 common/macroblock.c |   46 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 49 deletions(-)

diff --git a/common/deblock.c b/common/deblock.c
index fc039c5..27c73ae 100644
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -24,46 +24,6 @@
 
 #include "common.h"
 
-/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
- * entropy coding, but per 64 coeffs for the purpose of deblocking */
-static void munge_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
-{
-    uint32_t (*src)[6] = (uint32_t(*)[6])h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
-    int8_t *transform = h->mb.mb_transform_size + mb_y * h->sps->i_mb_width;
-    for( int x = 0; x<h->sps->i_mb_width; x++ )
-    {
-        memcpy( buf+x, src+x, 16 );
-        if( transform[x] )
-        {
-            int nnz = src[x][0] | src[x][1];
-            src[x][0] = src[x][1] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
-            nnz = src[x][2] | src[x][3];
-            src[x][2] = src[x][3] = ((uint16_t)nnz ? 0x0101 : 0) + (nnz>>16 ? 0x01010000 : 0);
-        }
-    }
-}
-
-static void restore_cavlc_nnz_row( x264_t *h, int mb_y, uint8_t (*buf)[16] )
-{
-    uint8_t (*dst)[24] = h->mb.non_zero_count + mb_y * h->sps->i_mb_width;
-    for( int x = 0; x < h->sps->i_mb_width; x++ )
-        memcpy( dst+x, buf+x, 16 );
-}
-
-static void munge_cavlc_nnz( x264_t *h, int mb_y, uint8_t (*buf)[16], void (*func)(x264_t*, int, uint8_t (*)[16]) )
-{
-    func( h, mb_y, buf );
-    if( mb_y > 0 )
-        func( h, mb_y-1, buf + h->sps->i_mb_width );
-    if( h->sh.b_mbaff )
-    {
-        func( h, mb_y+1, buf + h->sps->i_mb_width * 2 );
-        if( mb_y > 0 )
-            func( h, mb_y-2, buf + h->sps->i_mb_width * 3 );
-    }
-}
-
-
 /* Deblocking filter */
 static const uint8_t i_alpha_table[52+12*2] =
 {
@@ -344,10 +304,6 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
     int stride2y  = stridey << b_interlaced;
     int strideuv  = h->fdec->i_stride[1];
     int stride2uv = strideuv << b_interlaced;
-    uint8_t (*nnz_backup)[16] = h->scratch_buffer;
-
-    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
-        munge_cavlc_nnz( h, mb_y, nnz_backup, munge_cavlc_nnz_row );
 
     for( int mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
     {
@@ -427,9 +383,6 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
             if( !transform_8x8 ) FILTER( , 1, 3, qp, qpc );
         }
     }
-
-    if( !h->pps->b_cabac && h->pps->b_transform_8x8_mode )
-        munge_cavlc_nnz( h, mb_y, nnz_backup, restore_cavlc_nnz_row );
 }
 
 #ifdef HAVE_MMX
diff --git a/common/macroblock.c b/common/macroblock.c
index ce510e9..01c90d2 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -344,8 +344,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
         int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
         int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
             ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
-        int buf_nnz = !h->param.b_cabac * h->pps->b_transform_8x8_mode * (h->sps->i_mb_width * 4 * 16 * sizeof(uint8_t));
-        scratch_size = X264_MAX4( buf_hpel, buf_ssim, buf_tesa, buf_nnz );
+        scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
     }
     int buf_mbtree = h->param.rc.b_mb_tree * ((h->sps->i_mb_width+3)&~3) * sizeof(int);
     scratch_size = X264_MAX( scratch_size, buf_mbtree );
@@ -1013,6 +1012,49 @@ void x264_macroblock_cache_load_deblock( x264_t *h )
         M32( &h->mb.cache.ref[0][x264_scan8[0]+8*2] ) = refbot;
         M32( &h->mb.cache.ref[0][x264_scan8[0]+8*3] ) = refbot;
     }
+
+    /* Munge NNZ for cavlc + 8x8dct */
+    if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
+    {
+        uint8_t (*nnz)[24] = h->mb.non_zero_count;
+        int top = h->mb.i_mb_top_xy;
+        int left = h->mb.i_mb_left_xy;
+
+        if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
+        {
+            int i8 = x264_scan8[0] - 8;
+            int nnz_top0 = M16( &nnz[top][8] ) | M16( &nnz[top][12] );
+            int nnz_top1 = M16( &nnz[top][10] ) | M16( &nnz[top][14] );
+            M16( &h->mb.cache.non_zero_count[i8+0] ) = nnz_top0 ? 0x0101 : 0;
+            M16( &h->mb.cache.non_zero_count[i8+2] ) = nnz_top1 ? 0x0101 : 0;
+        }
+
+        if( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
+        {
+            int i8 = x264_scan8[0] - 1;
+            int nnz_left0 = M16( &nnz[left][2] ) | M16( &nnz[left][6] );
+            int nnz_left1 = M16( &nnz[left][10] ) | M16( &nnz[left][14] );
+            h->mb.cache.non_zero_count[i8+8*0] = !!nnz_left0;
+            h->mb.cache.non_zero_count[i8+8*1] = !!nnz_left0;
+            h->mb.cache.non_zero_count[i8+8*2] = !!nnz_left1;
+            h->mb.cache.non_zero_count[i8+8*3] = !!nnz_left1;
+        }
+
+        if( h->mb.mb_transform_size[h->mb.i_mb_xy] )
+        {
+            int nnz0 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
+            int nnz1 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 4]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 6]] );
+            int nnz2 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[10]] );
+            int nnz3 = M16( &h->mb.cache.non_zero_count[x264_scan8[12]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[14]] );
+            uint32_t nnztop = pack16to32( !!nnz0, !!nnz1 ) * 0x0101;
+            uint32_t nnzbot = pack16to32( !!nnz2, !!nnz3 ) * 0x0101;
+
+            M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*0] ) = nnztop;
+            M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*1] ) = nnztop;
+            M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*2] ) = nnzbot;
+            M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*3] ) = nnzbot;
+        }
+    }
 }
 
 static void ALWAYS_INLINE x264_macroblock_store_pic( x264_t *h, int i )