[x264-devel] MBAFF: Add extra data to the deblock strength structure

Simon Horlick git at videolan.org
Thu May 12 08:39:03 CEST 2011


x264 | branch: master | Simon Horlick <simonhorlick at gmail.com> | Sat Apr  2 18:27:13 2011 +0100 | [dce1b213394724cd86ed3e6983cc00ab79cd95e7] | committer: Jason Garrett-Glaser

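MBAFF: Add extra data to the deblock strength structure

The deblock strength array grows from [2][4][4] to [2][8][4] so that MBAFF
can store four extra values, located in [4][i]. All C prototypes and local
pointers are updated to the new shape, the x86 asm offset of bs1 moves from
16 to 32 bytes to match, and checkasm now pre-fills the buffer and passes
the x264_t argument (as NULL) to deblock_strength.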

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=dce1b213394724cd86ed3e6983cc00ab79cd95e7
---

 common/common.h          |    4 +++-
 common/deblock.c         |   18 +++++++++---------
 common/frame.h           |    2 +-
 common/macroblock.c      |    5 +++--
 common/x86/deblock-a.asm |    2 +-
 tools/checkasm.c         |    7 ++++---
 6 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/common/common.h b/common/common.h
index f19f2f6..64947ed 100644
--- a/common/common.h
+++ b/common/common.h
@@ -863,7 +863,9 @@ struct x264_t
     /* Buffers that are allocated per-thread even in sliced threads. */
     void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
     pixel *intra_border_backup[5][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
-    uint8_t (*deblock_strength[2])[2][4][4];
+    /* Deblock strength values are stored for each 4x4 partition. In MBAFF
+     * there are four extra values that need to be stored, located in [4][i]. */
+    uint8_t (*deblock_strength[2])[2][8][4];
 
     /* CPU functions dependents */
     x264_predict_t      predict_16x16[4+3];
diff --git a/common/deblock.c b/common/deblock.c
index 578b5f9..143b87b 100644
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -248,7 +248,7 @@ static void deblock_h_chroma_intra_c( pixel *pix, int stride, int alpha, int bet
 }
 
 static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit,
+                                int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
                                 int bframe, x264_t *h )
 {
     for( int dir = 0; dir < 2; dir++ )
@@ -276,7 +276,7 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
     }
 }
 void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                               int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+                               int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                int mvy_limit, int bframe, x264_t *h )
 {
     int neighbour_field[2];
@@ -364,7 +364,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
         int mb_xy = h->mb.i_mb_xy;
         int transform_8x8 = h->mb.mb_transform_size[h->mb.i_mb_xy];
         int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
-        uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&1][mb_x];
+        uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][mb_x];
 
         pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey  + 16*mb_x;
         pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
@@ -453,9 +453,9 @@ void x264_macroblock_deblock( x264_t *h )
     if( qp <= qp_thresh || h->mb.i_type == P_SKIP )
         return;
 
-    uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
+    uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
     if( IS_INTRA( h->mb.i_type ) )
-        memset( bs, 3, 2*4*4*sizeof(uint8_t) );
+        memset( bs, 3, 2*8*4*sizeof(uint8_t) );
     else
         h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
                                    bs, 4 >> h->sh.b_mbaff, h->sh.i_type == SLICE_TYPE_B, h );
@@ -500,16 +500,16 @@ void x264_deblock_v_chroma_intra_avx ( pixel *pix, int stride, int alpha, int be
 void x264_deblock_h_chroma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
 void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
 void x264_deblock_strength_mmxext( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                    int mvy_limit, int bframe, x264_t *h );
 void x264_deblock_strength_sse2  ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                    int mvy_limit, int bframe, x264_t *h );
 void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                    int mvy_limit, int bframe, x264_t *h );
 void x264_deblock_strength_avx   ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+                                   int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                    int mvy_limit, int bframe, x264_t *h );
 #if ARCH_X86
 void x264_deblock_h_luma_mmxext( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
diff --git a/common/frame.h b/common/frame.h
index 3e6e1b6..b4047b5 100644
--- a/common/frame.h
+++ b/common/frame.h
@@ -182,7 +182,7 @@ typedef struct
     x264_deblock_intra_t deblock_luma_intra[2];
     x264_deblock_intra_t deblock_chroma_intra[2];
     void (*deblock_strength) ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
-                               int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit,
+                               int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
                                int bframe, x264_t *h );
 } x264_deblock_function_t;
 
diff --git a/common/macroblock.c b/common/macroblock.c
index 603fe21..95cfd94 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -1242,10 +1242,11 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
 
 void x264_macroblock_deblock_strength( x264_t *h )
 {
-    uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
+    uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
     if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
     {
-        memset( bs, 3, 2*4*4*sizeof(uint8_t) );
+        memset( bs[0], 3, 4*4*sizeof(uint8_t) );
+        memset( bs[1], 3, 4*4*sizeof(uint8_t) );
         if( !h->sh.b_mbaff ) return;
     }
 
diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index 9858f3a..7ce8624 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -1963,7 +1963,7 @@ DEBLOCK_CHROMA_INTRA mmxext
 %define ref r1+scan8start
 %define mv  r2+scan8start*4
 %define bs0 r3
-%define bs1 r3+16
+%define bs1 r3+32
 
 %macro LOAD_BYTES_MMX 1
     movd      m2, [%1+8*0-1]
diff --git a/tools/checkasm.c b/tools/checkasm.c
index 1f764ef..6ba78e1 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -1335,7 +1335,8 @@ static int check_deblock( int cpu_ref, int cpu_new )
             ALIGNED_ARRAY_16( uint8_t, nnz, [X264_SCAN8_SIZE] );
             ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
             ALIGNED_ARRAY_16( int16_t, mv, [2],[X264_SCAN8_LUMA_SIZE][2] );
-            ALIGNED_ARRAY_16( uint8_t, bs, [2],[2][4][4] );
+            ALIGNED_ARRAY_16( uint8_t, bs, [2],[2][8][4] );
+            memset( bs, 99, sizeof(bs) );
             for( int j = 0; j < X264_SCAN8_SIZE; j++ )
                 nnz[j] = ((rand()&7) == 7) * rand() & 0xf;
             for( int j = 0; j < 2; j++ )
@@ -1346,8 +1347,8 @@ static int check_deblock( int cpu_ref, int cpu_new )
                         mv[j][k][l] = ((rand()&7) != 7) ? (rand()&7) - 3 : (rand()&1023) - 512;
                 }
             set_func_name( "deblock_strength" );
-            call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1) );
-            call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1) );
+            call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1), NULL );
+            call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1), NULL );
             if( memcmp( bs[0], bs[1], sizeof(bs[0]) ) )
             {
                 ok = 0;



More information about the x264-devel mailing list