[x264-devel] MBAFF: Add extra data to the deblock strength structure
Simon Horlick
git at videolan.org
Thu May 12 08:39:03 CEST 2011
x264 | branch: master | Simon Horlick <simonhorlick at gmail.com> | Sat Apr 2 18:27:13 2011 +0100| [dce1b213394724cd86ed3e6983cc00ab79cd95e7] | committer: Jason Garrett-Glaser
MBAFF: Add extra data to the deblock strength structure
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=dce1b213394724cd86ed3e6983cc00ab79cd95e7
---
common/common.h | 4 +++-
common/deblock.c | 18 +++++++++---------
common/frame.h | 2 +-
common/macroblock.c | 5 +++--
common/x86/deblock-a.asm | 2 +-
tools/checkasm.c | 7 ++++---
6 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/common/common.h b/common/common.h
index f19f2f6..64947ed 100644
--- a/common/common.h
+++ b/common/common.h
@@ -863,7 +863,9 @@ struct x264_t
/* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
pixel *intra_border_backup[5][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
- uint8_t (*deblock_strength[2])[2][4][4];
+ /* Deblock strength values are stored for each 4x4 partition. In MBAFF
+ * there are four extra values that need to be stored, located in [4][i]. */
+ uint8_t (*deblock_strength[2])[2][8][4];
/* CPU functions dependents */
x264_predict_t predict_16x16[4+3];
diff --git a/common/deblock.c b/common/deblock.c
index 578b5f9..143b87b 100644
--- a/common/deblock.c
+++ b/common/deblock.c
@@ -248,7 +248,7 @@ static void deblock_h_chroma_intra_c( pixel *pix, int stride, int alpha, int bet
}
static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit,
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
int bframe, x264_t *h )
{
for( int dir = 0; dir < 2; dir++ )
@@ -276,7 +276,7 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
}
}
void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
int mvy_limit, int bframe, x264_t *h )
{
int neighbour_field[2];
@@ -364,7 +364,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
int mb_xy = h->mb.i_mb_xy;
int transform_8x8 = h->mb.mb_transform_size[h->mb.i_mb_xy];
int intra_cur = IS_INTRA( h->mb.type[mb_xy] );
- uint8_t (*bs)[4][4] = h->deblock_strength[mb_y&1][mb_x];
+ uint8_t (*bs)[8][4] = h->deblock_strength[mb_y&1][mb_x];
pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
@@ -453,9 +453,9 @@ void x264_macroblock_deblock( x264_t *h )
if( qp <= qp_thresh || h->mb.i_type == P_SKIP )
return;
- uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
+ uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
if( IS_INTRA( h->mb.i_type ) )
- memset( bs, 3, 2*4*4*sizeof(uint8_t) );
+ memset( bs, 3, 2*8*4*sizeof(uint8_t) );
else
h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
bs, 4 >> h->sh.b_mbaff, h->sh.i_type == SLICE_TYPE_B, h );
@@ -500,16 +500,16 @@ void x264_deblock_v_chroma_intra_avx ( pixel *pix, int stride, int alpha, int be
void x264_deblock_h_chroma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_strength_mmxext( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
int mvy_limit, int bframe, x264_t *h );
void x264_deblock_strength_sse2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
int mvy_limit, int bframe, x264_t *h );
void x264_deblock_strength_ssse3 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
int mvy_limit, int bframe, x264_t *h );
void x264_deblock_strength_avx ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4],
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
int mvy_limit, int bframe, x264_t *h );
#if ARCH_X86
void x264_deblock_h_luma_mmxext( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
diff --git a/common/frame.h b/common/frame.h
index 3e6e1b6..b4047b5 100644
--- a/common/frame.h
+++ b/common/frame.h
@@ -182,7 +182,7 @@ typedef struct
x264_deblock_intra_t deblock_luma_intra[2];
x264_deblock_intra_t deblock_chroma_intra[2];
void (*deblock_strength) ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
- int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][4][4], int mvy_limit,
+ int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int mvy_limit,
int bframe, x264_t *h );
} x264_deblock_function_t;
diff --git a/common/macroblock.c b/common/macroblock.c
index 603fe21..95cfd94 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -1242,10 +1242,11 @@ void x264_macroblock_cache_load_neighbours_deblock( x264_t *h, int mb_x, int mb_
void x264_macroblock_deblock_strength( x264_t *h )
{
- uint8_t (*bs)[4][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
+ uint8_t (*bs)[8][4] = h->deblock_strength[h->mb.i_mb_y&1][h->mb.i_mb_x];
if( IS_INTRA( h->mb.type[h->mb.i_mb_xy] ) )
{
- memset( bs, 3, 2*4*4*sizeof(uint8_t) );
+ memset( bs[0], 3, 4*4*sizeof(uint8_t) );
+ memset( bs[1], 3, 4*4*sizeof(uint8_t) );
if( !h->sh.b_mbaff ) return;
}
diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm
index 9858f3a..7ce8624 100644
--- a/common/x86/deblock-a.asm
+++ b/common/x86/deblock-a.asm
@@ -1963,7 +1963,7 @@ DEBLOCK_CHROMA_INTRA mmxext
%define ref r1+scan8start
%define mv r2+scan8start*4
%define bs0 r3
-%define bs1 r3+16
+%define bs1 r3+32
%macro LOAD_BYTES_MMX 1
movd m2, [%1+8*0-1]
diff --git a/tools/checkasm.c b/tools/checkasm.c
index 1f764ef..6ba78e1 100644
--- a/tools/checkasm.c
+++ b/tools/checkasm.c
@@ -1335,7 +1335,8 @@ static int check_deblock( int cpu_ref, int cpu_new )
ALIGNED_ARRAY_16( uint8_t, nnz, [X264_SCAN8_SIZE] );
ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
ALIGNED_ARRAY_16( int16_t, mv, [2],[X264_SCAN8_LUMA_SIZE][2] );
- ALIGNED_ARRAY_16( uint8_t, bs, [2],[2][4][4] );
+ ALIGNED_ARRAY_16( uint8_t, bs, [2],[2][8][4] );
+ memset( bs, 99, sizeof(bs) );
for( int j = 0; j < X264_SCAN8_SIZE; j++ )
nnz[j] = ((rand()&7) == 7) * rand() & 0xf;
for( int j = 0; j < 2; j++ )
@@ -1346,8 +1347,8 @@ static int check_deblock( int cpu_ref, int cpu_new )
mv[j][k][l] = ((rand()&7) != 7) ? (rand()&7) - 3 : (rand()&1023) - 512;
}
set_func_name( "deblock_strength" );
- call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1) );
- call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1) );
+ call_c( db_c.deblock_strength, nnz, ref, mv, bs[0], 2<<(i&1), ((i>>1)&1), NULL );
+ call_a( db_a.deblock_strength, nnz, ref, mv, bs[1], 2<<(i&1), ((i>>1)&1), NULL );
if( memcmp( bs[0], bs[1], sizeof(bs[0]) ) )
{
ok = 0;
More information about the x264-devel mailing list