[x264-devel] commit: More write-combining (Henrik Gramner)
git at videolan.org
git at videolan.org
Thu Apr 29 19:58:08 CEST 2010
x264 | branch: master | Henrik Gramner <hengar-6 at student.ltu.se> | Tue Apr 27 01:44:33 2010 +0200| [bff0df76deeed96e76e3c965223065d30258e7d5] | committer: Jason Garrett-Glaser
More write-combining
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=bff0df76deeed96e76e3c965223065d30258e7d5
---
common/common.h | 2 +-
encoder/macroblock.c | 8 ++++----
encoder/rdo.c | 12 +++++++++---
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/common/common.h b/common/common.h
index 44f8c1a..a3b5d5a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -658,7 +658,7 @@ struct x264_t
ALIGNED_8( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );
/* i_non_zero_count if available else 0x80 */
- ALIGNED_4( uint8_t non_zero_count[X264_SCAN8_SIZE] );
+ ALIGNED_16( uint8_t non_zero_count[X264_SCAN8_SIZE] );
/* -1 if unused, -2 if unavailable */
ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 1125009..a961baf 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -458,10 +458,10 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
static void x264_macroblock_encode_skip( x264_t *h )
{
- h->mb.i_cbp_luma = 0x00;
- h->mb.i_cbp_chroma = 0x00;
- memset( h->mb.cache.non_zero_count, 0, sizeof( h->mb.cache.non_zero_count ) );
- /* store cbp */
+ for( int i = 0; i < sizeof( h->mb.cache.non_zero_count ); i += 16 )
+ M128( &h->mb.cache.non_zero_count[i] ) = M128_ZERO;
+ h->mb.i_cbp_luma = 0;
+ h->mb.i_cbp_chroma = 0;
h->mb.cbp[h->mb.i_mb_xy] = 0;
}
diff --git a/encoder/rdo.c b/encoder/rdo.c
index 2d0fad5..4d83b6a 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -438,10 +438,13 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct,
if( i < b_ac )
{
- /* We only need to memset an empty 4x4 block. 8x8 can be
+ /* We only need to zero an empty 4x4 block. 8x8 can be
implicitly emptied via zero nnz, as can dc. */
if( i_coefs == 16 && !dc )
- memset( dct, 0, 16 * sizeof(int16_t) );
+ {
+ M128( &dct[0] ) = M128_ZERO;
+ M128( &dct[8] ) = M128_ZERO;
+ }
return 0;
}
@@ -608,7 +611,10 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct,
if( bnode == &nodes_cur[0] )
{
if( i_coefs == 16 && !dc )
- memset( dct, 0, 16 * sizeof(int16_t) );
+ {
+ M128( &dct[0] ) = M128_ZERO;
+ M128( &dct[8] ) = M128_ZERO;
+ }
return 0;
}
More information about the x264-devel mailing list