[x264-devel] commit: More write-combining (Henrik Gramner)

git at videolan.org git at videolan.org
Thu Apr 29 19:58:08 CEST 2010


x264 | branch: master | Henrik Gramner <hengar-6 at student.ltu.se> | Tue Apr 27 01:44:33 2010 +0200| [bff0df76deeed96e76e3c965223065d30258e7d5] | committer: Jason Garrett-Glaser 

More write-combining

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=bff0df76deeed96e76e3c965223065d30258e7d5
---

 common/common.h      |    2 +-
 encoder/macroblock.c |    8 ++++----
 encoder/rdo.c        |   12 +++++++++---
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/common/common.h b/common/common.h
index 44f8c1a..a3b5d5a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -658,7 +658,7 @@ struct x264_t
             ALIGNED_8( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );
 
             /* i_non_zero_count if available else 0x80 */
-            ALIGNED_4( uint8_t non_zero_count[X264_SCAN8_SIZE] );
+            ALIGNED_16( uint8_t non_zero_count[X264_SCAN8_SIZE] );
 
             /* -1 if unused, -2 if unavailable */
             ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
diff --git a/encoder/macroblock.c b/encoder/macroblock.c
index 1125009..a961baf 100644
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -458,10 +458,10 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
 
 static void x264_macroblock_encode_skip( x264_t *h )
 {
-    h->mb.i_cbp_luma = 0x00;
-    h->mb.i_cbp_chroma = 0x00;
-    memset( h->mb.cache.non_zero_count, 0, sizeof( h->mb.cache.non_zero_count ) );
-    /* store cbp */
+    for( int i = 0; i < sizeof( h->mb.cache.non_zero_count ); i += 16 )
+        M128( &h->mb.cache.non_zero_count[i] ) = M128_ZERO;
+    h->mb.i_cbp_luma = 0;
+    h->mb.i_cbp_chroma = 0;
     h->mb.cbp[h->mb.i_mb_xy] = 0;
 }
 
diff --git a/encoder/rdo.c b/encoder/rdo.c
index 2d0fad5..4d83b6a 100644
--- a/encoder/rdo.c
+++ b/encoder/rdo.c
@@ -438,10 +438,13 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct,
 
     if( i < b_ac )
     {
-        /* We only need to memset an empty 4x4 block.  8x8 can be
+        /* We only need to zero an empty 4x4 block. 8x8 can be
            implicitly emptied via zero nnz, as can dc. */
         if( i_coefs == 16 && !dc )
-            memset( dct, 0, 16 * sizeof(int16_t) );
+        {
+            M128( &dct[0] ) = M128_ZERO;
+            M128( &dct[8] ) = M128_ZERO;
+        }
         return 0;
     }
 
@@ -608,7 +611,10 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct,
     if( bnode == &nodes_cur[0] )
     {
         if( i_coefs == 16 && !dc )
-            memset( dct, 0, 16 * sizeof(int16_t) );
+        {
+            M128( &dct[0] ) = M128_ZERO;
+            M128( &dct[8] ) = M128_ZERO;
+        }
         return 0;
     }
 



More information about the x264-devel mailing list