[x264-devel] commit: Fix unaligned accesses in bitstream writer (David Conrad)

git version control git at videolan.org
Tue Aug 25 05:54:52 CEST 2009


x264 | branch: master | David Conrad <lessen42 at gmail.com> | Thu Aug 20 20:44:09 2009 -0700 | [3b4ee7359abdeea0337ec76206518a9d8f8f2140] | committer: Jason Garrett-Glaser

Fix unaligned accesses in bitstream writer
Fixes x264 on CPUs with no unaligned access support (e.g. SPARC).
Improves performance marginally on CPUs with penalties for unaligned stores (e.g. some x86).

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3b4ee7359abdeea0337ec76206518a9d8f8f2140
---

 common/bs.h       |   26 +++++++++-----------------
 encoder/cavlc.c   |    4 ++++
 encoder/encoder.c |    2 ++
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/common/bs.h b/common/bs.h
index eafa8f8..d5db977 100644
--- a/common/bs.h
+++ b/common/bs.h
@@ -73,21 +73,22 @@ extern vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];
 
 static inline void bs_init( bs_t *s, void *p_data, int i_data )
 {
-    int offset = ((intptr_t)p_data & (WORD_SIZE-1));
+    int offset = ((intptr_t)p_data & 3);
     s->p       = s->p_start = (uint8_t*)p_data - offset;
     s->p_end   = (uint8_t*)p_data + i_data;
-    s->i_left  = offset ? 8*offset : (WORD_SIZE*8);
-    s->cur_bits = endian_fix( *(intptr_t*)s->p );
+    s->i_left  = (WORD_SIZE - offset)*8;
+    s->cur_bits = endian_fix32(*(uint32_t *)(s->p));
+    s->cur_bits >>= (4-offset)*8;
 }
 static inline int bs_pos( bs_t *s )
 {
     return( 8 * (s->p - s->p_start) + (WORD_SIZE*8) - s->i_left );
 }
 
-/* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32/64-bit aligned. */
+/* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32-bit aligned. */
 static inline void bs_flush( bs_t *s )
 {
-    *(intptr_t*)s->p = endian_fix( s->cur_bits << s->i_left );
+    *(uint32_t*)s->p = endian_fix32( s->cur_bits << (s->i_left&31) );
     s->p += WORD_SIZE - s->i_left / 8;
     s->i_left = WORD_SIZE*8;
 }
@@ -151,21 +152,12 @@ static inline void bs_write1( bs_t *s, uint32_t i_bit )
 
 static inline void bs_align_0( bs_t *s )
 {
-    if( s->i_left&7 )
-    {
-        s->cur_bits <<= s->i_left&7;
-        s->i_left &= ~7;
-    }
+    bs_write( s, s->i_left&7, 0 );
     bs_flush( s );
 }
 static inline void bs_align_1( bs_t *s )
 {
-    if( s->i_left&7 )
-    {
-        s->cur_bits <<= s->i_left&7;
-        s->cur_bits |= (1 << (s->i_left&7)) - 1;
-        s->i_left &= ~7;
-    }
+    bs_write( s, s->i_left&7, (1 << (s->i_left&7)) - 1 );
     bs_flush( s );
 }
 
@@ -245,7 +237,7 @@ static inline void bs_write_te( bs_t *s, int x, int val )
 static inline void bs_rbsp_trailing( bs_t *s )
 {
     bs_write1( s, 1 );
-    bs_flush( s );
+    bs_write( s, s->i_left&7, 0  );
 }
 
 static inline int bs_size_ue( unsigned int val )
diff --git a/encoder/cavlc.c b/encoder/cavlc.c
index cbe85b0..5f2971e 100644
--- a/encoder/cavlc.c
+++ b/encoder/cavlc.c
@@ -298,6 +298,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
 #if !RDO_SKIP_BS
     if( i_mb_type == I_PCM )
     {
+        uint8_t *p_start = s->p_start;
         bs_write_ue( s, i_mb_i_offset + 25 );
         i_mb_pos_tex = bs_pos( s );
         h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
@@ -313,6 +314,9 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
             memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
         s->p += 64;
 
+        bs_init( s, s->p, s->p_end - s->p );
+        s->p_start = p_start;
+
         /* if PCM is chosen, we need to store reconstructed frame data */
         h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
         h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
diff --git a/encoder/encoder.c b/encoder/encoder.c
index 343e27c..22828af 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -981,6 +981,7 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
         x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
         x264_pps_write( &h->out.bs, h->pps );
         x264_nal_end( h );
+        bs_flush( &h->out.bs );
     }
     /* now set output*/
     *pi_nal = h->out.i_nal;
@@ -1374,6 +1375,7 @@ static int x264_slice_write( x264_t *h )
             bs_write_ue( &h->out.bs, i_skip );  /* last skip run */
         /* rbsp_slice_trailing_bits */
         bs_rbsp_trailing( &h->out.bs );
+        bs_flush( &h->out.bs );
     }
 
     x264_nal_end( h );



More information about the x264-devel mailing list