[x264-devel] commit: increase alignment of mv arrays ( Håkan Hjort )

Mon Mar 17 09:22:27 CET 2008

x264 | branch: master | Håkan Hjort <hakan.hjort at gmail.com> | Mon Mar 17 01:20:02 2008 -0600| [2c8600ea161bdf2be733b01e17974fe75a4e31d3]

increase alignment of mv arrays

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=2c8600ea161bdf2be733b01e17974fe75a4e31d3
---

 common/common.h   |   10 +++++-----
 encoder/analyse.c |   24 ++++++++++--------------
 encoder/me.h      |    2 +-
 3 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/common/common.h b/common/common.h
index d1c5e77..c4925f2 100644
--- a/common/common.h
+++ b/common/common.h
@@ -460,16 +460,16 @@ struct x264_t
             int     non_zero_count[X264_SCAN8_SIZE];
 
             /* -1 if unused, -2 if unavailable */
-            int8_t  ref[2][X264_SCAN8_SIZE];
+            DECLARE_ALIGNED( int8_t, ref[2][X264_SCAN8_SIZE], 4 );
 
             /* 0 if not available */
-            int16_t mv[2][X264_SCAN8_SIZE][2];
-            int16_t mvd[2][X264_SCAN8_SIZE][2];
+            DECLARE_ALIGNED( int16_t, mv[2][X264_SCAN8_SIZE][2], 16 );
+            DECLARE_ALIGNED( int16_t, mvd[2][X264_SCAN8_SIZE][2], 4 );
 
             /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
-            int8_t  skip[X264_SCAN8_SIZE];
+            DECLARE_ALIGNED( int8_t, skip[X264_SCAN8_SIZE], 4 );
 
-            int16_t direct_mv[2][X264_SCAN8_SIZE][2];
+            DECLARE_ALIGNED( int16_t, direct_mv[2][X264_SCAN8_SIZE][2], 16 ) ;
             int8_t  direct_ref[2][X264_SCAN8_SIZE];
             int     pskip_mv[2];
 
diff --git a/encoder/analyse.c b/encoder/analyse.c
index 2b3ec7e..cdfd08e 100644
--- a/encoder/analyse.c
+++ b/encoder/analyse.c
@@ -44,8 +44,8 @@ typedef struct
 
     /* 8x8 */
     int       i_cost8x8;
-    int       mvc[32][5][2]; /* [ref][0] is 16x16 mv,
-                                [ref][1..4] are 8x8 mv from partition [0..3] */
+    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
+    DECLARE_ALIGNED( int, mvc[32][5][2], 8 );
     x264_me_t me8x8[4];
 
     /* Sub 4x4 */
@@ -1145,7 +1145,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     uint8_t  **p_fenc = h->mb.pic.p_fenc;
-    int mvc[3][2];
+    DECLARE_ALIGNED( int, mvc[3][2], 8 );
     int i, j;
 
     /* XXX Needed for x264_mb_predict_mv */
@@ -1195,7 +1195,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
 {
     x264_me_t m;
     uint8_t  **p_fenc = h->mb.pic.p_fenc;
-    int mvc[3][2];
+    DECLARE_ALIGNED( int, mvc[3][2], 8 );
     int i, j;
 
     /* XXX Needed for x264_mb_predict_mv */
@@ -1698,7 +1698,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
         { h->mb.pic.p_fref[0][a->l0.i_ref],
           h->mb.pic.p_fref[1][a->l1.i_ref] };
     DECLARE_ALIGNED( uint8_t,  pix[2][16*8], 16 );
-    int mvc[2][2];
+    DECLARE_ALIGNED( int, mvc[2][2], 8 );
     int i, l;
 
     h->mb.i_partition = D_16x8;
@@ -1721,10 +1721,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
             LOAD_FENC( m, h->mb.pic.p_fenc, 0, 8*i );
             LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 0, 8*i );
 
-            mvc[0][0] = lX->me8x8[2*i].mv[0];
-            mvc[0][1] = lX->me8x8[2*i].mv[1];
-            mvc[1][0] = lX->me8x8[2*i+1].mv[0];
-            mvc[1][1] = lX->me8x8[2*i+1].mv[1];
+            *(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[2*i].mv;
+            *(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[2*i+1].mv;
 
             x264_mb_predict_mv( h, l, 8*i, 2, m->mvp );
             x264_me_search( h, m, mvc, 2 );
@@ -1769,7 +1767,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
         { h->mb.pic.p_fref[0][a->l0.i_ref],
           h->mb.pic.p_fref[1][a->l1.i_ref] };
     uint8_t pix[2][8*16];
-    int mvc[2][2];
+    DECLARE_ALIGNED( int, mvc[2][2], 8 );
     int i, l;
 
     h->mb.i_partition = D_8x16;
@@ -1791,10 +1789,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
             LOAD_FENC( m, h->mb.pic.p_fenc, 8*i, 0 );
             LOAD_HPELS( m, p_fref[l], l, lX->i_ref, 8*i, 0 );
 
-            mvc[0][0] = lX->me8x8[i].mv[0];
-            mvc[0][1] = lX->me8x8[i].mv[1];
-            mvc[1][0] = lX->me8x8[i+2].mv[0];
-            mvc[1][1] = lX->me8x8[i+2].mv[1];
+            *(uint64_t*)mvc[0] = *(uint64_t*)lX->me8x8[i].mv;
+            *(uint64_t*)mvc[1] = *(uint64_t*)lX->me8x8[i+2].mv;
 
             x264_mb_predict_mv( h, l, 4*i, 2, m->mvp );
             x264_me_search( h, m, mvc, 2 );
diff --git a/encoder/me.h b/encoder/me.h
index e0be0e9..a1cc0af 100644
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -44,7 +44,7 @@ typedef struct
     /* output */
     int cost_mv;        /* lambda * nbits for the chosen mv */
     int cost;           /* satd + lambda * nbits */
-    int mv[2];
+    DECLARE_ALIGNED( int, mv[2], 8 );
 } x264_me_t;
 
 void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh );