[x264-devel] commit: Cut size of MVD arrays by a factor of 2 again ( Jason Garrett-Glaser )

Sat Feb 27 01:16:54 CET 2010

x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Wed Feb 24 20:51:43 2010 -0800| [3ae44d9e19afccf3feb9ef3c33ae0538893f78e9] | committer: Jason Garrett-Glaser 

Cut size of MVD arrays by a factor of 2 again
Only store the MVDs of the edges of each MB (the bottom row and the right column), since those are the only values that neighboring MBs load.

Thanks to Michael Niedermayer for the idea.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=3ae44d9e19afccf3feb9ef3c33ae0538893f78e9
---

 common/macroblock.c |   47 +++++++++++++++++++++++++----------------------
 1 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/common/macroblock.c b/common/macroblock.c
index 8a4f095..b4d0bfb 100644
--- a/common/macroblock.c
+++ b/common/macroblock.c
@@ -712,8 +712,8 @@ int x264_macroblock_cache_init( x264_t *h )
     if( h->param.b_cabac )
     {
         CHECKED_MALLOC( h->mb.chroma_pred_mode, i_mb_count * sizeof(int8_t) );
-        CHECKED_MALLOC( h->mb.mvd[0], 2*16 * i_mb_count * sizeof(uint8_t) );
-        CHECKED_MALLOC( h->mb.mvd[1], 2*16 * i_mb_count * sizeof(uint8_t) );
+        CHECKED_MALLOC( h->mb.mvd[0], 2*8 * i_mb_count * sizeof(uint8_t) );
+        CHECKED_MALLOC( h->mb.mvd[1], 2*8 * i_mb_count * sizeof(uint8_t) );
     }
 
     for( i=0; i<2; i++ )
@@ -1211,25 +1211,20 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
             if( h->param.b_cabac )
             {
                 if( i_top_type >= 0 )
-                    CP64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8], h->mb.mvd[i_list][i_top_4x4] );
+                    CP64( h->mb.cache.mvd[i_list][x264_scan8[0]-8], h->mb.mvd[i_list][i_top_xy*8] );
                 else
-                    M64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8] ) = 0;
+                    M64( h->mb.cache.mvd[i_list][x264_scan8[0]-8] ) = 0;
 
                 if( i_left_type >= 0 )
                 {
-                    const int i8 = x264_scan8[0] - 1;
-                    const int iv = i_mb_4x4 - 1;
-                    CP16( h->mb.cache.mvd[i_list][i8+0*8], h->mb.mvd[i_list][iv + 0*s4x4] );
-                    CP16( h->mb.cache.mvd[i_list][i8+1*8], h->mb.mvd[i_list][iv + 1*s4x4] );
-                    CP16( h->mb.cache.mvd[i_list][i8+2*8], h->mb.mvd[i_list][iv + 2*s4x4] );
-                    CP16( h->mb.cache.mvd[i_list][i8+3*8], h->mb.mvd[i_list][iv + 3*s4x4] );
+                    CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+0*8], h->mb.mvd[i_list][i_left_xy*8+4] );
+                    CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+1*8], h->mb.mvd[i_list][i_left_xy*8+5] );
+                    CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+2*8], h->mb.mvd[i_list][i_left_xy*8+6] );
+                    CP16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+3*8], h->mb.mvd[i_list][i_left_xy*8+3] );
                 }
                 else
-                {
-                    const int i8 = x264_scan8[0] - 1;
                     for( i = 0; i < 4; i++ )
-                        M16( h->mb.cache.mvd[i_list][i8+i*8] ) = 0;
-                }
+                        M16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+i*8] ) = 0;
             }
         }
 
@@ -1406,19 +1401,27 @@ void x264_macroblock_cache_save( x264_t *h )
 
         if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) && !IS_DIRECT( i_mb_type ) )
         {
-            for( y = 0; y < 4; y++ )
-                CP64( h->mb.mvd[0][i_mb_4x4+y*s4x4], h->mb.cache.mvd[0][x264_scan8[0]+8*y] );
+            CP64( h->mb.mvd[0][i_mb_xy*8+0], h->mb.cache.mvd[0][x264_scan8[0]+8*3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+4], h->mb.cache.mvd[0][x264_scan8[0]+8*0+3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+5], h->mb.cache.mvd[0][x264_scan8[0]+8*1+3] );
+            CP16( h->mb.mvd[0][i_mb_xy*8+6], h->mb.cache.mvd[0][x264_scan8[0]+8*2+3] );
             if( h->sh.i_type == SLICE_TYPE_B )
-                for( y = 0; y < 4; y++ )
-                    CP64( h->mb.mvd[1][i_mb_4x4+y*s4x4], h->mb.cache.mvd[1][x264_scan8[0]+8*y] );
+            {
+                CP64( h->mb.mvd[1][i_mb_xy*8+0], h->mb.cache.mvd[1][x264_scan8[0]+8*3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+4], h->mb.cache.mvd[1][x264_scan8[0]+8*0+3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+5], h->mb.cache.mvd[1][x264_scan8[0]+8*1+3] );
+                CP16( h->mb.mvd[1][i_mb_xy*8+6], h->mb.cache.mvd[1][x264_scan8[0]+8*2+3] );
+            }
         }
         else
         {
-            for( y = 0; y < 4; y++ )
-                M64( h->mb.mvd[0][i_mb_4x4+y*s4x4] ) = 0;
+            M64( h->mb.mvd[0][i_mb_xy*8+0] ) = 0;
+            M64( h->mb.mvd[0][i_mb_xy*8+4] ) = 0;
             if( h->sh.i_type == SLICE_TYPE_B )
-                for( y = 0; y < 4; y++ )
-                    M64( h->mb.mvd[1][i_mb_4x4+y*s4x4] ) = 0;
+            {
+                M64( h->mb.mvd[1][i_mb_xy*8+0] ) = 0;
+                M64( h->mb.mvd[1][i_mb_xy*8+4] ) = 0;
+            }
         }
 
         if( h->sh.i_type == SLICE_TYPE_B )