[x264-devel] commit: r1548 broke subme < 3 + p8x8/b8x8 (Loren Merritt)

Thu Apr 29 19:58:08 CEST 2010

x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Thu Apr 29 17:35:25 2010 +0000| [d9db8b3ed9615f4262d58e0d9ed1e1bb83f6673e] | committer: Jason Garrett-Glaser 

r1548 broke subme < 3 + p8x8/b8x8
This caused significantly worse compression.  Preset-wise, only veryfast was affected.
Fixed by not modifying mvc in-place: the rounded and clipped predictors are now written to a separate buffer.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=d9db8b3ed9615f4262d58e0d9ed1e1bb83f6673e
---

 common/common.h   |    6 +++---
 common/x86/util.h |   10 +++++-----
 encoder/me.c      |   13 +++++++------
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/common/common.h b/common/common.h
index a3b5d5a..2f35244 100644
--- a/common/common.h
+++ b/common/common.h
@@ -188,14 +188,14 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
     return amvd0 + (amvd1<<8);
 }
 
-static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
 {
     for( int i = 0; i < i_mvc; i++ )
     {
         int mx = (mvc[i][0] + 2) >> 2;
         int my = (mvc[i][1] + 2) >> 2;
-        mvc[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
-        mvc[i][1] = x264_clip3( my, mv_y_min, mv_y_max );
+        dst[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
+        dst[i][1] = x264_clip3( my, mv_y_min, mv_y_max );
     }
 }
 
diff --git a/common/x86/util.h b/common/x86/util.h
index e6a2505..40a738a 100644
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -109,7 +109,7 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_
 }
 
 #define x264_predictor_roundclip x264_predictor_roundclip_mmxext
-static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
 {
     uint32_t mv_min = pack16to32_mask( mv_x_min, mv_y_min );
     uint32_t mv_max = pack16to32_mask( mv_x_max, mv_y_max );
@@ -123,7 +123,7 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
         "punpckldq %%mm6, %%mm6  \n"
         "test $1, %0             \n"
         "jz 1f                   \n"
-        "movd -4(%5,%0,4), %%mm0 \n"
+        "movd -4(%6,%0,4), %%mm0 \n"
         "paddw %%mm7, %%mm0      \n"
         "psraw $2, %%mm0         \n"
         "pmaxsw %%mm5, %%mm0     \n"
@@ -132,7 +132,7 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
         "dec %0                  \n"
         "jz 2f                   \n"
         "1:                      \n"
-        "movq -8(%5,%0,4), %%mm0 \n"
+        "movq -8(%6,%0,4), %%mm0 \n"
         "paddw %%mm7, %%mm0      \n"
         "psraw $2, %%mm0         \n"
         "pmaxsw %%mm5, %%mm0     \n"
@@ -141,8 +141,8 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
         "sub $2, %0              \n"
         "jnz 1b                  \n"
         "2:                      \n"
-        :"+r"(i), "+m"(M64( mvc ))
-        :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(mvc)
+        :"+r"(i), "=m"(M64( dst ))
+        :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(dst), "r"(mvc), "m"(M64( mvc ))
     );
 }
 
diff --git a/encoder/me.c b/encoder/me.c
index 1424616..93f48ba 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -245,14 +245,15 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
         pmv = pack16to32_mask( bmx, bmy );
         if( i_mvc > 0 )
         {
-            x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+            ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
+            x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
             bcost <<= 4;
             for( int i = 1; i <= i_mvc; i++ )
             {
-                if( M32( mvc[i-1] ) && (pmv != M32( mvc[i-1] )) )
+                if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
                 {
-                    int mx = mvc[i-1][0];
-                    int my = mvc[i-1][1];
+                    int mx = mvc_fpel[i-1][0];
+                    int my = mvc_fpel[i-1][1];
                     int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
                     cost = (cost << 4) + i;
                     COPY1_IF_LT( bcost, cost );
@@ -260,8 +261,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
             }
             if( bcost&15 )
             {
-                bmx = mvc[(bcost&15)-1][0];
-                bmy = mvc[(bcost&15)-1][1];
+                bmx = mvc_fpel[(bcost&15)-1][0];
+                bmy = mvc_fpel[(bcost&15)-1][1];
             }
             bcost >>= 4;
         }