[x264-devel] commit: r1548 broke subme < 3 + p8x8/b8x8 (Loren Merritt)
git at videolan.org
git at videolan.org
Thu Apr 29 19:58:08 CEST 2010
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Thu Apr 29 17:35:25 2010 +0000| [d9db8b3ed9615f4262d58e0d9ed1e1bb83f6673e] | committer: Jason Garrett-Glaser
r1548 broke subme < 3 + p8x8/b8x8
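This caused significantly worse compression. Among the presets, only veryfast was affected.
Fixed by no longer modifying mvc in place: the rounded and clipped predictors are now written to a separate buffer (mvc_fpel).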
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=d9db8b3ed9615f4262d58e0d9ed1e1bb83f6673e
---
common/common.h | 6 +++---
common/x86/util.h | 10 +++++-----
encoder/me.c | 13 +++++++------
3 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/common/common.h b/common/common.h
index a3b5d5a..2f35244 100644
--- a/common/common.h
+++ b/common/common.h
@@ -188,14 +188,14 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
return amvd0 + (amvd1<<8);
}
-static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
{
for( int i = 0; i < i_mvc; i++ )
{
int mx = (mvc[i][0] + 2) >> 2;
int my = (mvc[i][1] + 2) >> 2;
- mvc[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
- mvc[i][1] = x264_clip3( my, mv_y_min, mv_y_max );
+ dst[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
+ dst[i][1] = x264_clip3( my, mv_y_min, mv_y_max );
}
}
diff --git a/common/x86/util.h b/common/x86/util.h
index e6a2505..40a738a 100644
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -109,7 +109,7 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_
}
#define x264_predictor_roundclip x264_predictor_roundclip_mmxext
-static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
{
uint32_t mv_min = pack16to32_mask( mv_x_min, mv_y_min );
uint32_t mv_max = pack16to32_mask( mv_x_max, mv_y_max );
@@ -123,7 +123,7 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
"punpckldq %%mm6, %%mm6 \n"
"test $1, %0 \n"
"jz 1f \n"
- "movd -4(%5,%0,4), %%mm0 \n"
+ "movd -4(%6,%0,4), %%mm0 \n"
"paddw %%mm7, %%mm0 \n"
"psraw $2, %%mm0 \n"
"pmaxsw %%mm5, %%mm0 \n"
@@ -132,7 +132,7 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
"dec %0 \n"
"jz 2f \n"
"1: \n"
- "movq -8(%5,%0,4), %%mm0 \n"
+ "movq -8(%6,%0,4), %%mm0 \n"
"paddw %%mm7, %%mm0 \n"
"psraw $2, %%mm0 \n"
"pmaxsw %%mm5, %%mm0 \n"
@@ -141,8 +141,8 @@ static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], in
"sub $2, %0 \n"
"jnz 1b \n"
"2: \n"
- :"+r"(i), "+m"(M64( mvc ))
- :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(mvc)
+ :"+r"(i), "=m"(M64( dst ))
+ :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(dst), "r"(mvc), "m"(M64( mvc ))
);
}
diff --git a/encoder/me.c b/encoder/me.c
index 1424616..93f48ba 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -245,14 +245,15 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
pmv = pack16to32_mask( bmx, bmy );
if( i_mvc > 0 )
{
- x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+ ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
+ x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
bcost <<= 4;
for( int i = 1; i <= i_mvc; i++ )
{
- if( M32( mvc[i-1] ) && (pmv != M32( mvc[i-1] )) )
+ if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
{
- int mx = mvc[i-1][0];
- int my = mvc[i-1][1];
+ int mx = mvc_fpel[i-1][0];
+ int my = mvc_fpel[i-1][1];
int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
cost = (cost << 4) + i;
COPY1_IF_LT( bcost, cost );
@@ -260,8 +261,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
}
if( bcost&15 )
{
- bmx = mvc[(bcost&15)-1][0];
- bmy = mvc[(bcost&15)-1][1];
+ bmx = mvc_fpel[(bcost&15)-1][0];
+ bmy = mvc_fpel[(bcost&15)-1][1];
}
bcost >>= 4;
}
More information about the x264-devel mailing list