[x264-devel] commit: MMX code for predictor rounding/clipping (Jason Garrett-Glaser)
git at videolan.org
git at videolan.org
Sat Apr 24 00:40:01 CEST 2010
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Fri Apr 16 12:06:07 2010 -0700| [aaf2194fcd9efacbb835af5fe3390ad48d19cc8c] | committer: Jason Garrett-Glaser
MMX code for predictor rounding/clipping
Faster predictor checking at subme < 3.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=aaf2194fcd9efacbb835af5fe3390ad48d19cc8c
---
common/common.h | 11 +++++++++++
common/x86/util.h | 41 +++++++++++++++++++++++++++++++++++++++++
encoder/me.c | 11 ++++++-----
3 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/common/common.h b/common/common.h
index c63fbd9..3973558 100644
--- a/common/common.h
+++ b/common/common.h
@@ -188,6 +188,17 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvd
return amvd0 + (amvd1<<8);
}
+static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+{ /* In place, for each of the i_mvc vectors in mvc: add 2 and shift right by 2 on both components, then pass x through x264_clip3 with [mv_x_min,mv_x_max] and y with [mv_y_min,mv_y_max]. */
+ for( int i = 0; i < i_mvc; i++ )
+ {
+ int mx = (mvc[i][0] + 2) >> 2; /* the +2 makes the >>2 round rather than truncate */
+ int my = (mvc[i][1] + 2) >> 2;
+ mvc[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
+ mvc[i][1] = x264_clip3( my, mv_y_min, mv_y_max ); /* y belongs in component 1; writing it to [0] would clobber x and leave y unrounded and unclipped */
+ }
+}
+
extern const uint8_t x264_exp2_lut[64];
extern const float x264_log2_lut[128];
extern const float x264_log2_lz_lut[32];
diff --git a/common/x86/util.h b/common/x86/util.h
index 4c4d168..8fb1e84 100644
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -45,6 +45,7 @@ static ALWAYS_INLINE void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16
:"m"(M32( a )), "m"(M32( b )), "m"(M32( c ))
);
}
+
#define x264_predictor_difference x264_predictor_difference_mmxext
static ALWAYS_INLINE int x264_predictor_difference_mmxext( int16_t (*mvc)[2], intptr_t i_mvc )
{
@@ -80,6 +81,7 @@ static ALWAYS_INLINE int x264_predictor_difference_mmxext( int16_t (*mvc)[2], in
);
return sum;
}
+
#define x264_cabac_mvd_sum x264_cabac_mvd_sum_mmxext
static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_t *mvdtop)
{
@@ -103,6 +105,45 @@ static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum_mmxext(uint8_t *mvdleft, uint8_
);
return amvd;
}
+
+#define x264_predictor_roundclip x264_predictor_roundclip_mmxext
+static void ALWAYS_INLINE x264_predictor_roundclip_mmxext( int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
+{ /* MMX version: in place, adds 2 to and arithmetic-shifts right by 2 every 16-bit component of the i_mvc vectors in mvc, then clamps each against the packed min/max bounds. i_mvc must be nonzero (see label 1 below). */
+ uint32_t mv_min = pack16to32_mask( mv_x_min, mv_y_min ); /* presumably x in the low 16 bits and y in the high 16, matching the mvc[i] layout -- confirm against pack16to32_mask */
+ uint32_t mv_max = pack16to32_mask( mv_x_max, mv_y_max );
+ static const uint64_t pw_2 = 0x0002000200020002ULL; /* rounding term: 2 in each of the four 16-bit lanes */
+ intptr_t i = i_mvc; /* remaining-vector counter; vectors are processed from the end of the array backwards */
+ asm(
+ "movd %2, %%mm5 \n" /* mm5 low dword = mv_min */
+ "movd %3, %%mm6 \n" /* mm6 low dword = mv_max */
+ "movq %4, %%mm7 \n" /* mm7 = pw_2 */
+ "punpckldq %%mm5, %%mm5 \n" /* copy the low dword into the high dword so both vector slots hold mv_min */
+ "punpckldq %%mm6, %%mm6 \n" /* likewise for mv_max */
+ "test $1, %0 \n" /* is the count odd? */
+ "jz 1f \n" /* even: go straight to the two-vectors-per-iteration loop */
+ "movd -4(%5,%0,4), %%mm0 \n" /* odd: load the single last vector, mvc[i-1] (4 bytes) */
+ "paddw %%mm7, %%mm0 \n" /* +2 per 16-bit lane */
+ "psraw $2, %%mm0 \n" /* arithmetic >>2 per lane */
+ "pmaxsw %%mm5, %%mm0 \n" /* lower clamp: signed max with mv_min */
+ "pminsw %%mm6, %%mm0 \n" /* upper clamp: signed min with mv_max */
+ "movd %%mm0, -4(%5,%0,4) \n" /* store back over mvc[i-1] */
+ "dec %0 \n" /* one vector consumed */
+ "jz 2f \n" /* that was the only vector: done */
+ "1: \n" /* pair loop; if entered with i == 0 it would load below mvc and sub $2 would step past zero, hence the nonzero i_mvc requirement */
+ "movq -8(%5,%0,4), %%mm0 \n" /* load mvc[i-2] and mvc[i-1] together (8 bytes) */
+ "paddw %%mm7, %%mm0 \n" /* +2 per 16-bit lane */
+ "psraw $2, %%mm0 \n" /* arithmetic >>2 per lane */
+ "pmaxsw %%mm5, %%mm0 \n" /* lower clamp */
+ "pminsw %%mm6, %%mm0 \n" /* upper clamp */
+ "movq %%mm0, -8(%5,%0,4) \n" /* store both vectors back */
+ "sub $2, %0 \n" /* two vectors consumed */
+ "jnz 1b \n" /* repeat until the counter reaches zero */
+ "2: \n" /* exit */
+ :"+r"(i), "+m"(M64( mvc )) /* %0 = i (read/write), %1 = read/write memory operand; NOTE(review): confirm M64( mvc ) is enough to tell the compiler the whole array is modified */
+ :"g"(mv_min), "g"(mv_max), "m"(pw_2), "r"(mvc) /* %2 = mv_min, %3 = mv_max, %4 = pw_2, %5 = mvc base pointer; NOTE(review): no clobber list names mm0/mm5-mm7 */
+ );
+}
+
#undef M128_ZERO
#define M128_ZERO ((__m128){0,0,0,0})
#define x264_union128_t x264_union128_sse_t
diff --git a/encoder/me.c b/encoder/me.c
index 6788022..0b519ea 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -241,14 +241,15 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
* sensible to omit the cost of the MV from the rounded MVP to avoid unfairly
* biasing against use of the predicted motion vector. */
bcost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[bmy*stride+bmx], stride );
+ uint32_t bmv = pack16to32_mask( bmx, bmy );
+ if( i_mvc )
+ x264_predictor_roundclip( mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
for( int i = 0; i < i_mvc; i++ )
{
- int mx = (mvc[i][0] + 2) >> 2;
- int my = (mvc[i][1] + 2) >> 2;
- if( (mx | my) && ((mx-bmx) | (my-bmy)) )
+ if( M32( mvc[i] ) && (bmv - M32( mvc[i] )) )
{
- mx = x264_clip3( mx, mv_x_min, mv_x_max );
- my = x264_clip3( my, mv_y_min, mv_y_max );
+ int mx = mvc[i][0];
+ int my = mvc[i][1];
COST_MV( mx, my );
}
}
More information about the x264-devel mailing list