[x264-devel] commit: More tweaks to me.c (Jason Garrett-Glaser)
git version control
git at videolan.org
Thu Jun 12 16:07:40 CEST 2008
x264 | branch: master | Jason Garrett-Glaser <darkshikari at gmail.com> | Thu Jun 12 08:09:22 2008 -0600| [11ae289c685bf901bddb121910c6c50f57625f98]
More tweaks to me.c
Added inline MMX version of UMH's predictor difference test
Various cosmetics throughout me.c
Removed a C99-ism introduced in r878.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=11ae289c685bf901bddb121910c6c50f57625f98
---
common/common.h | 11 +++++++++++
common/x86/util.h | 31 +++++++++++++++++++++++++++++++
encoder/me.c | 13 +++++--------
3 files changed, 47 insertions(+), 8 deletions(-)
diff --git a/common/common.h b/common/common.h
index 84d6d7f..572231a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -130,6 +130,17 @@ static inline void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t
dst[1] = x264_median( a[1], b[1], c[1] );
}
+static inline int x264_predictor_difference( int16_t (*mvc)[2], int i_mvc ) /* Returns the sum of |d0|+|d1| over each adjacent pair of the i_mvc two-component entries in mvc (presumably motion-vector candidates). */
+{
+ int sum = 0, i;
+ for( i = 0; i < i_mvc-1; i++ ) /* i_mvc-1 adjacent pairs; the body never runs when i_mvc <= 1, so the result is then 0 */
+ {
+ sum += abs( mvc[i][0] - mvc[i+1][0] ) /* absolute difference of component 0 */
+ + abs( mvc[i][1] - mvc[i+1][1] ); /* absolute difference of component 1 */
+ }
+ return sum;
+}
+
#ifdef HAVE_MMX
#include "x86/util.h"
#endif
diff --git a/common/x86/util.h b/common/x86/util.h
index 73f4904..4b149f2 100644
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -39,6 +39,37 @@ static inline void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16_t *b,
:"m"(*(uint32_t*)a), "m"(*(uint32_t*)b), "m"(*(uint32_t*)c)
);
}
+#define x264_predictor_difference x264_predictor_difference_mmxext
+static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], int i_mvc ) /* Inline-asm variant: accumulates per-lane absolute differences of adjacent mvc entries in mm4, then adds up the four 16-bit lanes. */
+{
+ int sum = 0;
+ uint16_t output[4]; /* receives the four 16-bit lanes of the mm4 accumulator */
+ asm( /* NOTE(review): for i_mvc < 2 both entry paths below address bytes before mvc[0], whereas the plain C version returns 0; presumably callers only pass i_mvc >= 2 - confirm */
+ "pxor %%mm4, %%mm4 \n" /* mm4 = 0 (accumulator) */
+ "test $1, %1 \n" /* examine the low bit of i_mvc */
+ "jnz 3f \n" /* odd i_mvc: take the path at label 3 */
+ "movd -8(%2,%1,4), %%mm0 \n" /* even i_mvc: mm0 = entry mvc[i_mvc-2] (4 bytes per entry) */
+ "movd -4(%2,%1,4), %%mm3 \n" /* mm3 = entry mvc[i_mvc-1] */
+ "psubw %%mm3, %%mm0 \n" /* a single difference, held in the low two lanes */
+ "jmp 2f \n" /* join the shared abs/accumulate tail */
+ "3: \n"
+ "sub $1, %1 \n" /* odd i_mvc: make the index even before entering the loop */
+ "1: \n"
+ "movq -8(%2,%1,4), %%mm0 \n" /* mm0 = entries [%1-2] and [%1-1] */
+ "psubw -4(%2,%1,4), %%mm0 \n" /* minus entries [%1-1] and [%1]: two adjacent-pair differences at once */
+ "2: \n"
+ "sub $2, %1 \n" /* step the index down by two; this sets the flags that jg tests below */
+ "pxor %%mm2, %%mm2 \n" /* mm2 = 0 */
+ "psubw %%mm0, %%mm2 \n" /* mm2 = -mm0 */
+ "pmaxsw %%mm2, %%mm0 \n" /* mm0 = max(mm0, -mm0), i.e. per-lane absolute value */
+ "paddusw %%mm0, %%mm4 \n" /* accumulate into mm4 with unsigned saturation */
+ "jg 1b \n" /* keep looping while the index is still > 0 */
+ "movq %%mm4, %0 \n" /* store the accumulator into output */
+ :"=m"(output), "+r"(i_mvc), "+r"(mvc) /* %0 = output (memory), %1 = i_mvc and %2 = mvc (read-write registers) */
+ ); /* NOTE(review): no clobber list and no memory input operand are declared, although mm0/mm2/mm3/mm4 are written and mvc's contents are read - confirm this is safe with the compilers in use */
+ sum += output[0] + output[1] + output[2] + output[3]; /* horizontal add of the four lanes; NOTE(review): lanes are 16-bit with wrapping subtract and saturating add, unlike the int arithmetic of the C version */
+ return sum;
+}
#endif
#endif
diff --git a/encoder/me.c b/encoder/me.c
index 276f543..216c909 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -186,8 +186,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
/* try extra predictors if provided */
if( h->mb.i_subpel_refine >= 3 )
{
- COST_MV_HPEL( bmx, bmy );
uint32_t bmv = pack16to32_mask(bmx,bmy);
+ COST_MV_HPEL( bmx, bmy );
do
{
if( *(uint32_t*)mvc[i] && (bmv - *(uint32_t*)mvc[i]) )
@@ -235,7 +235,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
for( i = 0; i < i_me_range; i++ )
{
DIA1_ITER( bmx, bmy );
- if( bmx == omx && bmy == omy )
+ if( (bmx == omx) & (bmy == omy) )
break;
if( !CHECK_MVRANGE(bmx, bmy) )
break;
@@ -389,9 +389,7 @@ me_hex2:
+ abs( m->mvp[1] - mvc[0][1] );
denom++;
}
- for( i = 0; i < i_mvc-1; i++ )
- mvd += abs( mvc[i][0] - mvc[i+1][0] )
- + abs( mvc[i][1] - mvc[i+1][1] );
+ mvd += x264_predictor_difference( mvc, i_mvc );
}
sad_ctx = SAD_THRESH(1000) ? 0
@@ -689,13 +687,12 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
int bcost = m->cost;
int odir = -1, bdir;
-
/* try the subpel component of the predicted mv */
if( hpel_iters && h->mb.i_subpel_refine < 3 )
{
int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
- if( mx != bmx || my != bmy )
+ if( (mx-bmx)|(my-bmy) )
COST_MV_SAD( mx, my );
}
@@ -715,7 +712,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx ] + p_cost_mvy[omy+2], bmy, omy+2 );
COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy ], bmx, omx-2, bmy, omy );
COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy ], bmx, omx+2, bmy, omy );
- if( bmx == omx && bmy == omy )
+ if( (bmx == omx) & (bmy == omy) )
break;
}
More information about the x264-devel
mailing list