[x264-devel] commit: Faster me=tesa (Loren Merritt)
git version control
git at videolan.org
Mon Aug 31 23:31:45 CEST 2009
x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Sun Aug 30 20:49:07 2009 +0000| [2cd00546cdae867265fbbb58304087f776160102] | committer: Loren Merritt
Faster me=tesa
But it still spends all too much time in me_search_ref rather than asm.
> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=2cd00546cdae867265fbbb58304087f776160102
---
encoder/me.c | 50 +++++++++++++++++++++++---------------------------
encoder/me.h | 7 +++++++
2 files changed, 30 insertions(+), 27 deletions(-)
diff --git a/encoder/me.c b/encoder/me.c
index 1ba1811..df034c6 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -633,39 +633,35 @@ me_hex2:
}
limit = i_me_range / 2;
- if( nmvsad > limit*2 )
+ sad_thresh = bsad*sad_thresh>>3;
+ while( nmvsad > limit*2 && sad_thresh > bsad )
{
// halve the range if the domain is too large... eh, close enough
- bsad = bsad*(sad_thresh+8)>>4;
- for( i=0; i<nmvsad && mvsads[i].sad <= bsad; i++ );
+ sad_thresh = (sad_thresh + bsad) >> 1;
+ for( i=0; i<nmvsad && mvsads[i].sad <= sad_thresh; i++ );
for( j=i; j<nmvsad; j++ )
- if( mvsads[j].sad <= bsad )
- {
- /* mvsad_t is not guaranteed to be 8 bytes on all archs, so check before using explicit write-combining */
- if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
- *(uint64_t*)&mvsads[i++] = *(uint64_t*)&mvsads[j];
- else
- mvsads[i++] = mvsads[j];
- }
+ {
+ /* mvsad_t is not guaranteed to be 8 bytes on all archs, so check before using explicit write-combining */
+ if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
+ *(uint64_t*)&mvsads[i] = *(uint64_t*)&mvsads[j];
+ else
+ mvsads[i] = mvsads[j];
+ i += mvsads[j].sad <= sad_thresh;
+ }
nmvsad = i;
}
- if( nmvsad > limit )
+ while( nmvsad > limit )
{
- for( i=0; i<limit; i++ )
- {
- int bj = i;
- int bsad = mvsads[bj].sad;
- for( j=i+1; j<nmvsad; j++ )
- COPY2_IF_LT( bsad, mvsads[j].sad, bj, j );
- if( bj > i )
- {
- if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
- XCHG( uint64_t, *(uint64_t*)&mvsads[i], *(uint64_t*)&mvsads[bj] );
- else
- XCHG( mvsad_t, mvsads[i], mvsads[bj] );
- }
- }
- nmvsad = limit;
+ int bsad = mvsads[0].sad;
+ int bi = 0;
+ for( i=1; i<nmvsad; i++ )
+ COPY2_IF_GT( bsad, mvsads[i].sad, bi, i );
+ nmvsad--;
+ mvsads[bi] = mvsads[nmvsad];
+ if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
+ *(uint64_t*)&mvsads[bi] = *(uint64_t*)&mvsads[nmvsad];
+ else
+ mvsads[bi] = mvsads[nmvsad];
}
for( i=0; i<nmvsad; i++ )
COST_MV( mvsads[i].mx, mvsads[i].my );
diff --git a/encoder/me.h b/encoder/me.h
index 24f296f..8bdee2e 100644
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -93,4 +93,11 @@ if((y)<(x))\
(f)=(e);\
}
+#define COPY2_IF_GT(x,y,a,b)\
+if((y)>(x))\
+{\
+ (x)=(y);\
+ (a)=(b);\
+}
+
#endif
More information about the x264-devel mailing list.