[x264-devel] commit: Faster me=tesa (Loren Merritt )

git version control git at videolan.org
Mon Aug 31 23:31:45 CEST 2009


x264 | branch: master | Loren Merritt <pengvado at akuvian.org> | Sun Aug 30 20:49:07 2009 +0000| [2cd00546cdae867265fbbb58304087f776160102] | committer: Loren Merritt 

Faster me=tesa
But it still spends all too much time in me_search_ref rather than asm.

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=2cd00546cdae867265fbbb58304087f776160102
---

 encoder/me.c |   50 +++++++++++++++++++++++---------------------------
 encoder/me.h |    7 +++++++
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/encoder/me.c b/encoder/me.c
index 1ba1811..df034c6 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -633,39 +633,35 @@ me_hex2:
                 }
 
                 limit = i_me_range / 2;
-                if( nmvsad > limit*2 )
+                sad_thresh = bsad*sad_thresh>>3;
+                while( nmvsad > limit*2 && sad_thresh > bsad )
                 {
                     // halve the range if the domain is too large... eh, close enough
-                    bsad = bsad*(sad_thresh+8)>>4;
-                    for( i=0; i<nmvsad && mvsads[i].sad <= bsad; i++ );
+                    sad_thresh = (sad_thresh + bsad) >> 1;
+                    for( i=0; i<nmvsad && mvsads[i].sad <= sad_thresh; i++ );
                     for( j=i; j<nmvsad; j++ )
-                        if( mvsads[j].sad <= bsad )
-                        {
-                            /* mvsad_t is not guaranteed to be 8 bytes on all archs, so check before using explicit write-combining */
-                            if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
-                                *(uint64_t*)&mvsads[i++] = *(uint64_t*)&mvsads[j];
-                            else
-                                mvsads[i++] = mvsads[j];
-                        }
+                    {
+                        /* mvsad_t is not guaranteed to be 8 bytes on all archs, so check before using explicit write-combining */
+                        if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
+                            *(uint64_t*)&mvsads[i] = *(uint64_t*)&mvsads[j];
+                        else
+                            mvsads[i] = mvsads[j];
+                        i += mvsads[j].sad <= sad_thresh;
+                    }
                     nmvsad = i;
                 }
-                if( nmvsad > limit )
+                while( nmvsad > limit )
                 {
-                    for( i=0; i<limit; i++ )
-                    {
-                        int bj = i;
-                        int bsad = mvsads[bj].sad;
-                        for( j=i+1; j<nmvsad; j++ )
-                            COPY2_IF_LT( bsad, mvsads[j].sad, bj, j );
-                        if( bj > i )
-                        {
-                            if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
-                                XCHG( uint64_t, *(uint64_t*)&mvsads[i], *(uint64_t*)&mvsads[bj] );
-                            else
-                                XCHG( mvsad_t, mvsads[i], mvsads[bj] );
-                        }
-                    }
-                    nmvsad = limit;
+                    int bsad = mvsads[0].sad;
+                    int bi = 0;
+                    for( i=1; i<nmvsad; i++ )
+                        COPY2_IF_GT( bsad, mvsads[i].sad, bi, i );
+                    nmvsad--;
+                    mvsads[bi] = mvsads[nmvsad];
+                    if( sizeof( mvsad_t ) == sizeof( uint64_t ) )
+                        *(uint64_t*)&mvsads[bi] = *(uint64_t*)&mvsads[nmvsad];
+                    else
+                        mvsads[bi] = mvsads[nmvsad];
                 }
                 for( i=0; i<nmvsad; i++ )
                     COST_MV( mvsads[i].mx, mvsads[i].my );
diff --git a/encoder/me.h b/encoder/me.h
index 24f296f..8bdee2e 100644
--- a/encoder/me.h
+++ b/encoder/me.h
@@ -93,4 +93,11 @@ if((y)<(x))\
     (f)=(e);\
 }
 
+#define COPY2_IF_GT(x,y,a,b)\
+if((y)>(x))\
+{\
+    (x)=(y);\
+    (a)=(b);\
+}
+
 #endif



More information about the x264-devel mailing list