[x264-devel] Improve lookahead-threads auto selection

Jason Garrett-Glaser git at videolan.org
Wed Feb 27 00:18:03 CET 2013


x264 | branch: master | Jason Garrett-Glaser <jason at x264.com> | Thu Jan 10 13:15:52 2013 -0800| [4d220bc18cb177b6812c381e7fb808f9ae3189e1] | committer: Jason Garrett-Glaser

Improve lookahead-threads auto selection
Smarter decision to improve fast-first-pass performance in 2-pass encodes.
Dramatically improves CPU utilization on multi-core systems.

Tested on a quad-core Ivy Bridge (12 encoder threads, 1080p):
Fast first pass:
veryfast:     ~7% faster
faster:      ~11% faster
fast/medium: ~15% faster
slow/slower: ~42% faster
veryslow:    ~55% faster
CRF/1-pass:
veryfast:     ~9% faster
(all others remained the same)

> http://git.videolan.org/gitweb.cgi/x264.git/?a=commit;h=4d220bc18cb177b6812c381e7fb808f9ae3189e1
---

 encoder/encoder.c |   32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/encoder/encoder.c b/encoder/encoder.c
index c4c8fe6..a289c8b 100644
--- a/encoder/encoder.c
+++ b/encoder/encoder.c
@@ -503,8 +503,6 @@ static int x264_validate_parameters( x264_t *h, int b_open )
 
     if( h->param.i_threads == X264_THREADS_AUTO )
         h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
-    if( h->param.i_lookahead_threads == X264_THREADS_AUTO )
-        h->param.i_lookahead_threads = h->param.i_threads / (h->param.b_sliced_threads?1:6);
     int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 );
     if( h->param.i_threads > 1 )
     {
@@ -518,7 +516,6 @@ static int x264_validate_parameters( x264_t *h, int b_open )
             h->param.i_threads = X264_MIN( h->param.i_threads, max_sliced_threads );
     }
     h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_THREAD_MAX );
-    h->param.i_lookahead_threads = x264_clip3( h->param.i_lookahead_threads, 1, X264_MIN( max_sliced_threads, X264_LOOKAHEAD_THREAD_MAX ) );
     if( h->param.i_threads == 1 )
     {
         h->param.b_sliced_threads = 0;
@@ -895,6 +892,35 @@ static int x264_validate_parameters( x264_t *h, int b_open )
 
     h->param.analyse.i_weighted_pred = x264_clip3( h->param.analyse.i_weighted_pred, X264_WEIGHTP_NONE, X264_WEIGHTP_SMART );
 
+    if( h->param.i_lookahead_threads == X264_THREADS_AUTO )
+    {
+        if( h->param.b_sliced_threads )
+            h->param.i_lookahead_threads = h->param.i_threads;
+        else
+        {
+            /* If we're using much slower lookahead settings than encoding settings, it helps a lot to use
+             * more lookahead threads.  This typically happens in the first pass of a two-pass encode, so
+             * try to guess at this sort of case.
+             *
+             * Tuned by a little bit of real encoding with the various presets. */
+            int badapt = h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS;
+            int subme = X264_MIN( h->param.analyse.i_subpel_refine / 3, 3 ) + (h->param.analyse.i_subpel_refine > 1);
+            int bframes = X264_MIN( (h->param.i_bframe - 1) / 3, 3 );
+
+            /* [b-adapt 0/1 vs 2][quantized subme][quantized bframes] */
+            static const uint8_t lookahead_thread_div[2][5][4] =
+            {{{6,6,6,6}, {3,3,3,3}, {4,4,4,4}, {6,6,6,6}, {12,12,12,12}},
+             {{3,2,1,1}, {2,1,1,1}, {4,3,2,1}, {6,4,3,2}, {12, 9, 6, 4}}};
+
+            h->param.i_lookahead_threads = h->param.i_threads / lookahead_thread_div[badapt][subme][bframes];
+            /* Since too many lookahead threads significantly degrades lookahead accuracy, limit auto
+             * lookahead threads to about 8 macroblock rows high each at worst.  This number is chosen
+             * pretty much arbitrarily. */
+            h->param.i_lookahead_threads = X264_MIN( h->param.i_lookahead_threads, h->param.i_height / 128 );
+        }
+    }
+    h->param.i_lookahead_threads = x264_clip3( h->param.i_lookahead_threads, 1, X264_MIN( max_sliced_threads, X264_LOOKAHEAD_THREAD_MAX ) );
+
     if( PARAM_INTERLACED )
     {
         if( h->param.analyse.i_me_method >= X264_ME_ESA )



More information about the x264-devel mailing list