[x265] [PATCH] slicetype: allow queue to fill past full to prevent bottlenecks

Steve Borho steve at borho.org
Tue Jan 6 13:22:44 CET 2015


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1420538938 -19800
#      Tue Jan 06 15:38:58 2015 +0530
# Node ID d36211d0190f5aafdf7ecf6657e8d1a5ba14657c
# Parent  95f1e1f0efa4541e253125e7f564ecfbf8e647f9
slicetype: allow queue to fill past full to prevent bottlenecks

Allow the lookahead to grow just past full before we begin pulling off output
frames and handing them to frame encoders.  This lag of about one mini-GOP
allows slicetypeDecide() to stay ahead of the frame encoders, so there are always
frames in the output queue when they are needed.  It's a non-trivial performance
boost for most presets that use b-adapt 2.

diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Tue Jan 06 12:33:36 2015 +0530
+++ b/source/encoder/encoder.cpp	Tue Jan 06 15:38:58 2015 +0530
@@ -291,10 +291,7 @@
     delete [] m_threadLocalData;
 
     if (m_lookahead)
-    {
-        m_lookahead->destroy();
-        delete m_lookahead;
-    }
+        m_lookahead->stop();
 
     delete m_dpb;
     if (m_rateControl)
@@ -302,10 +299,17 @@
         m_rateControl->destroy();
         delete m_rateControl;
     }
+
     // thread pool release should always happen last
     if (m_threadPool)
         m_threadPool->release();
 
+    if (m_lookahead)
+    {
+        m_lookahead->destroy();
+        delete m_lookahead;
+    }
+
     X265_FREE(m_cuOffsetY);
     X265_FREE(m_cuOffsetC);
     X265_FREE(m_buOffsetY);
diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Tue Jan 06 12:33:36 2015 +0530
+++ b/source/encoder/slicetype.cpp	Tue Jan 06 15:38:58 2015 +0530
@@ -59,11 +59,12 @@
     : JobProvider(pool)
     , m_est(pool)
 {
-    m_bReady = 0;
+    m_bReady = false;
+    m_bBusy = false;
     m_param = param;
     m_lastKeyframe = -m_param->keyframeMax;
     m_lastNonB = NULL;
-    m_bFilling = true;
+    m_bFilled = false;
     m_bFlushed = false;
     m_widthInCU = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
     m_heightInCU = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
@@ -79,17 +80,26 @@
         ((m_param->bFrameAdaptive && m_param->bframes) ||
          m_param->rc.cuTree || m_param->scenecutThreshold ||
          (m_param->lookaheadDepth && m_param->rc.vbvBufferSize)))
-        m_pool = m_pool; /* allow use of worker thread */
+    {
+        JobProvider::enqueue();
+    }
     else
         m_pool = NULL; /* disable use of worker thread */
 }
 
+void Lookahead::stop()
+{
+    /* do not allow slicetypeDecide() to get started again */
+    m_bReady = false;
+    m_bFlushed = false;
+    m_bBusy = false;
+
+    if (m_pool)
+        JobProvider::flush(); // flush will dequeue, if it is necessary
+}
+
 void Lookahead::destroy()
 {
-    if (m_pool)
-        // flush will dequeue, if it is necessary
-        JobProvider::flush();
-
     // these two queues will be empty unless the encode was aborted
     while (!m_inputQueue.empty())
     {
@@ -120,47 +130,52 @@
 
     if (m_inputQueue.size() >= m_param->lookaheadDepth)
     {
-        /* when queue fills the first time, run slicetypeDecide synchronously,
-         * since the encoder will always be blocked here */
-        if (m_pool && !m_bFilling)
+        if (m_pool)
         {
+            m_bReady = !m_bBusy;
             m_inputQueueLock.release();
-            m_bReady = 1;
             m_pool->pokeIdleThread();
         }
         else
             slicetypeDecide();
-
-        if (m_bFilling && m_pool)
-            JobProvider::enqueue();
-        m_bFilling = false;
     }
     else
         m_inputQueueLock.release();
+
+    /* determine if the lookahead is (over) filled enough for frames to begin to
+     * be consumed by frame encoders */
+    if (!m_bFilled)
+    {
+        if (!m_param->bframes & !m_param->lookaheadDepth)
+            m_bFilled = true; /* zero-latency */
+        else if (curFrame->m_poc >= m_param->lookaheadDepth + 2 + m_param->bframes)
+            m_bFilled = true; /* full capacity plus mini-gop lag */
+    }
 }
 
 /* Called by API thread */
 void Lookahead::flush()
 {
+    m_bFilled = true;
+
     /* just in case the input queue is never allowed to fill */
-    m_bFilling = false;
-
-    /* flush synchronously */
     m_inputQueueLock.acquire();
-    if (!m_inputQueue.empty())
+    if (m_inputQueue.empty())
     {
-        slicetypeDecide();
+        m_bFlushed = true;
+        m_inputQueueLock.release();
     }
     else
-        m_inputQueueLock.release();
-
-    m_inputQueueLock.acquire();
-
-    /* bFlushed indicates that an empty output queue actually means all frames
-     * have been decided (no more inputs for the encoder) */
-    if (m_inputQueue.empty())
-        m_bFlushed = true;
-    m_inputQueueLock.release();
+    {
+        if (m_pool)
+        {
+            m_bReady = !m_bBusy;
+            m_inputQueueLock.release();
+            m_pool->pokeIdleThread();
+        }
+        else
+            slicetypeDecide();
+    }
 }
 
 /* Called by API thread. If the lookahead queue has not yet been filled the
@@ -169,37 +184,60 @@
  * flush() has been called and the output queue is empty, NULL is returned. */
 Frame* Lookahead::getDecidedPicture()
 {
+    if (!m_bFilled)
+        return NULL;
+
     m_outputQueueLock.acquire();
-
-    if (m_bFilling)
-    {
-        m_outputQueueLock.release();
-        return NULL;
-    }
-
-    while (m_outputQueue.empty() && !m_bFlushed)
-    {
-        m_outputQueueLock.release();
-        m_outputAvailable.wait();
-        m_outputQueueLock.acquire();
-    }
-
     Frame *fenc = m_outputQueue.popFront();
     m_outputQueueLock.release();
+
+    if (fenc || m_bFlushed)
+        return fenc;
+
+    do
+    {
+        m_outputAvailable.wait();
+
+        m_outputQueueLock.acquire();
+        fenc = m_outputQueue.popFront();
+        m_outputQueueLock.release();
+    }
+    while (!fenc);
+
     return fenc;
 }
 
 /* Called by pool worker threads */
 bool Lookahead::findJob(int)
 {
-    if (m_bReady > 0 && ATOMIC_DEC(&m_bReady) == 0)
+    if (!m_bReady)
+        return false;
+
+    m_inputQueueLock.acquire();
+    if (!m_bReady)
     {
+        m_inputQueueLock.release();
+        return false;
+    }
+
+    m_bReady = false;
+    m_bBusy = true;
+
+    do
+    {
+        slicetypeDecide(); // releases input queue lock
+
         m_inputQueueLock.acquire();
-        slicetypeDecide();
-        return true;
+
+        if (!m_bBusy)
+            break;
     }
-    else
-        return false;
+    while (m_inputQueue.size() >= m_param->lookaheadDepth ||
+           (m_bFlushed && m_inputQueue.size()));
+
+    m_bBusy = false;
+    m_inputQueueLock.release();
+    return true;
 }
 
 /* Called by rate-control to calculate the estimated SATD cost for a given
@@ -292,8 +330,6 @@
 {
     ProfileScopeEvent(slicetypeDecideEV);
 
-    ScopedLock lock(m_decideLock);
-
     Lowres *frames[X265_LOOKAHEAD_MAX];
     Frame *list[X265_LOOKAHEAD_MAX];
     int maxSearch = X265_MIN(m_param->lookaheadDepth, X265_LOOKAHEAD_MAX);
diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/slicetype.h
--- a/source/encoder/slicetype.h	Tue Jan 06 12:33:36 2015 +0530
+++ b/source/encoder/slicetype.h	Tue Jan 06 15:38:58 2015 +0530
@@ -147,20 +147,24 @@
 
     void addPicture(Frame*, int sliceType);
     void flush();
+    void stop();
     Frame* getDecidedPicture();
 
     void getEstimatedPictureCost(Frame *pic);
 
 protected:
 
+
     Lock  m_inputQueueLock;
     Lock  m_outputQueueLock;
-    Lock  m_decideLock;
     Event m_outputAvailable;
-    volatile int  m_bReady;
-    volatile bool m_bFilling;
-    volatile bool m_bFlushed;
-    bool findJob(int);
+
+    bool  m_bReady;   /* input lock - slicetypeDecide() can be started */
+    bool  m_bBusy;    /* input lock - slicetypeDecide() is running */
+    bool  m_bFilled;  /* enough frames in lookahead for output to be available */
+    bool  m_bFlushed; /* no more frames will be received */
+
+    bool  findJob(int);
 
     /* called by addPicture() or flush() to trigger slice decisions */
     void slicetypeDecide();


More information about the x265-devel mailing list