[x265] [PATCH] slicetype: allow queue to fill past full to prevent bottlenecks

Satoshi Nakagawa nakagawa424 at oki.com
Thu Jan 8 10:13:01 CET 2015


Steve,

This patch causes a deadlock/freeze on short clips, i.e. clips with fewer frames than the lookahead depth.

# My test script often uses -f 17 (i.e. it encodes only 17 frames).

Please check.

Satoshi

> -----Original Message-----
> From: x265-devel [mailto:x265-devel-bounces at videolan.org] On Behalf Of
> Steve Borho
> Sent: Tuesday, January 06, 2015 9:23 PM
> To: x265-devel at videolan.org
> Subject: [x265] [PATCH] slicetype: allow queue to fill past full to
> prevent bottlenecks
> 
> # HG changeset patch
> # User Steve Borho <steve at borho.org>
> # Date 1420538938 -19800
> #      Tue Jan 06 15:38:58 2015 +0530
> # Node ID d36211d0190f5aafdf7ecf6657e8d1a5ba14657c
> # Parent  95f1e1f0efa4541e253125e7f564ecfbf8e647f9
> slicetype: allow queue to fill past full to prevent bottlenecks
> 
> Allow the lookahead to grow just past full before we begin pulling off
> output frames and handing them to frame encoders.  This lag of about one
> mini-gop allows slicetypeDecide to stay ahead of the frame encoders and
> always have frames in the output queue when they are needed.  It's a
> non-trivial performance boost for most presets that used b-adapt 2.
> 
> diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp	Tue Jan 06 12:33:36 2015 +0530
> +++ b/source/encoder/encoder.cpp	Tue Jan 06 15:38:58 2015 +0530
> @@ -291,10 +291,7 @@
>      delete [] m_threadLocalData;
> 
>      if (m_lookahead)
> -    {
> -        m_lookahead->destroy();
> -        delete m_lookahead;
> -    }
> +        m_lookahead->stop();
> 
>      delete m_dpb;
>      if (m_rateControl)
> @@ -302,10 +299,17 @@
>          m_rateControl->destroy();
>          delete m_rateControl;
>      }
> +
>      // thread pool release should always happen last
>      if (m_threadPool)
>          m_threadPool->release();
> 
> +    if (m_lookahead)
> +    {
> +        m_lookahead->destroy();
> +        delete m_lookahead;
> +    }
> +
>      X265_FREE(m_cuOffsetY);
>      X265_FREE(m_cuOffsetC);
>      X265_FREE(m_buOffsetY);
> diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp	Tue Jan 06 12:33:36 2015 +0530
> +++ b/source/encoder/slicetype.cpp	Tue Jan 06 15:38:58 2015 +0530
> @@ -59,11 +59,12 @@
>      : JobProvider(pool)
>      , m_est(pool)
>  {
> -    m_bReady = 0;
> +    m_bReady = false;
> +    m_bBusy = false;
>      m_param = param;
>      m_lastKeyframe = -m_param->keyframeMax;
>      m_lastNonB = NULL;
> -    m_bFilling = true;
> +    m_bFilled = false;
>      m_bFlushed = false;
>      m_widthInCU = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
>      m_heightInCU = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
> @@ -79,17 +80,26 @@
>          ((m_param->bFrameAdaptive && m_param->bframes) ||
>           m_param->rc.cuTree || m_param->scenecutThreshold ||
>           (m_param->lookaheadDepth && m_param->rc.vbvBufferSize)))
> -        m_pool = m_pool; /* allow use of worker thread */
> +    {
> +        JobProvider::enqueue();
> +    }
>      else
>          m_pool = NULL; /* disable use of worker thread */
>  }
> 
> +void Lookahead::stop()
> +{
> +    /* do not allow slicetypeDecide() to get started again */
> +    m_bReady = false;
> +    m_bFlushed = false;
> +    m_bBusy = false;
> +
> +    if (m_pool)
> +        JobProvider::flush(); // flush will dequeue, if it is necessary
> +}
> +
>  void Lookahead::destroy()
>  {
> -    if (m_pool)
> -        // flush will dequeue, if it is necessary
> -        JobProvider::flush();
> -
>      // these two queues will be empty unless the encode was aborted
>      while (!m_inputQueue.empty())
>      {
> @@ -120,47 +130,52 @@
> 
>      if (m_inputQueue.size() >= m_param->lookaheadDepth)
>      {
> -        /* when queue fills the first time, run slicetypeDecide
> synchronously,
> -         * since the encoder will always be blocked here */
> -        if (m_pool && !m_bFilling)
> +        if (m_pool)
>          {
> +            m_bReady = !m_bBusy;
>              m_inputQueueLock.release();
> -            m_bReady = 1;
>              m_pool->pokeIdleThread();
>          }
>          else
>              slicetypeDecide();
> -
> -        if (m_bFilling && m_pool)
> -            JobProvider::enqueue();
> -        m_bFilling = false;
>      }
>      else
>          m_inputQueueLock.release();
> +
> +    /* determine if the lookahead is (over) filled enough for frames
> to begin to
> +     * be consumed by frame encoders */
> +    if (!m_bFilled)
> +    {
> +        if (!m_param->bframes & !m_param->lookaheadDepth)
> +            m_bFilled = true; /* zero-latency */
> +        else if (curFrame->m_poc >= m_param->lookaheadDepth + 2 +
> m_param->bframes)
> +            m_bFilled = true; /* full capacity plus mini-gop lag */
> +    }
>  }
> 
>  /* Called by API thread */
>  void Lookahead::flush()
>  {
> +    m_bFilled = true;
> +
>      /* just in case the input queue is never allowed to fill */
> -    m_bFilling = false;
> -
> -    /* flush synchronously */
>      m_inputQueueLock.acquire();
> -    if (!m_inputQueue.empty())
> +    if (m_inputQueue.empty())
>      {
> -        slicetypeDecide();
> +        m_bFlushed = true;
> +        m_inputQueueLock.release();
>      }
>      else
> -        m_inputQueueLock.release();
> -
> -    m_inputQueueLock.acquire();
> -
> -    /* bFlushed indicates that an empty output queue actually means all
> frames
> -     * have been decided (no more inputs for the encoder) */
> -    if (m_inputQueue.empty())
> -        m_bFlushed = true;
> -    m_inputQueueLock.release();
> +    {
> +        if (m_pool)
> +        {
> +            m_bReady = !m_bBusy;
> +            m_inputQueueLock.release();
> +            m_pool->pokeIdleThread();
> +        }
> +        else
> +            slicetypeDecide();
> +    }
>  }
> 
>  /* Called by API thread. If the lookahead queue has not yet been filled the
> @@ -169,37 +184,60 @@
>   * flush() has been called and the output queue is empty, NULL is returned. */
>  Frame* Lookahead::getDecidedPicture()
>  {
> +    if (!m_bFilled)
> +        return NULL;
> +
>      m_outputQueueLock.acquire();
> -
> -    if (m_bFilling)
> -    {
> -        m_outputQueueLock.release();
> -        return NULL;
> -    }
> -
> -    while (m_outputQueue.empty() && !m_bFlushed)
> -    {
> -        m_outputQueueLock.release();
> -        m_outputAvailable.wait();
> -        m_outputQueueLock.acquire();
> -    }
> -
>      Frame *fenc = m_outputQueue.popFront();
>      m_outputQueueLock.release();
> +
> +    if (fenc || m_bFlushed)
> +        return fenc;
> +
> +    do
> +    {
> +        m_outputAvailable.wait();
> +
> +        m_outputQueueLock.acquire();
> +        fenc = m_outputQueue.popFront();
> +        m_outputQueueLock.release();
> +    }
> +    while (!fenc);
> +
>      return fenc;
>  }
> 
>  /* Called by pool worker threads */
>  bool Lookahead::findJob(int)
>  {
> -    if (m_bReady > 0 && ATOMIC_DEC(&m_bReady) == 0)
> +    if (!m_bReady)
> +        return false;
> +
> +    m_inputQueueLock.acquire();
> +    if (!m_bReady)
>      {
> +        m_inputQueueLock.release();
> +        return false;
> +    }
> +
> +    m_bReady = false;
> +    m_bBusy = true;
> +
> +    do
> +    {
> +        slicetypeDecide(); // releases input queue lock
> +
>          m_inputQueueLock.acquire();
> -        slicetypeDecide();
> -        return true;
> +
> +        if (!m_bBusy)
> +            break;
>      }
> -    else
> -        return false;
> +    while (m_inputQueue.size() >= m_param->lookaheadDepth ||
> +           (m_bFlushed && m_inputQueue.size()));
> +
> +    m_bBusy = false;
> +    m_inputQueueLock.release();
> +    return true;
>  }
> 
>  /* Called by rate-control to calculate the estimated SATD cost for a given
> @@ -292,8 +330,6 @@
>  {
>      ProfileScopeEvent(slicetypeDecideEV);
> 
> -    ScopedLock lock(m_decideLock);
> -
>      Lowres *frames[X265_LOOKAHEAD_MAX];
>      Frame *list[X265_LOOKAHEAD_MAX];
>      int maxSearch = X265_MIN(m_param->lookaheadDepth, X265_LOOKAHEAD_MAX);
> diff -r 95f1e1f0efa4 -r d36211d0190f source/encoder/slicetype.h
> --- a/source/encoder/slicetype.h	Tue Jan 06 12:33:36 2015 +0530
> +++ b/source/encoder/slicetype.h	Tue Jan 06 15:38:58 2015 +0530
> @@ -147,20 +147,24 @@
> 
>      void addPicture(Frame*, int sliceType);
>      void flush();
> +    void stop();
>      Frame* getDecidedPicture();
> 
>      void getEstimatedPictureCost(Frame *pic);
> 
>  protected:
> 
> +
>      Lock  m_inputQueueLock;
>      Lock  m_outputQueueLock;
> -    Lock  m_decideLock;
>      Event m_outputAvailable;
> -    volatile int  m_bReady;
> -    volatile bool m_bFilling;
> -    volatile bool m_bFlushed;
> -    bool findJob(int);
> +
> +    bool  m_bReady;   /* input lock - slicetypeDecide() can be started
> */
> +    bool  m_bBusy;    /* input lock - slicetypeDecide() is running */
> +    bool  m_bFilled;  /* enough frames in lookahead for output to be
> available */
> +    bool  m_bFlushed; /* no more frames will be received */
> +
> +    bool  findJob(int);
> 
>      /* called by addPicture() or flush() to trigger slice decisions */
>      void slicetypeDecide();
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



More information about the x265-devel mailing list