[x265] [PATCH] Allocate frame threads based on available pool threads

Pradeep Ramachandran pradeep at multicorewareinc.com
Wed Jul 5 17:00:39 CEST 2017


On Wed, Jul 5, 2017 at 3:28 PM, <aruna at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Aruna Matheswaran <aruna at multicorewareinc.com>
> # Date 1498107303 -19800
> #      Thu Jun 22 10:25:03 2017 +0530
> # Node ID 006c75cf822e92e3865fc97d21c25b0fdc072b51
> # Parent  58b4fa89c42da0e9ef229035ea02f29d3a02fffe
> Allocate frame threads based on available pool threads
>
> This patch decides #frame-threads based on #pool-threads available. If
> pools are not
> specified, #frame-threads will be decided based on detected #CPU-threads.
>
> This patch also decreases #frame-threads allocated for #pool-threads in the
> intervals (15 - 31) and (>= 32) as there is high run-to-run variation in
> bitrate
> and SSIM with higher frame threads. With this reduction in #frame-threads
> there
> is a ~3-4 % drop in fps with little SSIM improvement for #pool-threads (15 -
> 31)
> and no significant change in performance for #pool-threads (>= 32).
>

Thanks. The improvements in quality seem to justify the change.
This has been pushed to the default branch of x265.


> diff -r 58b4fa89c42d -r 006c75cf822e source/common/threadpool.cpp
> --- a/source/common/threadpool.cpp      Fri Jun 30 16:31:29 2017 +0530
> +++ b/source/common/threadpool.cpp      Thu Jun 22 10:25:03 2017 +0530
> @@ -253,6 +253,7 @@
>      int cpusPerNode[MAX_NODE_NUM + 1];
>      int threadsPerPool[MAX_NODE_NUM + 2];
>      uint64_t nodeMaskPerPool[MAX_NODE_NUM + 2];
> +    int totalNumThreads = 0;
>
>      memset(cpusPerNode, 0, sizeof(cpusPerNode));
>      memset(threadsPerPool, 0, sizeof(threadsPerPool));
> @@ -388,9 +389,23 @@
>          if (bNumaSupport)
>              x265_log(p, X265_LOG_DEBUG, "NUMA node %d may use %d logical
> cores\n", i, cpusPerNode[i]);
>          if (threadsPerPool[i])
> +        {
>              numPools += (threadsPerPool[i] + MAX_POOL_THREADS - 1) /
> MAX_POOL_THREADS;
> +            totalNumThreads += threadsPerPool[i];
> +        }
>      }
> +    if (!isThreadsReserved)
> +    {
> +        if (!numPools)
> +        {
> +            x265_log(p, X265_LOG_DEBUG, "No pool thread available.
> Deciding frame-threads based on detected CPU threads\n");
> +            totalNumThreads = ThreadPool::getCpuCount(); // auto-detect
> frame threads
> +        }
>
> +        if (!p->frameNumThreads)
> +            ThreadPool::getFrameThreadsCount(p, totalNumThreads);
> +    }
> +
>      if (!numPools)
>          return NULL;
>
> @@ -412,7 +427,7 @@
>                  node++;
>              int numThreads = X265_MIN(MAX_POOL_THREADS,
> threadsPerPool[node]);
>              int origNumThreads = numThreads;
> -            if (p->lookaheadThreads > numThreads / 2)
> +            if (i == 0 && p->lookaheadThreads > numThreads / 2)
>              {
>                  p->lookaheadThreads = numThreads / 2;
>                  x265_log(p, X265_LOG_DEBUG, "Setting lookahead threads to
> a maximum of half the total number of threads\n");
> @@ -423,7 +438,7 @@
>                  maxProviders = 1;
>              }
>
> -            else
> +            else if (i == 0)
>                  numThreads -= p->lookaheadThreads;
>              if (!pools[i].create(numThreads, maxProviders,
> nodeMaskPerPool[node]))
>              {
> @@ -643,4 +658,21 @@
>  #endif
>  }
>
> +void ThreadPool::getFrameThreadsCount(x265_param* p, int cpuCount)
> +{
> +    int rows = (p->sourceHeight + p->maxCUSize - 1) >>
> g_log2Size[p->maxCUSize];
> +    if (!p->bEnableWavefront)
> +        p->frameNumThreads = X265_MIN3(cpuCount, (rows + 1) / 2,
> X265_MAX_FRAME_THREADS);
> +    else if (cpuCount >= 32)
> +        p->frameNumThreads = (p->sourceHeight > 2000) ? 6 : 5;
> +    else if (cpuCount >= 16)
> +        p->frameNumThreads = 4;
> +    else if (cpuCount >= 8)
> +        p->frameNumThreads = 3;
> +    else if (cpuCount >= 4)
> +        p->frameNumThreads = 2;
> +    else
> +        p->frameNumThreads = 1;
> +}
> +
>  } // end namespace X265_NS
> diff -r 58b4fa89c42d -r 006c75cf822e source/common/threadpool.h
> --- a/source/common/threadpool.h        Fri Jun 30 16:31:29 2017 +0530
> +++ b/source/common/threadpool.h        Thu Jun 22 10:25:03 2017 +0530
> @@ -105,6 +105,7 @@
>      static ThreadPool* allocThreadPools(x265_param* p, int& numPools,
> bool isThreadsReserved);
>      static int  getCpuCount();
>      static int  getNumaNodeCount();
> +    static void getFrameThreadsCount(x265_param* p,int cpuCount);
>  };
>
>  /* Any worker thread may enlist the help of idle worker threads from the
> same
> diff -r 58b4fa89c42d -r 006c75cf822e source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Fri Jun 30 16:31:29 2017 +0530
> +++ b/source/encoder/encoder.cpp        Thu Jun 22 10:25:03 2017 +0530
> @@ -134,26 +134,19 @@
>      if (!p->bEnableWavefront && !p->bDistributeModeAnalysis && !p->bDistributeMotionEstimation
> && !p->lookaheadSlices)
>          allowPools = false;
>
> -    if (!p->frameNumThreads)
> -    {
> -        // auto-detect frame threads
> -        int cpuCount = ThreadPool::getCpuCount();
> -        if (!p->bEnableWavefront)
> -            p->frameNumThreads = X265_MIN3(cpuCount, (rows + 1) / 2,
> X265_MAX_FRAME_THREADS);
> -        else if (cpuCount >= 32)
> -            p->frameNumThreads = (p->sourceHeight > 2000) ? 8 : 6; //
> dual-socket 10-core IvyBridge or higher
> -        else if (cpuCount >= 16)
> -            p->frameNumThreads = 5; // 8 HT cores, or dual socket
> -        else if (cpuCount >= 8)
> -            p->frameNumThreads = 3; // 4 HT cores
> -        else if (cpuCount >= 4)
> -            p->frameNumThreads = 2; // Dual or Quad core
> -        else
> -            p->frameNumThreads = 1;
> -    }
>      m_numPools = 0;
>      if (allowPools)
>          m_threadPool = ThreadPool::allocThreadPools(p, m_numPools, 0);
> +    else
> +    {
> +        if (!p->frameNumThreads)
> +        {
> +            // auto-detect frame threads
> +            int cpuCount = ThreadPool::getCpuCount();
> +            ThreadPool::getFrameThreadsCount(p, cpuCount);
> +        }
> +    }
> +
>      if (!m_numPools)
>      {
>          // issue warnings if any of these features were requested
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20170705/381c34a3/attachment.html>


More information about the x265-devel mailing list