[x265] [PATCH RFC] encoder: allocate thread local data from the context of the worker thread

Steve Borho steve at borho.org
Sat Jul 12 08:18:57 CEST 2014


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1405145839 18000
#      Sat Jul 12 01:17:19 2014 -0500
# Node ID 7163a5f40da0bd2836e435eff4c854229727a781
# Parent  6af56f7c870355152c9897a7bca9fbd8047dd5fc
encoder: allocate thread local data from the context of the worker thread

This is a simple change, data-structure-wise, but it delays allocation of each
ThreadLocalData structure until the worker thread that will use the structure
can allocate it itself. On multi-socket systems, this will make these structures
closer to reach, cache-wise. We probably need to flag our worker threads with
core affinity to make this properly effective.

diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Fri Jul 11 19:38:35 2014 -0500
+++ b/source/encoder/encoder.cpp	Sat Jul 12 01:17:19 2014 -0500
@@ -96,14 +96,10 @@
     }
 
     /* Allocate thread local data shared by all frame encoders */
-    ThreadPool *pool = ThreadPool::getThreadPool();
-    const int poolThreadCount = pool ? pool->getThreadCount() : 1;
-    m_threadLocalData = new ThreadLocalData[poolThreadCount];
+    const int poolThreadCount = m_threadPool ? m_threadPool->getThreadCount() : 1;
+    m_threadLocalData = X265_MALLOC(ThreadLocalData*, poolThreadCount);
     if (m_threadLocalData)
-    {
-        for (int i = 0; i < poolThreadCount; i++)
-            m_threadLocalData[i].init(*this);
-    }
+        memset(m_threadLocalData, 0, sizeof(ThreadLocalData*) * poolThreadCount);
     else
         m_aborted = true;
 
@@ -164,8 +160,13 @@
         delete [] m_frameEncoder;
     }
 
+    const int poolThreadCount = m_threadPool ? m_threadPool->getThreadCount() : 1;
     if (m_threadLocalData)
-        delete [] m_threadLocalData;
+    {
+        for (int i = 0; i < poolThreadCount; i++)
+            delete m_threadLocalData[i];
+        X265_FREE(m_threadLocalData);
+    }
 
     if (m_lookahead)
     {
@@ -623,7 +624,9 @@
         {
             for (int i = 0; i < poolThreadCount; i++)
             {
-                StatisticLog& enclog = m_threadLocalData[i].m_cuCoder.m_sliceTypeLog[sliceType];
+                if (!m_threadLocalData[i])
+                    continue;
+                StatisticLog& enclog = m_threadLocalData[i]->m_cuCoder.m_sliceTypeLog[sliceType];
                 if (depth == 0)
                     finalLog.totalCu += enclog.totalCu;
                 finalLog.cntIntra[depth] += enclog.cntIntra[depth];
diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/encoder.h
--- a/source/encoder/encoder.h	Fri Jul 11 19:38:35 2014 -0500
+++ b/source/encoder/encoder.h	Sat Jul 12 01:17:19 2014 -0500
@@ -71,7 +71,6 @@
 {
 private:
 
-    bool               m_aborted;          // fatal error detected
     int                m_pocLast;          ///< time index (POC)
     int                m_encodedFrameNum;
     int                m_outputCount;
@@ -166,7 +165,8 @@
 
     x265_param*        m_param;
     RateControl*       m_rateControl;
-    ThreadLocalData*   m_threadLocalData;
+    ThreadLocalData**  m_threadLocalData;
+    bool               m_aborted;          // fatal error detected
 
     bool               m_bEnableRDOQ;
 
diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Fri Jul 11 19:38:35 2014 -0500
+++ b/source/encoder/frameencoder.cpp	Sat Jul 12 01:17:19 2014 -0500
@@ -837,13 +837,32 @@
     const int realRow = row >> 1;
     const int typeNum = row & 1;
 
-    ThreadLocalData& tld = threadId >= 0 ? m_top->m_threadLocalData[threadId] : m_tld;
+    ThreadLocalData* tld;
+    if (threadId < 0)
+        tld = &m_tld;
+    else
+    {
+        if (!m_top->m_threadLocalData[threadId])
+        {
+            m_top->m_threadLocalData[threadId] = new ThreadLocalData;
+            if (m_top->m_threadLocalData[threadId])
+                m_top->m_threadLocalData[threadId]->init(*m_top);
+            else
+            {
+                x265_log(m_param, X265_LOG_ERROR, "unable to allocate thread local data, aborting\n");
+                m_completionEvent.trigger();
+                m_top->m_aborted = true;
+                return;
+            }
+        }
+        tld = m_top->m_threadLocalData[threadId];
+    }
 
     if (!typeNum)
-        processRowEncoder(realRow, tld);
+        processRowEncoder(realRow, *tld);
     else
     {
-        processRowFilter(realRow, tld);
+        processRowFilter(realRow, *tld);
 
         // NOTE: Active next row
         if (realRow != m_numRows - 1)


More information about the x265-devel mailing list