[x265] [PATCH RFC] encoder: allocate thread local data from the context of the worker thread
Steve Borho
steve at borho.org
Sat Jul 12 08:18:57 CEST 2014
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1405145839 18000
# Sat Jul 12 01:17:19 2014 -0500
# Node ID 7163a5f40da0bd2836e435eff4c854229727a781
# Parent 6af56f7c870355152c9897a7bca9fbd8047dd5fc
encoder: allocate thread local data from the context of the worker thread
This is a simple change, data-structure-wise, but it delays allocation of the
ThreadLocalData structures until the worker thread which will use the structure
can allocate it itself. On multi-socket systems, this will keep these structures
closer to the thread that uses them, cache-wise. We probably need to pin our
worker threads with core affinity to make this properly effective.
diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Jul 11 19:38:35 2014 -0500
+++ b/source/encoder/encoder.cpp Sat Jul 12 01:17:19 2014 -0500
@@ -96,14 +96,10 @@
}
/* Allocate thread local data shared by all frame encoders */
- ThreadPool *pool = ThreadPool::getThreadPool();
- const int poolThreadCount = pool ? pool->getThreadCount() : 1;
- m_threadLocalData = new ThreadLocalData[poolThreadCount];
+ const int poolThreadCount = m_threadPool ? m_threadPool->getThreadCount() : 1;
+ m_threadLocalData = X265_MALLOC(ThreadLocalData*, poolThreadCount);
if (m_threadLocalData)
- {
- for (int i = 0; i < poolThreadCount; i++)
- m_threadLocalData[i].init(*this);
- }
+ memset(m_threadLocalData, 0, sizeof(ThreadLocalData*) * poolThreadCount);
else
m_aborted = true;
@@ -164,8 +160,13 @@
delete [] m_frameEncoder;
}
+ const int poolThreadCount = m_threadPool ? m_threadPool->getThreadCount() : 1;
if (m_threadLocalData)
- delete [] m_threadLocalData;
+ {
+ for (int i = 0; i < poolThreadCount; i++)
+ delete m_threadLocalData[i];
+ X265_FREE(m_threadLocalData);
+ }
if (m_lookahead)
{
@@ -623,7 +624,9 @@
{
for (int i = 0; i < poolThreadCount; i++)
{
- StatisticLog& enclog = m_threadLocalData[i].m_cuCoder.m_sliceTypeLog[sliceType];
+ if (!m_threadLocalData[i])
+ continue;
+ StatisticLog& enclog = m_threadLocalData[i]->m_cuCoder.m_sliceTypeLog[sliceType];
if (depth == 0)
finalLog.totalCu += enclog.totalCu;
finalLog.cntIntra[depth] += enclog.cntIntra[depth];
diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/encoder.h
--- a/source/encoder/encoder.h Fri Jul 11 19:38:35 2014 -0500
+++ b/source/encoder/encoder.h Sat Jul 12 01:17:19 2014 -0500
@@ -71,7 +71,6 @@
{
private:
- bool m_aborted; // fatal error detected
int m_pocLast; ///< time index (POC)
int m_encodedFrameNum;
int m_outputCount;
@@ -166,7 +165,8 @@
x265_param* m_param;
RateControl* m_rateControl;
- ThreadLocalData* m_threadLocalData;
+ ThreadLocalData** m_threadLocalData;
+ bool m_aborted; // fatal error detected
bool m_bEnableRDOQ;
diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Fri Jul 11 19:38:35 2014 -0500
+++ b/source/encoder/frameencoder.cpp Sat Jul 12 01:17:19 2014 -0500
@@ -837,13 +837,32 @@
const int realRow = row >> 1;
const int typeNum = row & 1;
- ThreadLocalData& tld = threadId >= 0 ? m_top->m_threadLocalData[threadId] : m_tld;
+ ThreadLocalData* tld;
+ if (threadId < 0)
+ tld = &m_tld;
+ else
+ {
+ if (!m_top->m_threadLocalData[threadId])
+ {
+ m_top->m_threadLocalData[threadId] = new ThreadLocalData;
+ if (m_top->m_threadLocalData[threadId])
+ m_top->m_threadLocalData[threadId]->init(*m_top);
+ else
+ {
+ x265_log(m_param, X265_LOG_ERROR, "unable to allocate thread local data, aborting\n");
+ m_completionEvent.trigger();
+ m_top->m_aborted = true;
+ return;
+ }
+ }
+ tld = m_top->m_threadLocalData[threadId];
+ }
if (!typeNum)
- processRowEncoder(realRow, tld);
+ processRowEncoder(realRow, *tld);
else
{
- processRowFilter(realRow, tld);
+ processRowFilter(realRow, *tld);
// NOTE: Active next row
if (realRow != m_numRows - 1)
More information about the x265-devel mailing list