[x265] [PATCH 2 of 2 EXPERIMENTAL] framedata: keep per-NUMA node copies of recon buffers
Steve Borho
steve at borho.org
Tue Aug 4 05:33:37 CEST 2015
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1438658524 18000
# Mon Aug 03 22:22:04 2015 -0500
# Node ID 642e40276ab81696f5a6b342d8d71345f4e58339
# Parent 02e84edaa14399a3a68ade8617c63422f51a305b
framedata: keep per-NUMA node copies of recon buffers
diff -r 02e84edaa143 -r 642e40276ab8 source/common/framedata.cpp
--- a/source/common/framedata.cpp Mon Aug 03 17:55:58 2015 -0500
+++ b/source/common/framedata.cpp Mon Aug 03 22:22:04 2015 -0500
@@ -110,6 +110,80 @@
return false;
}
+/* Ensure recon pixels are available for NUMA node.
+ *
+ * Copies the reconstructed rows that `node` does not hold yet from the owner
+ * node's recon picture into the node's own recon picture, for the luma plane
+ * and both chroma planes, then records the new row count for the node.
+ *
+ * node         - index into m_nodes of the node that needs the pixels; nothing
+ *                is done when it is the owner node
+ * finishedRows - number of rows (each g_maxCUSize luma lines tall) that must be
+ *                present after the call; clamped to maxNumRows
+ * maxNumRows   - total number of rows; once finishedRows reaches it the bottom
+ *                padding is copied as well */
+void FrameData::copyRows(int node, int finishedRows, int maxNumRows)
+{
+    finishedRows = X265_MIN(finishedRows, maxNumRows);
+
+    /* cheap check without the lock; repeated below once copyLock is held */
+    if (node == m_ownerNode || m_nodes[node].rows >= finishedRows)
+        return;
+
+    ScopedLock s(m_nodes[node].copyLock);
+
+    /* rows already present in this node's copy, read under the lock */
+    const int copiedRows = m_nodes[node].rows;
+    if (copiedRows >= finishedRows)
+        return;
+
+    const int newRows = finishedRows - copiedRows;
+
+    PicYuv* srcPic = m_nodes[m_ownerNode].reconPic;
+    PicYuv* dstPic = m_nodes[node].reconPic;
+
+    /* start at the left edge of the padded picture so whole strides are copied */
+    const pixel* src = srcPic->m_picOrg[0] - srcPic->m_lumaMarginX;
+    pixel* dst = dstPic->m_picOrg[0] - dstPic->m_lumaMarginX;
+
+    /* every length and offset below is counted in pixels, not in bytes.
+     * NOTE(review): a single contiguous copy presumes that source and
+     * destination pictures share the same stride and margins - confirm */
+    intptr_t len = srcPic->m_stride * g_maxCUSize * newRows;
+
+    if (copiedRows)
+    {
+        /* skip the rows this node already has */
+        src += srcPic->m_stride * g_maxCUSize * copiedRows;
+        dst += srcPic->m_stride * g_maxCUSize * copiedRows;
+    }
+    else
+    {
+        /* include top padding */
+        intptr_t pad = srcPic->m_lumaMarginY * srcPic->m_stride;
+        len += pad;
+        src -= pad;
+        dst -= pad;
+    }
+
+    if (finishedRows == maxNumRows)
+        /* include bottom padding */
+        len += srcPic->m_lumaMarginY * srcPic->m_stride;
+
+    /* memcpy counts bytes while len counts pixels; scale by the pixel size so
+     * the whole region is copied even when a pixel is wider than one byte */
+    memcpy(dst, src, len * sizeof(pixel)); /* this should be between NUMA nodes */
+
+    /* height of one row in chroma lines */
+    intptr_t cuHeight = g_maxCUSize >> srcPic->m_vChromaShift;
+
+    for (int plane = 1; plane <= 2; plane++)
+    {
+        src = srcPic->m_picOrg[plane] - srcPic->m_chromaMarginX;
+        dst = dstPic->m_picOrg[plane] - dstPic->m_chromaMarginX;
+
+        len = srcPic->m_strideC * cuHeight * newRows;
+
+        if (copiedRows)
+        {
+            /* skip the rows this node already has */
+            src += srcPic->m_strideC * cuHeight * copiedRows;
+            dst += srcPic->m_strideC * cuHeight * copiedRows;
+        }
+        else
+        {
+            /* include top padding */
+            intptr_t pad = srcPic->m_chromaMarginY * srcPic->m_strideC;
+            len += pad;
+            src -= pad;
+            dst -= pad;
+        }
+
+        if (finishedRows == maxNumRows)
+            /* include bottom padding */
+            len += srcPic->m_chromaMarginY * srcPic->m_strideC;
+
+        /* byte count, see the luma copy above */
+        memcpy(dst, src, len * sizeof(pixel)); /* this should be between NUMA nodes */
+    }
+
+    /* publish the new row count only after all three planes are copied */
+    m_nodes[node].rows = finishedRows;
+}
+
void FrameData::destroy()
{
delete [] m_picCTU;
diff -r 02e84edaa143 -r 642e40276ab8 source/common/framedata.h
--- a/source/common/framedata.h Mon Aug 03 17:55:58 2015 -0500
+++ b/source/common/framedata.h Mon Aug 03 22:22:04 2015 -0500
@@ -162,6 +162,7 @@
bool create(const x265_param& param, const SPS& sps);
bool allocRecon(const SPS& sps, int node);
void reinit(const SPS& sps);
+ void copyRows(int node, int curRow, int totRows); /* ensure recon pixels are available for NUMA node; curRow/totRows are named finishedRows/maxNumRows in the definition */
void destroy();
inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
diff -r 02e84edaa143 -r 642e40276ab8 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Aug 03 17:55:58 2015 -0500
+++ b/source/encoder/frameencoder.cpp Mon Aug 03 22:22:04 2015 -0500
@@ -330,7 +330,7 @@
m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) || HAVE_LIBNUMA
- int numaNode = 0; // m_pool ? m_pool->m_numaNode : 0;
+ int numaNode = m_pool ? m_pool->m_numaNode : 0;
#else
int numaNode = 0;
#endif
@@ -519,7 +519,7 @@
while ((reconRowCount != m_numRows) && (reconRowCount < row + m_refLagRows))
reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);
- /* TODO: if refpic->m_encData->m_ownerNode != numaNode, copy rows */
+ refpic->m_encData->copyRows(numaNode, row + m_refLagRows, m_numRows);
if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows);
@@ -560,7 +560,7 @@
while ((reconRowCount != m_numRows) && (reconRowCount < i + m_refLagRows))
reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);
- /* TODO: if refpic->m_encData->m_ownerNode != numaNode, copy rows */
+ refpic->m_encData->copyRows(numaNode, i + m_refLagRows, m_numRows);
if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
m_mref[list][ref].applyWeight(i + m_refLagRows, m_numRows);
More information about the x265-devel
mailing list