[x265] [PATCH 2 of 2 EXPERIMENTAL] framedata: keep per-NUMA node copies of recon buffers

Steve Borho steve at borho.org
Tue Aug 4 05:33:37 CEST 2015


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1438658524 18000
#      Mon Aug 03 22:22:04 2015 -0500
# Node ID 642e40276ab81696f5a6b342d8d71345f4e58339
# Parent  02e84edaa14399a3a68ade8617c63422f51a305b
framedata: keep per-NUMA node copies of recon buffers

diff -r 02e84edaa143 -r 642e40276ab8 source/common/framedata.cpp
--- a/source/common/framedata.cpp	Mon Aug 03 17:55:58 2015 -0500
+++ b/source/common/framedata.cpp	Mon Aug 03 22:22:04 2015 -0500
@@ -110,6 +110,80 @@
     return false;
 }
 
+/* Ensure the first finishedRows rows (g_maxCUSize luma lines each) of recon pixels are copied from the owner node's picture into the copy kept for NUMA node 'node' */
+void FrameData::copyRows(int node, int finishedRows, int maxNumRows)
+{
+    finishedRows = X265_MIN(finishedRows, maxNumRows); /* callers may request rows beyond the last one */
+
+    if (node == m_ownerNode || m_nodes[node].rows >= finishedRows) /* unlocked fast path: owner node, or rows already copied */
+        return;
+
+    ScopedLock s(m_nodes[node].copyLock); /* take this node's copy lock before touching its buffer */
+
+    if (m_nodes[node].rows >= finishedRows) /* re-check: another caller may have copied while we waited for the lock */
+        return;
+
+    PicYuv* srcPic = m_nodes[m_ownerNode].reconPic;
+    PicYuv* dstPic = m_nodes[node].reconPic;
+
+    const pixel* src = srcPic->m_picOrg[0] - srcPic->m_lumaMarginX; /* start of luma line 0, including the left margin */
+    pixel* dst = dstPic->m_picOrg[0] - dstPic->m_lumaMarginX;
+
+    intptr_t len = srcPic->m_stride * g_maxCUSize * (finishedRows - m_nodes[node].rows); /* counted in pixels, not bytes */
+
+    if (m_nodes[node].rows)
+    {
+        src += srcPic->m_stride * g_maxCUSize * m_nodes[node].rows; /* skip the rows copied by earlier calls */
+        dst += srcPic->m_stride * g_maxCUSize * m_nodes[node].rows; /* NOTE(review): assumes both pictures share the same stride - confirm */
+    }
+    else
+    {
+        /* include top padding */
+        intptr_t pad = srcPic->m_lumaMarginY * srcPic->m_stride;
+        len += pad;
+        src -= pad;
+        dst -= pad;
+    }
+
+    if (finishedRows == maxNumRows)
+        /* include bottom padding */
+        len += srcPic->m_lumaMarginY * srcPic->m_stride;
+
+    memcpy(dst, src, len * sizeof(pixel)); /* len is in pixels, memcpy wants bytes; this should be between NUMA nodes */
+
+    intptr_t cuHeight = g_maxCUSize >> srcPic->m_vChromaShift; /* height of one row in chroma lines */
+
+    for (int plane = 1; plane <= 2; plane++) /* repeat the luma procedure for both chroma planes */
+    {
+        src = srcPic->m_picOrg[plane] - srcPic->m_chromaMarginX;
+        dst = dstPic->m_picOrg[plane] - dstPic->m_chromaMarginX;
+
+        len = srcPic->m_strideC * cuHeight * (finishedRows - m_nodes[node].rows); /* counted in pixels, not bytes */
+
+        if (m_nodes[node].rows)
+        {
+            src += srcPic->m_strideC * cuHeight * m_nodes[node].rows; /* skip the rows copied by earlier calls */
+            dst += srcPic->m_strideC * cuHeight * m_nodes[node].rows;
+        }
+        else
+        {
+            /* include top padding */
+            intptr_t pad = srcPic->m_chromaMarginY * srcPic->m_strideC;
+            len += pad;
+            src -= pad;
+            dst -= pad;
+        }
+
+        if (finishedRows == maxNumRows)
+            /* include bottom padding*/
+            len += srcPic->m_chromaMarginY * srcPic->m_strideC;
+
+        memcpy(dst, src, len * sizeof(pixel)); /* len is in pixels, memcpy wants bytes; this should be between NUMA nodes */
+    }
+
+    m_nodes[node].rows = finishedRows; /* record progress so later calls copy only the newly finished rows */
+}
+
 void FrameData::destroy()
 {
     delete [] m_picCTU;
diff -r 02e84edaa143 -r 642e40276ab8 source/common/framedata.h
--- a/source/common/framedata.h	Mon Aug 03 17:55:58 2015 -0500
+++ b/source/common/framedata.h	Mon Aug 03 22:22:04 2015 -0500
@@ -162,6 +162,7 @@
     bool create(const x265_param& param, const SPS& sps);
     bool allocRecon(const SPS& sps, int node);
     void reinit(const SPS& sps);
+    void copyRows(int node, int curRow, int totRows); /* copy finished recon rows into the given NUMA node's picture; the definition names these finishedRows/maxNumRows */
     void destroy();
 
     inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
diff -r 02e84edaa143 -r 642e40276ab8 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Aug 03 17:55:58 2015 -0500
+++ b/source/encoder/frameencoder.cpp	Mon Aug 03 22:22:04 2015 -0500
@@ -330,7 +330,7 @@
         m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
 
 #if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) || HAVE_LIBNUMA
-    int numaNode = 0; // m_pool ? m_pool->m_numaNode : 0;
+    int numaNode = m_pool ? m_pool->m_numaNode : 0;
 #else
     int numaNode = 0;
 #endif
@@ -519,7 +519,7 @@
                     while ((reconRowCount != m_numRows) && (reconRowCount < row + m_refLagRows))
                         reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);
 
-                    /* TODO: if refpic->m_encData->m_ownerNode != numaNode, copy rows */
+                    refpic->m_encData->copyRows(numaNode, row + m_refLagRows, m_numRows);
 
                     if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
                         m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows);
@@ -560,7 +560,7 @@
                         while ((reconRowCount != m_numRows) && (reconRowCount < i + m_refLagRows))
                             reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);
 
-                        /* TODO: if refpic->m_encData->m_ownerNode != numaNode, copy rows */
+                        refpic->m_encData->copyRows(numaNode, i + m_refLagRows, m_numRows);
 
                         if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
                             m_mref[list][ref].applyWeight(i + m_refLagRows, m_numRows);


More information about the x265-devel mailing list