<div dir="ltr">As I said in the patch comment, although we see a considerable reduction in the number of writes to the remote socket, we don't see any improvement in encoding performance. This is perhaps because writes aren't the performance limiter for x265 with the settings/videos that we tried.</div><div class="gmail_extra"><br clear="all"><div><div class="gmail_signature"><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr">Pradeep Ramachandran, PhD<div>Solution Architect,</div><div>Multicoreware Inc.</div><div>Ph:   +91 99627 82018</div></div></div></div></div></div></div></div></div></div>
<br><div class="gmail_quote">On Wed, Aug 5, 2015 at 3:02 PM, Ximing Cheng <span dir="ltr"><<a href="mailto:chengximing1989@gmail.com" target="_blank">chengximing1989@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">How much does this patch improve the performance of a multi-NUMA-node server? Thanks!</div><div class="gmail_extra"><br><div class="gmail_quote"><div><div class="h5">On Wed, Aug 5, 2015 at 5:02 PM, Pradeep Ramachandran <span dir="ltr"><<a href="mailto:pradeep@multicorewareinc.com" target="_blank">pradeep@multicorewareinc.com</a>></span> wrote:<br></div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div><div class="h5"><div dir="ltr"><span style="font-size:12.8000001907349px">There was a merge problem that renders this patch inapplicable on the tip. Please ignore.</span><div style="font-size:12.8000001907349px">Apologies for the confusion.</div><div style="font-size:12.8000001907349px"><br></div><div style="font-size:12.8000001907349px">Pradeep.</div></div><div class="gmail_extra"><br clear="all"><div><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr">Pradeep Ramachandran, PhD<div>Solution Architect,</div><div>Multicoreware Inc.</div><div>Ph:   <a href="tel:%2B91%2099627%2082018" value="+919962782018" target="_blank">+91 99627 82018</a></div></div></div></div></div></div></div></div></div></div><div><div>
<br><div class="gmail_quote">On Wed, Aug 5, 2015 at 7:35 PM, Pradeep <span dir="ltr"><<a href="mailto:pradeep@multicorewareinc.com" target="_blank">pradeep@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Pradeep <<a href="mailto:pradeep@multicorewareinc.com" target="_blank">pradeep@multicorewareinc.com</a>><br>
# Date 1438704601 0<br>
#      Tue Aug 04 16:10:01 2015 +0000<br>
# Node ID 0206efdac228891f348c8d6c7ad7ced369c840a3<br>
# Parent  0c1f9d98294454d3bf896aeb24be881d8aa53434<br>
Performance: Enable recon frames to be NUMA-aware when the<br>
frame encoder thread creates them. We see a considerable reduction in<br>
the number of cross-socket accesses, but the impact on the performance<br>
of sample videos is rather small.<br>
<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/frame.cpp<br>
--- a/source/common/frame.cpp   Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/frame.cpp   Tue Aug 04 16:10:01 2015 +0000<br>
@@ -51,10 +51,34 @@<br>
            m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode);<br>
 }<br>
<br>
-bool Frame::allocEncodeData(x265_param *param, const SPS& sps)<br>
+bool Frame::allocEncodeData(x265_param *param, const SPS& sps, const int numaNode)<br>
 {<br>
-    m_encData = new FrameData;<br>
-    m_reconPic = new PicYuv;<br>
+    int selNumaNode = numaNode ;<br>
+#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7<br>
+    GROUP_AFFINITY groupAffinity;<br>
+    if (GetNumaNodeProcessorMaskEx((USHORT)selNumaNode, &groupAffinity)) {<br>
+        if(VirtualAllocExNuma(GetCurrentProcess(),<br>
+                              NULL,<br>
+                              sizeof(FrameData)+sizeof(PicYuv),<br>
+                              MEM_COMMIT,<br>
+                              PAGE_READWRITE,<br>
+                              selNumaNode)) {<br>
+            // Successful commit, do nothing<br>
+        }<br>
+    }<br>
+#elif HAVE_LIBNUMA<br>
+    if(numa_available() >= 0) {<br>
+        numa_set_preferred(selNumaNode) ;<br>
+        numa_set_localalloc() ;<br>
+    } else {<br>
+        selNumaNode = -1 ;<br>
+    }<br>
+#else<br>
+    selNumaNode = -1 ;<br>
+#endif // HAVE_LIBNUMA<br>
+<br>
+    m_encData = new FrameData(selNumaNode) ;<br>
+    m_reconPic = new PicYuv(selNumaNode) ;<br>
     m_encData->m_reconPic = m_reconPic;<br>
     bool ok = m_encData->create(param, sps) && m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp);<br>
     if (ok)<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/frame.h<br>
--- a/source/common/frame.h     Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/frame.h     Tue Aug 04 16:10:01 2015 +0000<br>
@@ -28,6 +28,10 @@<br>
 #include "lowres.h"<br>
 #include "threading.h"<br>
<br>
+#if HAVE_LIBNUMA<br>
+#include <numa.h><br>
+#endif // HAVE_LIBNUMA<br>
+<br>
 namespace X265_NS {<br>
 // private namespace<br>
<br>
@@ -67,10 +71,11 @@<br>
     Frame*                 m_prev;<br>
     x265_param*            m_param;              // Points to the latest param set for the frame.<br>
     x265_analysis_data     m_analysisData;<br>
+<br>
     Frame();<br>
<br>
     bool create(x265_param *param);<br>
-    bool allocEncodeData(x265_param *param, const SPS& sps);<br>
+    bool allocEncodeData(x265_param *param, const SPS& sps, const int numaNode);<br>
     void reinit(const SPS& sps);<br>
     void destroy();<br>
 };<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/framedata.cpp<br>
--- a/source/common/framedata.cpp       Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/framedata.cpp       Tue Aug 04 16:10:01 2015 +0000<br>
@@ -26,9 +26,10 @@<br>
<br>
 using namespace X265_NS;<br>
<br>
-FrameData::FrameData()<br>
+FrameData::FrameData(int numaNode)<br>
 {<br>
     memset(this, 0, sizeof(*this));<br>
+    m_numaNode = numaNode ;<br>
 }<br>
<br>
 bool FrameData::create(x265_param *param, const SPS& sps)<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/framedata.h<br>
--- a/source/common/framedata.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/framedata.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -107,6 +107,8 @@<br>
     CUDataMemPool  m_cuMemPool;<br>
     CUData*        m_picCTU;<br>
<br>
+    int            m_numaNode ;<br>
+<br>
     /* Rate control data used during encode and by references */<br>
     struct RCStatCU<br>
     {<br>
@@ -140,7 +142,7 @@<br>
     double         m_avgQpAq;    /* avg QP as decided by AQ in addition to rate-control */<br>
     double         m_rateFactor; /* calculated based on the Frame QP */<br>
<br>
-    FrameData();<br>
+    FrameData(int numaNode=-1);<br>
<br>
     bool create(x265_param *param, const SPS& sps);<br>
     void reinit(const SPS& sps);<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/param.cpp<br>
--- a/source/common/param.cpp   Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/param.cpp   Tue Aug 04 16:10:01 2015 +0000<br>
@@ -855,6 +855,7 @@<br>
     OPT("qg-size") p->rc.qgSize = atoi(value);<br>
     OPT("master-display") p->masteringDisplayColorVolume = strdup(value);<br>
     OPT("max-cll") p->contentLightLevelInfo = strdup(value);<br>
+    OPT("print-numa-stats") p->printNumaStats = atobool(value) ;<br>
     else<br>
         return X265_PARAM_BAD_NAME;<br>
 #undef OPT<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/picyuv.cpp<br>
--- a/source/common/picyuv.cpp  Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/picyuv.cpp  Tue Aug 04 16:10:01 2015 +0000<br>
@@ -28,7 +28,8 @@<br>
<br>
 using namespace X265_NS;<br>
<br>
-PicYuv::PicYuv()<br>
+PicYuv::PicYuv(int numaNode):<br>
+    m_numaNode(numaNode)<br>
 {<br>
     m_picBuf[0] = NULL;<br>
     m_picBuf[1] = NULL;<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/picyuv.h<br>
--- a/source/common/picyuv.h    Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/picyuv.h    Tue Aug 04 16:10:01 2015 +0000<br>
@@ -59,8 +59,9 @@<br>
     uint32_t m_lumaMarginY;<br>
     uint32_t m_chromaMarginX;<br>
     uint32_t m_chromaMarginY;<br>
+    int32_t  m_numaNode ;<br>
<br>
-    PicYuv();<br>
+    PicYuv(int numaNode=-1);<br>
<br>
     bool  create(uint32_t picWidth, uint32_t picHeight, uint32_t csp);<br>
     bool  createOffsets(const SPS& sps);<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/threadpool.cpp<br>
--- a/source/common/threadpool.cpp      Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/threadpool.cpp      Tue Aug 04 16:10:01 2015 +0000<br>
@@ -338,6 +338,7 @@<br>
 ThreadPool::ThreadPool()<br>
 {<br>
     memset(this, 0, sizeof(*this));<br>
+    m_numaNode = -1 ;<br>
 }<br>
<br>
 bool ThreadPool::create(int numThreads, int maxProviders, int node)<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/dpb.cpp<br>
--- a/source/encoder/dpb.cpp    Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/dpb.cpp    Tue Aug 04 16:10:01 2015 +0000<br>
@@ -58,6 +58,23 @@<br>
         delete m_picSymFreeList;<br>
         m_picSymFreeList = next;<br>
     }<br>
+<br>
+    if(m_picSymFreeListNuma) {<br>
+        for(int i=0; i<m_numNumaNodes; i++) {<br>
+            while(m_picSymFreeListNuma[i]) {<br>
+                FrameData* next = m_picSymFreeListNuma[i]->m_freeListNext;<br>
+                m_picSymFreeListNuma[i]->destroy();<br>
+<br>
+                m_picSymFreeListNuma[i]->m_reconPic->destroy();<br>
+                delete m_picSymFreeListNuma[i]->m_reconPic;<br>
+<br>
+                delete m_picSymFreeListNuma[i];<br>
+                m_picSymFreeListNuma[i] = next;<br>
+            }<br>
+            delete m_picSymFreeListNuma[i] ;<br>
+        }<br>
+        delete m_picSymFreeListNuma ;<br>
+    }<br>
 }<br>
<br>
 // move unreferenced pictures from picList to freeList for recycle<br>
@@ -78,9 +95,17 @@<br>
             m_picList.remove(*curFrame);<br>
             iterFrame = m_picList.first();<br>
<br>
+            int encDataNumaNode = curFrame->m_encData->m_numaNode ;<br>
+            if(encDataNumaNode != -1) {<br>
+                X265_CHECK(encDataNumaNode < m_numNumaNodes,<br>
+                           "fatal: frame allocated on non-existant numa node!\n") ;<br>
+                curFrame->m_encData->m_freeListNext = m_picSymFreeListNuma[encDataNumaNode] ;<br>
+                m_picSymFreeListNuma[encDataNumaNode] = curFrame->m_encData ;<br>
+            } else {<br>
+                curFrame->m_encData->m_freeListNext = m_picSymFreeList;<br>
+                m_picSymFreeList = curFrame->m_encData;<br>
+            }<br>
             m_freeList.pushBack(*curFrame);<br>
-            curFrame->m_encData->m_freeListNext = m_picSymFreeList;<br>
-            m_picSymFreeList = curFrame->m_encData;<br>
             curFrame->m_encData = NULL;<br>
             curFrame->m_reconPic = NULL;<br>
         }<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/dpb.h<br>
--- a/source/encoder/dpb.h      Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/dpb.h      Tue Aug 04 16:10:01 2015 +0000<br>
@@ -47,6 +47,9 @@<br>
     PicList            m_picList;<br>
     PicList            m_freeList;<br>
     FrameData*         m_picSymFreeList;<br>
+    x265_param*        m_param;<br>
+    int                m_numNumaNodes ;<br>
+    FrameData        **m_picSymFreeListNuma ;<br>
<br>
     DPB(x265_param *param)<br>
     {<br>
@@ -58,6 +61,27 @@<br>
         m_maxRefL1 = param->bBPyramid ? 2 : 1;<br>
         m_bOpenGOP = param->bOpenGOP;<br>
         m_bTemporalSublayer = !!param->bEnableTemporalSubLayers;<br>
+        m_param = param ;<br>
+        m_numNumaNodes = -1 ;<br>
+<br>
+#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7)<br>
+        // NUMA supported by default on windows<br>
+        m_numNumaNodes = 1 ;<br>
+        if(GetNumaHighestNodeNumber(&num)) {<br>
+            m_numNumaNodes ++ ;<br>
+        }<br>
+#elif HAVE_LIBNUMA<br>
+        if(numa_available()>=0) {<br>
+            m_numNumaNodes = numa_max_node() + 1 ;<br>
+        }<br>
+#endif // HAVE_LIBNUMA<br>
+<br>
+        if(m_numNumaNodes>0) {<br>
+            m_picSymFreeListNuma = new FrameData*[m_numNumaNodes] ;<br>
+            for(int i=0; i<m_numNumaNodes; i++) {<br>
+                m_picSymFreeListNuma[i] = NULL ;<br>
+            }<br>
+        }<br>
     }<br>
<br>
     ~DPB();<br>
@@ -66,6 +90,17 @@<br>
<br>
     void recycleUnreferenced();<br>
<br>
+    bool isFreeEncDataAvailable() {<br>
+        if(m_picSymFreeList) {<br>
+            return true ;<br>
+        }<br>
+        for(int i=0; i<m_numNumaNodes; i++) {<br>
+            if(m_picSymFreeListNuma[i])<br>
+                return true ;<br>
+        }<br>
+        return false ;<br>
+    }<br>
+<br>
 protected:<br>
<br>
     void computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer);<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/encoder.cpp<br>
--- a/source/encoder/encoder.cpp        Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/encoder.cpp        Tue Aug 04 16:10:01 2015 +0000<br>
@@ -286,6 +286,11 @@<br>
<br>
 void Encoder::destroy()<br>
 {<br>
+    int numRefSameNuma = 0 ;<br>
+    int numRefDiffNuma = 0 ;<br>
+    int numReconSameNuma = 0 ;<br>
+    int numReconDiffNuma = 0 ;<br>
+<br>
     if (m_exportedPic)<br>
     {<br>
         ATOMIC_DEC(&m_exportedPic->m_countRefEncoders);<br>
@@ -296,6 +301,13 @@<br>
     {<br>
         if (m_frameEncoder[i])<br>
         {<br>
+            if(m_param->printNumaStats) {<br>
+                numRefSameNuma += m_frameEncoder[i]->getNumRefFramesSameNuma() ;<br>
+                numRefDiffNuma += m_frameEncoder[i]->getNumRefFramesDiffNuma() ;<br>
+                numReconSameNuma += m_frameEncoder[i]->getNumReconFramesSameNuma() ;<br>
+                numReconDiffNuma += m_frameEncoder[i]->getNumReconFramesDiffNuma() ;<br>
+            }<br>
+<br>
             m_frameEncoder[i]->destroy();<br>
             delete m_frameEncoder[i];<br>
         }<br>
@@ -323,6 +335,16 @@<br>
     X265_FREE(m_buOffsetY);<br>
     X265_FREE(m_buOffsetC);<br>
<br>
+    if(m_param && m_param->printNumaStats) {<br>
+        printf("Num new Encoder data alloc       = %d\n", m_numNewEncodeDataAlloc) ;<br>
+        printf("Num same node Encoder data reuse = %d\n", m_numSameNumaEncData) ;<br>
+        printf("Num diff node Encoder data reuse = %d\n", m_numDiffNumaEncData) ;<br>
+        printf("Num Ref frames in Same numa      = %d\n", numRefSameNuma) ;<br>
+        printf("Num Ref frames in Diff numa      = %d\n", numRefDiffNuma) ;<br>
+        printf("Num Recon frames in Same numa    = %d\n", numReconSameNuma) ;<br>
+        printf("Num Recon frames in Diff numa    = %d\n", numReconDiffNuma) ;<br>
+    }<br>
+<br>
     if (m_analysisFile)<br>
         fclose(m_analysisFile);<br>
<br>
@@ -511,6 +533,7 @@<br>
<br>
     FrameEncoder *curEncoder = m_frameEncoder[m_curEncoder];<br>
     m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;<br>
+<br>
     int ret = 0;<br>
<br>
     /* Normal operation is to wait for the current frame encoder to complete its current frame<br>
@@ -633,15 +656,49 @@<br>
         if (frameEnc && !pass)<br>
         {<br>
             /* give this frame a FrameData instance before encoding */<br>
-            if (m_dpb->m_picSymFreeList)<br>
+            // If NUMA aware allocation is enabled, try to preferably select a frame from this numa<br>
+            // node if available. If disabled, give any free node. If no free node, allocate new data<br>
+            if (m_dpb->isFreeEncDataAvailable())<br>
             {<br>
-                frameEnc->m_encData = m_dpb->m_picSymFreeList;<br>
-                m_dpb->m_picSymFreeList = m_dpb->m_picSymFreeList->m_freeListNext;<br>
-                frameEnc->reinit(m_sps);<br>
+                // Need to figure out which NUMA node this in frame is going to be<br>
+                // decoded on! try to allocate in data on that node.<br>
+                int threadNumaNode = curEncoder->m_pool->m_numaNode ;<br>
+                int dataNumaNode = -1 ;<br>
+                if(threadNumaNode!=-1) {<br>
+                    int checkingNumaNode = threadNumaNode ;<br>
+                    int numNumaNodes     = m_dpb->m_numNumaNodes ;<br>
+                    bool found           = false ;<br>
+                    for(int i=0; i<numNumaNodes;i++) {<br>
+                        if(m_dpb->m_picSymFreeListNuma[checkingNumaNode]) {<br>
+                            dataNumaNode = checkingNumaNode ;<br>
+                            frameEnc->m_encData = m_dpb->m_picSymFreeListNuma[dataNumaNode] ;<br>
+                            m_dpb->m_picSymFreeListNuma[dataNumaNode] =<br>
+                                m_dpb->m_picSymFreeListNuma[dataNumaNode]->m_freeListNext ;<br>
+                            frameEnc->reinit(m_sps) ;<br>
+                            // printf("Worker threads on %d, recon frame data on %d\n",<br>
+                            //    threadNumaNode, dataNumaNode) ;<br>
+                            found = true ;<br>
+                            break ;<br>
+                        }<br>
+                        checkingNumaNode = (checkingNumaNode+1) % numNumaNodes ;<br>
+                    }<br>
+                    X265_CHECK(found, "Should've found buffer for in frame!\n") ;<br>
+                } else {<br>
+                    frameEnc->m_encData = m_dpb->m_picSymFreeList;<br>
+                    m_dpb->m_picSymFreeList = m_dpb->m_picSymFreeList->m_freeListNext;<br>
+                    frameEnc->reinit(m_sps);<br>
+                    dataNumaNode = frameEnc->m_encData->m_numaNode ;<br>
+                }<br>
+                if(dataNumaNode == threadNumaNode) {<br>
+                    m_numSameNumaEncData ++ ;<br>
+                } else {<br>
+                    m_numDiffNumaEncData ++ ;<br>
+                }<br>
             }<br>
             else<br>
             {<br>
-                frameEnc->allocEncodeData(m_param, m_sps);<br>
+                m_numNewEncodeDataAlloc ++ ;<br>
+                frameEnc->allocEncodeData(m_param, m_sps, curEncoder->m_pool->m_numaNode);<br>
                 Slice* slice = frameEnc->m_encData->m_slice;<br>
                 slice->m_sps = &m_sps;<br>
                 slice->m_pps = &m_pps;<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/encoder.h<br>
--- a/source/encoder/encoder.h  Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/encoder.h  Tue Aug 04 16:10:01 2015 +0000<br>
@@ -133,6 +133,9 @@<br>
     bool               m_aborted;          // fatal error detected<br>
     bool               m_reconfigured;      // reconfigure of encoder detected<br>
<br>
+    uint32_t           m_numNewEncodeDataAlloc ;<br>
+    uint32_t           m_numSameNumaEncData ;<br>
+    uint32_t           m_numDiffNumaEncData ;<br>
     Encoder();<br>
     ~Encoder() {}<br>
<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/frameencoder.cpp<br>
--- a/source/encoder/frameencoder.cpp   Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/frameencoder.cpp   Tue Aug 04 16:10:01 2015 +0000<br>
@@ -60,6 +60,11 @@<br>
     m_ctuGeomMap = NULL;<br>
     m_localTldIdx = 0;<br>
     memset(&m_rce, 0, sizeof(RateControlEntry));<br>
+<br>
+    m_numRefFramesSameNuma = 0 ;<br>
+    m_numRefFrameDiffNuma = 0 ;<br>
+    m_numReconFramesSameNuma = 0 ;<br>
+    m_numReconFramesDiffNuma = 0 ;<br>
 }<br>
<br>
 void FrameEncoder::destroy()<br>
@@ -357,7 +362,15 @@<br>
             WeightParam *w = NULL;<br>
             if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag)<br>
                 w = slice->m_weightPredTable[l][ref];<br>
-            m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);<br>
+            PicYuv* closestReconPic = slice->m_refPicList[l][ref]->m_reconPic ;<br>
+            m_mref[l][ref].init(closestReconPic, w, *m_param);<br>
+            if(m_param->printNumaStats) {<br>
+                if(m_pool->m_numaNode != closestReconPic->m_numaNode) {<br>
+                    m_numRefFrameDiffNuma ++ ;<br>
+                } else {<br>
+                    m_numRefFramesSameNuma ++ ;<br>
+                }<br>
+            }<br>
         }<br>
     }<br>
<br>
@@ -932,6 +945,13 @@<br>
<br>
         // Does all the CU analysis, returns best top level mode decision<br>
         Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);<br>
+        if(m_param->printNumaStats) {<br>
+            if(m_pool->m_numaNode != m_frame->m_reconPic->m_numaNode) {<br>
+                m_numReconFramesDiffNuma ++ ;<br>
+            } else {<br>
+                m_numReconFramesSameNuma ++ ;<br>
+            }<br>
+        }<br>
<br>
         // take a sample of the current active worker count<br>
         ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/frameencoder.h<br>
--- a/source/encoder/frameencoder.h     Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/frameencoder.h     Tue Aug 04 16:10:01 2015 +0000<br>
@@ -206,7 +206,16 @@<br>
         WeightAnalysis operator=(const WeightAnalysis&);<br>
     };<br>
<br>
-protected:<br>
+    unsigned int             m_numRefFramesSameNuma ;<br>
+    unsigned int             m_numRefFrameDiffNuma ;<br>
+    unsigned int             m_numReconFramesSameNuma ;<br>
+    unsigned int             m_numReconFramesDiffNuma ;<br>
+    unsigned int             getNumRefFramesSameNuma() { return m_numRefFramesSameNuma ; }<br>
+    unsigned int             getNumRefFramesDiffNuma() { return m_numRefFrameDiffNuma ; }<br>
+    unsigned int             getNumReconFramesSameNuma() { return m_numReconFramesSameNuma ; }<br>
+    unsigned int             getNumReconFramesDiffNuma() { return m_numReconFramesDiffNuma ; }<br>
+<br>
+    protected:<br>
<br>
     bool initializeGeoms();<br>
<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/x265.h<br>
--- a/source/x265.h     Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/x265.h     Tue Aug 04 16:10:01 2015 +0000<br>
@@ -1172,6 +1172,11 @@<br>
      * picture average light level (or 0). */<br>
     const char* contentLightLevelInfo;<br>
<br>
+    /* Print NUMA statistics collected from the code on the console to show the<br>
+     * number of times the recon and ref pics were located on the same NUMA socket,<br>
+     * and on different sockets */<br>
+    int printNumaStats ;<br>
+<br>
 } x265_param;<br>
<br>
 /* x265_param_alloc:<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/x265cli.h<br>
--- a/source/x265cli.h  Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/x265cli.h  Tue Aug 04 16:10:01 2015 +0000<br>
@@ -218,6 +218,7 @@<br>
     { "no-temporal-layers",   no_argument, NULL, 0 },<br>
     { "qg-size",        required_argument, NULL, 0 },<br>
     { "recon-y4m-exec", required_argument, NULL, 0 },<br>
+    { "print-numa-stats", no_argument, NULL, 0 },<br>
     { 0, 0, 0, 0 },<br>
     { 0, 0, 0, 0 },<br>
     { 0, 0, 0, 0 },<br>
@@ -414,6 +415,7 @@<br>
     H1("-r/--recon <filename>            Reconstructed raw image YUV or Y4M output file name\n");<br>
     H1("   --recon-depth <integer>       Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n");<br>
     H1("   --recon-y4m-exec <string>     pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");<br>
+    H1("   --print-numa-stats            print statistics related to socket information for ref and recon frames\n");<br>
     H1("\nExecutable return codes:\n");<br>
     H1("    0 - encode successful\n");<br>
     H1("    1 - unable to parse command line\n");<br>
</blockquote></div><br></div></div></div>
<br></div></div>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br></div>
<br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br></div>