<div dir="ltr">How much does this patch improve performance on a multi-NUMA-node server? Thanks!</div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Aug 5, 2015 at 5:02 PM, Pradeep Ramachandran <span dir="ltr"><<a href="mailto:pradeep@multicorewareinc.com" target="_blank">pradeep@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><span style="font-size:12.8000001907349px">There was some merge problem rendering this patch inapplicable on the tip. Please ignore.</span><div style="font-size:12.8000001907349px">Apologies for the confusion.</div><div style="font-size:12.8000001907349px"><br></div><div style="font-size:12.8000001907349px">Pradeep.</div></div><div class="gmail_extra"><br clear="all"><div><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr"><div><div dir="ltr">Pradeep Ramachandran, PhD<div>Solution Architect,</div><div>Multicoreware Inc.</div><div>Ph: <a href="tel:%2B91%2099627%2082018" value="+919962782018" target="_blank">+91 99627 82018</a></div></div></div></div></div></div></div></div></div></div><div><div class="h5">
<br><div class="gmail_quote">On Wed, Aug 5, 2015 at 7:35 PM, Pradeep <span dir="ltr"><<a href="mailto:pradeep@multicorewareinc.com" target="_blank">pradeep@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Pradeep <<a href="mailto:pradeep@multicorewareinc.com" target="_blank">pradeep@multicorewareinc.com</a>><br>
# Date 1438704601 0<br>
# Tue Aug 04 16:10:01 2015 +0000<br>
# Node ID 0206efdac228891f348c8d6c7ad7ced369c840a3<br>
# Parent 0c1f9d98294454d3bf896aeb24be881d8aa53434<br>
Performance: Enabling recon frames to be NUMA-aware when the<br>
frame encoder thread creates them. Seeing considerable reduction in<br>
the number of cross-socket accesses, but impact on performance of sample videos<br>
is rather small.<br>
<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/frame.cpp<br>
--- a/source/common/frame.cpp Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/frame.cpp Tue Aug 04 16:10:01 2015 +0000<br>
@@ -51,10 +51,34 @@<br>
m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode);<br>
}<br>
<br>
-bool Frame::allocEncodeData(x265_param *param, const SPS& sps)<br>
+bool Frame::allocEncodeData(x265_param *param, const SPS& sps, const int numaNode)<br>
{<br>
- m_encData = new FrameData;<br>
- m_reconPic = new PicYuv;<br>
+ int selNumaNode = numaNode ;<br>
+#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7<br>
+ GROUP_AFFINITY groupAffinity;<br>
+ if (GetNumaNodeProcessorMaskEx((USHORT)selNumaNode, &groupAffinity)) {<br>
+ if(VirtualAllocExNuma(GetCurrentProcess(),<br>
+ NULL,<br>
+ sizeof(FrameData)+sizeof(PicYuv),<br>
+ MEM_COMMIT,<br>
+ PAGE_READWRITE,<br>
+ selNumaNode)) {<br>
+ // Successful commit, do nothing<br>
+ }<br>
+ }<br>
+#elif HAVE_LIBNUMA<br>
+ if(numa_available() >= 0) {<br>
+ numa_set_preferred(selNumaNode) ;<br>
+ numa_set_localalloc() ;<br>
+ } else {<br>
+ selNumaNode = -1 ;<br>
+ }<br>
+#else<br>
+ selNumaNode = -1 ;<br>
+#endif // HAVE_LIBNUMA<br>
+<br>
+ m_encData = new FrameData(selNumaNode) ;<br>
+ m_reconPic = new PicYuv(selNumaNode) ;<br>
m_encData->m_reconPic = m_reconPic;<br>
bool ok = m_encData->create(param, sps) && m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp);<br>
if (ok)<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/frame.h<br>
--- a/source/common/frame.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/frame.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -28,6 +28,10 @@<br>
#include "lowres.h"<br>
#include "threading.h"<br>
<br>
+#if HAVE_LIBNUMA<br>
+#include <numa.h><br>
+#endif // HAVE_LIBNUMA<br>
+<br>
namespace X265_NS {<br>
// private namespace<br>
<br>
@@ -67,10 +71,11 @@<br>
Frame* m_prev;<br>
x265_param* m_param; // Points to the latest param set for the frame.<br>
x265_analysis_data m_analysisData;<br>
+<br>
Frame();<br>
<br>
bool create(x265_param *param);<br>
- bool allocEncodeData(x265_param *param, const SPS& sps);<br>
+ bool allocEncodeData(x265_param *param, const SPS& sps, const int numaNode);<br>
void reinit(const SPS& sps);<br>
void destroy();<br>
};<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/framedata.cpp<br>
--- a/source/common/framedata.cpp Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/framedata.cpp Tue Aug 04 16:10:01 2015 +0000<br>
@@ -26,9 +26,10 @@<br>
<br>
using namespace X265_NS;<br>
<br>
-FrameData::FrameData()<br>
+FrameData::FrameData(int numaNode)<br>
{<br>
memset(this, 0, sizeof(*this));<br>
+ m_numaNode = numaNode ;<br>
}<br>
<br>
bool FrameData::create(x265_param *param, const SPS& sps)<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/framedata.h<br>
--- a/source/common/framedata.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/framedata.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -107,6 +107,8 @@<br>
CUDataMemPool m_cuMemPool;<br>
CUData* m_picCTU;<br>
<br>
+ int m_numaNode ;<br>
+<br>
/* Rate control data used during encode and by references */<br>
struct RCStatCU<br>
{<br>
@@ -140,7 +142,7 @@<br>
double m_avgQpAq; /* avg QP as decided by AQ in addition to rate-control */<br>
double m_rateFactor; /* calculated based on the Frame QP */<br>
<br>
- FrameData();<br>
+ FrameData(int numaNode=-1);<br>
<br>
bool create(x265_param *param, const SPS& sps);<br>
void reinit(const SPS& sps);<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/param.cpp<br>
--- a/source/common/param.cpp Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/param.cpp Tue Aug 04 16:10:01 2015 +0000<br>
@@ -855,6 +855,7 @@<br>
OPT("qg-size") p->rc.qgSize = atoi(value);<br>
OPT("master-display") p->masteringDisplayColorVolume = strdup(value);<br>
OPT("max-cll") p->contentLightLevelInfo = strdup(value);<br>
+ OPT("print-numa-stats") p->printNumaStats = atobool(value) ;<br>
else<br>
return X265_PARAM_BAD_NAME;<br>
#undef OPT<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/picyuv.cpp<br>
--- a/source/common/picyuv.cpp Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/picyuv.cpp Tue Aug 04 16:10:01 2015 +0000<br>
@@ -28,7 +28,8 @@<br>
<br>
using namespace X265_NS;<br>
<br>
-PicYuv::PicYuv()<br>
+PicYuv::PicYuv(int numaNode):<br>
+ m_numaNode(numaNode)<br>
{<br>
m_picBuf[0] = NULL;<br>
m_picBuf[1] = NULL;<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/picyuv.h<br>
--- a/source/common/picyuv.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/picyuv.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -59,8 +59,9 @@<br>
uint32_t m_lumaMarginY;<br>
uint32_t m_chromaMarginX;<br>
uint32_t m_chromaMarginY;<br>
+ int32_t m_numaNode ;<br>
<br>
- PicYuv();<br>
+ PicYuv(int numaNode=-1);<br>
<br>
bool create(uint32_t picWidth, uint32_t picHeight, uint32_t csp);<br>
bool createOffsets(const SPS& sps);<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/common/threadpool.cpp<br>
--- a/source/common/threadpool.cpp Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/common/threadpool.cpp Tue Aug 04 16:10:01 2015 +0000<br>
@@ -338,6 +338,7 @@<br>
ThreadPool::ThreadPool()<br>
{<br>
memset(this, 0, sizeof(*this));<br>
+ m_numaNode = -1 ;<br>
}<br>
<br>
bool ThreadPool::create(int numThreads, int maxProviders, int node)<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/dpb.cpp<br>
--- a/source/encoder/dpb.cpp Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/dpb.cpp Tue Aug 04 16:10:01 2015 +0000<br>
@@ -58,6 +58,23 @@<br>
delete m_picSymFreeList;<br>
m_picSymFreeList = next;<br>
}<br>
+<br>
+ if(m_picSymFreeListNuma) {<br>
+ for(int i=0; i<m_numNumaNodes; i++) {<br>
+ while(m_picSymFreeListNuma[i]) {<br>
+ FrameData* next = m_picSymFreeListNuma[i]->m_freeListNext;<br>
+ m_picSymFreeListNuma[i]->destroy();<br>
+<br>
+ m_picSymFreeListNuma[i]->m_reconPic->destroy();<br>
+ delete m_picSymFreeListNuma[i]->m_reconPic;<br>
+<br>
+ delete m_picSymFreeListNuma[i];<br>
+ m_picSymFreeListNuma[i] = next;<br>
+ }<br>
+ delete m_picSymFreeListNuma[i] ;<br>
+ }<br>
+ delete m_picSymFreeListNuma ;<br>
+ }<br>
}<br>
<br>
// move unreferenced pictures from picList to freeList for recycle<br>
@@ -78,9 +95,17 @@<br>
m_picList.remove(*curFrame);<br>
iterFrame = m_picList.first();<br>
<br>
+ int encDataNumaNode = curFrame->m_encData->m_numaNode ;<br>
+ if(encDataNumaNode != -1) {<br>
+ X265_CHECK(encDataNumaNode < m_numNumaNodes,<br>
+ "fatal: frame allocated on non-existant numa node!\n") ;<br>
+ curFrame->m_encData->m_freeListNext = m_picSymFreeListNuma[encDataNumaNode] ;<br>
+ m_picSymFreeListNuma[encDataNumaNode] = curFrame->m_encData ;<br>
+ } else {<br>
+ curFrame->m_encData->m_freeListNext = m_picSymFreeList;<br>
+ m_picSymFreeList = curFrame->m_encData;<br>
+ }<br>
m_freeList.pushBack(*curFrame);<br>
- curFrame->m_encData->m_freeListNext = m_picSymFreeList;<br>
- m_picSymFreeList = curFrame->m_encData;<br>
curFrame->m_encData = NULL;<br>
curFrame->m_reconPic = NULL;<br>
}<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/dpb.h<br>
--- a/source/encoder/dpb.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/dpb.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -47,6 +47,9 @@<br>
PicList m_picList;<br>
PicList m_freeList;<br>
FrameData* m_picSymFreeList;<br>
+ x265_param* m_param;<br>
+ int m_numNumaNodes ;<br>
+ FrameData **m_picSymFreeListNuma ;<br>
<br>
DPB(x265_param *param)<br>
{<br>
@@ -58,6 +61,27 @@<br>
m_maxRefL1 = param->bBPyramid ? 2 : 1;<br>
m_bOpenGOP = param->bOpenGOP;<br>
m_bTemporalSublayer = !!param->bEnableTemporalSubLayers;<br>
+ m_param = param ;<br>
+ m_numNumaNodes = -1 ;<br>
+<br>
+#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7)<br>
+ // NUMA supported by default on windows<br>
+ m_numNumaNodes = 1 ;<br>
+ if(GetNumaHighestNodeNumber(&num)) {<br>
+ m_numNumaNodes ++ ;<br>
+ }<br>
+#elif HAVE_LIBNUMA<br>
+ if(numa_available()>=0) {<br>
+ m_numNumaNodes = numa_max_node() + 1 ;<br>
+ }<br>
+#endif // HAVE_LIBNUMA<br>
+<br>
+ if(m_numNumaNodes>0) {<br>
+ m_picSymFreeListNuma = new FrameData*[m_numNumaNodes] ;<br>
+ for(int i=0; i<m_numNumaNodes; i++) {<br>
+ m_picSymFreeListNuma[i] = NULL ;<br>
+ }<br>
+ }<br>
}<br>
<br>
~DPB();<br>
@@ -66,6 +90,17 @@<br>
<br>
void recycleUnreferenced();<br>
<br>
+ bool isFreeEncDataAvailable() {<br>
+ if(m_picSymFreeList) {<br>
+ return true ;<br>
+ }<br>
+ for(int i=0; i<m_numNumaNodes; i++) {<br>
+ if(m_picSymFreeListNuma[i])<br>
+ return true ;<br>
+ }<br>
+ return false ;<br>
+ }<br>
+<br>
protected:<br>
<br>
void computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer);<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/encoder.cpp<br>
--- a/source/encoder/encoder.cpp Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/encoder.cpp Tue Aug 04 16:10:01 2015 +0000<br>
@@ -286,6 +286,11 @@<br>
<br>
void Encoder::destroy()<br>
{<br>
+ int numRefSameNuma = 0 ;<br>
+ int numRefDiffNuma = 0 ;<br>
+ int numReconSameNuma = 0 ;<br>
+ int numReconDiffNuma = 0 ;<br>
+<br>
if (m_exportedPic)<br>
{<br>
ATOMIC_DEC(&m_exportedPic->m_countRefEncoders);<br>
@@ -296,6 +301,13 @@<br>
{<br>
if (m_frameEncoder[i])<br>
{<br>
+ if(m_param->printNumaStats) {<br>
+ numRefSameNuma += m_frameEncoder[i]->getNumRefFramesSameNuma() ;<br>
+ numRefDiffNuma += m_frameEncoder[i]->getNumRefFramesDiffNuma() ;<br>
+ numReconSameNuma += m_frameEncoder[i]->getNumReconFramesSameNuma() ;<br>
+ numReconDiffNuma += m_frameEncoder[i]->getNumReconFramesDiffNuma() ;<br>
+ }<br>
+<br>
m_frameEncoder[i]->destroy();<br>
delete m_frameEncoder[i];<br>
}<br>
@@ -323,6 +335,16 @@<br>
X265_FREE(m_buOffsetY);<br>
X265_FREE(m_buOffsetC);<br>
<br>
+ if(m_param && m_param->printNumaStats) {<br>
+ printf("Num new Encoder data alloc = %d\n", m_numNewEncodeDataAlloc) ;<br>
+ printf("Num same node Encoder data reuse = %d\n", m_numSameNumaEncData) ;<br>
+ printf("Num diff node Encoder data reuse = %d\n", m_numDiffNumaEncData) ;<br>
+ printf("Num Ref frames in Same numa = %d\n", numRefSameNuma) ;<br>
+ printf("Num Ref frames in Diff numa = %d\n", numRefDiffNuma) ;<br>
+ printf("Num Recon frames in Same numa = %d\n", numReconSameNuma) ;<br>
+ printf("Num Recon frames in Diff numa = %d\n", numReconDiffNuma) ;<br>
+ }<br>
+<br>
if (m_analysisFile)<br>
fclose(m_analysisFile);<br>
<br>
@@ -511,6 +533,7 @@<br>
<br>
FrameEncoder *curEncoder = m_frameEncoder[m_curEncoder];<br>
m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;<br>
+<br>
int ret = 0;<br>
<br>
/* Normal operation is to wait for the current frame encoder to complete its current frame<br>
@@ -633,15 +656,49 @@<br>
if (frameEnc && !pass)<br>
{<br>
/* give this frame a FrameData instance before encoding */<br>
- if (m_dpb->m_picSymFreeList)<br>
+ // If NUMA aware allocation is enabled, try to preferably select a frame from this numa<br>
+ // node if available. If disabled, give any free node. If no free node, allocate new data<br>
+ if (m_dpb->isFreeEncDataAvailable())<br>
{<br>
- frameEnc->m_encData = m_dpb->m_picSymFreeList;<br>
- m_dpb->m_picSymFreeList = m_dpb->m_picSymFreeList->m_freeListNext;<br>
- frameEnc->reinit(m_sps);<br>
+ // Need to figure out which NUMA node this in frame is going to be<br>
+ // decoded on! try to allocate in data on that node.<br>
+ int threadNumaNode = curEncoder->m_pool->m_numaNode ;<br>
+ int dataNumaNode = -1 ;<br>
+ if(threadNumaNode!=-1) {<br>
+ int checkingNumaNode = threadNumaNode ;<br>
+ int numNumaNodes = m_dpb->m_numNumaNodes ;<br>
+ bool found = false ;<br>
+ for(int i=0; i<numNumaNodes;i++) {<br>
+ if(m_dpb->m_picSymFreeListNuma[checkingNumaNode]) {<br>
+ dataNumaNode = checkingNumaNode ;<br>
+ frameEnc->m_encData = m_dpb->m_picSymFreeListNuma[dataNumaNode] ;<br>
+ m_dpb->m_picSymFreeListNuma[dataNumaNode] =<br>
+ m_dpb->m_picSymFreeListNuma[dataNumaNode]->m_freeListNext ;<br>
+ frameEnc->reinit(m_sps) ;<br>
+ // printf("Worker threads on %d, recon frame data on %d\n",<br>
+ // threadNumaNode, dataNumaNode) ;<br>
+ found = true ;<br>
+ break ;<br>
+ }<br>
+ checkingNumaNode = (checkingNumaNode+1) % numNumaNodes ;<br>
+ }<br>
+ X265_CHECK(found, "Should've found buffer for in frame!\n") ;<br>
+ } else {<br>
+ frameEnc->m_encData = m_dpb->m_picSymFreeList;<br>
+ m_dpb->m_picSymFreeList = m_dpb->m_picSymFreeList->m_freeListNext;<br>
+ frameEnc->reinit(m_sps);<br>
+ dataNumaNode = frameEnc->m_encData->m_numaNode ;<br>
+ }<br>
+ if(dataNumaNode == threadNumaNode) {<br>
+ m_numSameNumaEncData ++ ;<br>
+ } else {<br>
+ m_numDiffNumaEncData ++ ;<br>
+ }<br>
}<br>
else<br>
{<br>
- frameEnc->allocEncodeData(m_param, m_sps);<br>
+ m_numNewEncodeDataAlloc ++ ;<br>
+ frameEnc->allocEncodeData(m_param, m_sps, curEncoder->m_pool->m_numaNode);<br>
Slice* slice = frameEnc->m_encData->m_slice;<br>
slice->m_sps = &m_sps;<br>
slice->m_pps = &m_pps;<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/encoder.h<br>
--- a/source/encoder/encoder.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/encoder.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -133,6 +133,9 @@<br>
bool m_aborted; // fatal error detected<br>
bool m_reconfigured; // reconfigure of encoder detected<br>
<br>
+ uint32_t m_numNewEncodeDataAlloc ;<br>
+ uint32_t m_numSameNumaEncData ;<br>
+ uint32_t m_numDiffNumaEncData ;<br>
Encoder();<br>
~Encoder() {}<br>
<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/frameencoder.cpp<br>
--- a/source/encoder/frameencoder.cpp Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/frameencoder.cpp Tue Aug 04 16:10:01 2015 +0000<br>
@@ -60,6 +60,11 @@<br>
m_ctuGeomMap = NULL;<br>
m_localTldIdx = 0;<br>
memset(&m_rce, 0, sizeof(RateControlEntry));<br>
+<br>
+ m_numRefFramesSameNuma = 0 ;<br>
+ m_numRefFrameDiffNuma = 0 ;<br>
+ m_numReconFramesSameNuma = 0 ;<br>
+ m_numReconFramesDiffNuma = 0 ;<br>
}<br>
<br>
void FrameEncoder::destroy()<br>
@@ -357,7 +362,15 @@<br>
WeightParam *w = NULL;<br>
if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag)<br>
w = slice->m_weightPredTable[l][ref];<br>
- m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);<br>
+ PicYuv* closestReconPic = slice->m_refPicList[l][ref]->m_reconPic ;<br>
+ m_mref[l][ref].init(closestReconPic, w, *m_param);<br>
+ if(m_param->printNumaStats) {<br>
+ if(m_pool->m_numaNode != closestReconPic->m_numaNode) {<br>
+ m_numRefFrameDiffNuma ++ ;<br>
+ } else {<br>
+ m_numRefFramesSameNuma ++ ;<br>
+ }<br>
+ }<br>
}<br>
}<br>
<br>
@@ -932,6 +945,13 @@<br>
<br>
// Does all the CU analysis, returns best top level mode decision<br>
Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);<br>
+ if(m_param->printNumaStats) {<br>
+ if(m_pool->m_numaNode != m_frame->m_reconPic->m_numaNode) {<br>
+ m_numReconFramesDiffNuma ++ ;<br>
+ } else {<br>
+ m_numReconFramesSameNuma ++ ;<br>
+ }<br>
+ }<br>
<br>
// take a sample of the current active worker count<br>
ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/frameencoder.h<br>
--- a/source/encoder/frameencoder.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/encoder/frameencoder.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -206,7 +206,16 @@<br>
WeightAnalysis operator=(const WeightAnalysis&);<br>
};<br>
<br>
-protected:<br>
+ unsigned int m_numRefFramesSameNuma ;<br>
+ unsigned int m_numRefFrameDiffNuma ;<br>
+ unsigned int m_numReconFramesSameNuma ;<br>
+ unsigned int m_numReconFramesDiffNuma ;<br>
+ unsigned int getNumRefFramesSameNuma() { return m_numRefFramesSameNuma ; }<br>
+ unsigned int getNumRefFramesDiffNuma() { return m_numRefFrameDiffNuma ; }<br>
+ unsigned int getNumReconFramesSameNuma() { return m_numReconFramesSameNuma ; }<br>
+ unsigned int getNumReconFramesDiffNuma() { return m_numReconFramesDiffNuma ; }<br>
+<br>
+ protected:<br>
<br>
bool initializeGeoms();<br>
<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/x265.h<br>
--- a/source/x265.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/x265.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -1172,6 +1172,11 @@<br>
* picture average light level (or 0). */<br>
const char* contentLightLevelInfo;<br>
<br>
+ /* Print NUMA statistics collected from the code on the console to show the<br>
+ * number of times the recon and ref pics were located on the same NUMA socket,<br>
+ * and on different sockets */<br>
+ int printNumaStats ;<br>
+<br>
} x265_param;<br>
<br>
/* x265_param_alloc:<br>
diff -r 0c1f9d982944 -r 0206efdac228 source/x265cli.h<br>
--- a/source/x265cli.h Tue Aug 04 15:37:26 2015 +0000<br>
+++ b/source/x265cli.h Tue Aug 04 16:10:01 2015 +0000<br>
@@ -218,6 +218,7 @@<br>
{ "no-temporal-layers", no_argument, NULL, 0 },<br>
{ "qg-size", required_argument, NULL, 0 },<br>
{ "recon-y4m-exec", required_argument, NULL, 0 },<br>
+ { "print-numa-stats", no_argument, NULL, 0 },<br>
{ 0, 0, 0, 0 },<br>
{ 0, 0, 0, 0 },<br>
{ 0, 0, 0, 0 },<br>
@@ -414,6 +415,7 @@<br>
H1("-r/--recon <filename> Reconstructed raw image YUV or Y4M output file name\n");<br>
H1(" --recon-depth <integer> Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n");<br>
H1(" --recon-y4m-exec <string> pipe reconstructed frames to Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");<br>
+ H1(" --print-numa-stats print statistics related to socket information for ref and recon frames\n");<br>
H1("\nExecutable return codes:\n");<br>
H1(" 0 - encode successful\n");<br>
H1(" 1 - unable to parse command line\n");<br>
</blockquote></div><br></div></div></div>
<br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br></div>