[x265] [PATCH] Performance: Enabling recon frames to be NUMA-aware when the

Pradeep Ramachandran pradeep at multicorewareinc.com
Wed Aug 5 12:08:03 CEST 2015


As I said in the patch comment, although we see a considerable reduction
in the number of writes to the remote socket, we don't see any improvement
in encoding performance. This is perhaps because writes aren't the limiter
for x265 performance in the settings/videos that we tried.

Pradeep Ramachandran, PhD
Solution Architect,
Multicoreware Inc.
Ph:   +91 99627 82018

On Wed, Aug 5, 2015 at 3:02 PM, Ximing Cheng <chengximing1989 at gmail.com>
wrote:

> How much does this patch improve the performance of a multi-NUMA-node
> server? Thanks!
>
> On Wed, Aug 5, 2015 at 5:02 PM, Pradeep Ramachandran <
> pradeep at multicorewareinc.com> wrote:
>
>> There was a merge problem that rendered this patch inapplicable on the
>> tip. Please ignore.
>> Apologies for the confusion.
>>
>> Pradeep.
>>
>> Pradeep Ramachandran, PhD
>> Solution Architect,
>> Multicoreware Inc.
>> Ph:   +91 99627 82018
>>
>> On Wed, Aug 5, 2015 at 7:35 PM, Pradeep <pradeep at multicorewareinc.com>
>> wrote:
>>
>>> # HG changeset patch
>>> # User Pradeep <pradeep at multicorewareinc.com>
>>> # Date 1438704601 0
>>> #      Tue Aug 04 16:10:01 2015 +0000
>>> # Node ID 0206efdac228891f348c8d6c7ad7ced369c840a3
>>> # Parent  0c1f9d98294454d3bf896aeb24be881d8aa53434
>>> Performance: Enabling recon frames to be NUMA-aware when the
>>> frame encoder thread creates them. Seeing considerable reduction in
>>> the number of cross-socket accesses, but impact on performance of
>>> sample videos is rather small.
>>>
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/common/frame.cpp
>>> --- a/source/common/frame.cpp   Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/common/frame.cpp   Tue Aug 04 16:10:01 2015 +0000
>>> @@ -51,10 +51,34 @@
>>>             m_lowres.create(m_fencPic, param->bframes,
>>> !!param->rc.aqMode);
>>>  }
>>>
>>> -bool Frame::allocEncodeData(x265_param *param, const SPS& sps)
>>> +bool Frame::allocEncodeData(x265_param *param, const SPS& sps, const
>>> int numaNode)
>>>  {
>>> -    m_encData = new FrameData;
>>> -    m_reconPic = new PicYuv;
>>> +    int selNumaNode = numaNode ;
>>> +#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7
>>> +    GROUP_AFFINITY groupAffinity;
>>> +    if (GetNumaNodeProcessorMaskEx((USHORT)selNumaNode,
>>> &groupAffinity)) {
>>> +        if(VirtualAllocExNuma(GetCurrentProcess(),
>>> +                              NULL,
>>> +                              sizeof(FrameData)+sizeof(PicYuv),
>>> +                              MEM_COMMIT,
>>> +                              PAGE_READWRITE,
>>> +                              selNumaNode)) {
>>> +            // Successful commit, do nothing
>>> +        }
>>> +    }
>>> +#elif HAVE_LIBNUMA
>>> +    if(numa_available() >= 0) {
>>> +        numa_set_preferred(selNumaNode) ;
>>> +        numa_set_localalloc() ;
>>> +    } else {
>>> +        selNumaNode = -1 ;
>>> +    }
>>> +#else
>>> +    selNumaNode = -1 ;
>>> +#endif // HAVE_LIBNUMA
>>> +
>>> +    m_encData = new FrameData(selNumaNode) ;
>>> +    m_reconPic = new PicYuv(selNumaNode) ;
>>>      m_encData->m_reconPic = m_reconPic;
>>>      bool ok = m_encData->create(param, sps) &&
>>> m_reconPic->create(param->sourceWidth, param->sourceHeight,
>>> param->internalCsp);
>>>      if (ok)
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/common/frame.h
>>> --- a/source/common/frame.h     Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/common/frame.h     Tue Aug 04 16:10:01 2015 +0000
>>> @@ -28,6 +28,10 @@
>>>  #include "lowres.h"
>>>  #include "threading.h"
>>>
>>> +#if HAVE_LIBNUMA
>>> +#include <numa.h>
>>> +#endif // HAVE_LIBNUMA
>>> +
>>>  namespace X265_NS {
>>>  // private namespace
>>>
>>> @@ -67,10 +71,11 @@
>>>      Frame*                 m_prev;
>>>      x265_param*            m_param;              // Points to the
>>> latest param set for the frame.
>>>      x265_analysis_data     m_analysisData;
>>> +
>>>      Frame();
>>>
>>>      bool create(x265_param *param);
>>> -    bool allocEncodeData(x265_param *param, const SPS& sps);
>>> +    bool allocEncodeData(x265_param *param, const SPS& sps, const int
>>> numaNode);
>>>      void reinit(const SPS& sps);
>>>      void destroy();
>>>  };
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/common/framedata.cpp
>>> --- a/source/common/framedata.cpp       Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/common/framedata.cpp       Tue Aug 04 16:10:01 2015 +0000
>>> @@ -26,9 +26,10 @@
>>>
>>>  using namespace X265_NS;
>>>
>>> -FrameData::FrameData()
>>> +FrameData::FrameData(int numaNode)
>>>  {
>>>      memset(this, 0, sizeof(*this));
>>> +    m_numaNode = numaNode ;
>>>  }
>>>
>>>  bool FrameData::create(x265_param *param, const SPS& sps)
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/common/framedata.h
>>> --- a/source/common/framedata.h Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/common/framedata.h Tue Aug 04 16:10:01 2015 +0000
>>> @@ -107,6 +107,8 @@
>>>      CUDataMemPool  m_cuMemPool;
>>>      CUData*        m_picCTU;
>>>
>>> +    int            m_numaNode ;
>>> +
>>>      /* Rate control data used during encode and by references */
>>>      struct RCStatCU
>>>      {
>>> @@ -140,7 +142,7 @@
>>>      double         m_avgQpAq;    /* avg QP as decided by AQ in addition
>>> to rate-control */
>>>      double         m_rateFactor; /* calculated based on the Frame QP */
>>>
>>> -    FrameData();
>>> +    FrameData(int numaNode=-1);
>>>
>>>      bool create(x265_param *param, const SPS& sps);
>>>      void reinit(const SPS& sps);
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/common/param.cpp
>>> --- a/source/common/param.cpp   Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/common/param.cpp   Tue Aug 04 16:10:01 2015 +0000
>>> @@ -855,6 +855,7 @@
>>>      OPT("qg-size") p->rc.qgSize = atoi(value);
>>>      OPT("master-display") p->masteringDisplayColorVolume =
>>> strdup(value);
>>>      OPT("max-cll") p->contentLightLevelInfo = strdup(value);
>>> +    OPT("print-numa-stats") p->printNumaStats = atobool(value) ;
>>>      else
>>>          return X265_PARAM_BAD_NAME;
>>>  #undef OPT
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/common/picyuv.cpp
>>> --- a/source/common/picyuv.cpp  Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/common/picyuv.cpp  Tue Aug 04 16:10:01 2015 +0000
>>> @@ -28,7 +28,8 @@
>>>
>>>  using namespace X265_NS;
>>>
>>> -PicYuv::PicYuv()
>>> +PicYuv::PicYuv(int numaNode):
>>> +    m_numaNode(numaNode)
>>>  {
>>>      m_picBuf[0] = NULL;
>>>      m_picBuf[1] = NULL;
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/common/picyuv.h
>>> --- a/source/common/picyuv.h    Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/common/picyuv.h    Tue Aug 04 16:10:01 2015 +0000
>>> @@ -59,8 +59,9 @@
>>>      uint32_t m_lumaMarginY;
>>>      uint32_t m_chromaMarginX;
>>>      uint32_t m_chromaMarginY;
>>> +    int32_t  m_numaNode ;
>>>
>>> -    PicYuv();
>>> +    PicYuv(int numaNode=-1);
>>>
>>>      bool  create(uint32_t picWidth, uint32_t picHeight, uint32_t csp);
>>>      bool  createOffsets(const SPS& sps);
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/common/threadpool.cpp
>>> --- a/source/common/threadpool.cpp      Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/common/threadpool.cpp      Tue Aug 04 16:10:01 2015 +0000
>>> @@ -338,6 +338,7 @@
>>>  ThreadPool::ThreadPool()
>>>  {
>>>      memset(this, 0, sizeof(*this));
>>> +    m_numaNode = -1 ;
>>>  }
>>>
>>>  bool ThreadPool::create(int numThreads, int maxProviders, int node)
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/dpb.cpp
>>> --- a/source/encoder/dpb.cpp    Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/encoder/dpb.cpp    Tue Aug 04 16:10:01 2015 +0000
>>> @@ -58,6 +58,23 @@
>>>          delete m_picSymFreeList;
>>>          m_picSymFreeList = next;
>>>      }
>>> +
>>> +    if(m_picSymFreeListNuma) {
>>> +        for(int i=0; i<m_numNumaNodes; i++) {
>>> +            while(m_picSymFreeListNuma[i]) {
>>> +                FrameData* next =
>>> m_picSymFreeListNuma[i]->m_freeListNext;
>>> +                m_picSymFreeListNuma[i]->destroy();
>>> +
>>> +                m_picSymFreeListNuma[i]->m_reconPic->destroy();
>>> +                delete m_picSymFreeListNuma[i]->m_reconPic;
>>> +
>>> +                delete m_picSymFreeListNuma[i];
>>> +                m_picSymFreeListNuma[i] = next;
>>> +            }
>>> +            delete m_picSymFreeListNuma[i] ;
>>> +        }
>>> +        delete m_picSymFreeListNuma ;
>>> +    }
>>>  }
>>>
>>>  // move unreferenced pictures from picList to freeList for recycle
>>> @@ -78,9 +95,17 @@
>>>              m_picList.remove(*curFrame);
>>>              iterFrame = m_picList.first();
>>>
>>> +            int encDataNumaNode = curFrame->m_encData->m_numaNode ;
>>> +            if(encDataNumaNode != -1) {
>>> +                X265_CHECK(encDataNumaNode < m_numNumaNodes,
>>> +                           "fatal: frame allocated on non-existant numa
>>> node!\n") ;
>>> +                curFrame->m_encData->m_freeListNext =
>>> m_picSymFreeListNuma[encDataNumaNode] ;
>>> +                m_picSymFreeListNuma[encDataNumaNode] =
>>> curFrame->m_encData ;
>>> +            } else {
>>> +                curFrame->m_encData->m_freeListNext = m_picSymFreeList;
>>> +                m_picSymFreeList = curFrame->m_encData;
>>> +            }
>>>              m_freeList.pushBack(*curFrame);
>>> -            curFrame->m_encData->m_freeListNext = m_picSymFreeList;
>>> -            m_picSymFreeList = curFrame->m_encData;
>>>              curFrame->m_encData = NULL;
>>>              curFrame->m_reconPic = NULL;
>>>          }
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/dpb.h
>>> --- a/source/encoder/dpb.h      Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/encoder/dpb.h      Tue Aug 04 16:10:01 2015 +0000
>>> @@ -47,6 +47,9 @@
>>>      PicList            m_picList;
>>>      PicList            m_freeList;
>>>      FrameData*         m_picSymFreeList;
>>> +    x265_param*        m_param;
>>> +    int                m_numNumaNodes ;
>>> +    FrameData        **m_picSymFreeListNuma ;
>>>
>>>      DPB(x265_param *param)
>>>      {
>>> @@ -58,6 +61,27 @@
>>>          m_maxRefL1 = param->bBPyramid ? 2 : 1;
>>>          m_bOpenGOP = param->bOpenGOP;
>>>          m_bTemporalSublayer = !!param->bEnableTemporalSubLayers;
>>> +        m_param = param ;
>>> +        m_numNumaNodes = -1 ;
>>> +
>>> +#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7)
>>> +        // NUMA supported by default on windows
>>> +        m_numNumaNodes = 1 ;
>>> +        if(GetNumaHighestNodeNumber(&num)) {
>>> +            m_numNumaNodes ++ ;
>>> +        }
>>> +#elif HAVE_LIBNUMA
>>> +        if(numa_available()>=0) {
>>> +            m_numNumaNodes = numa_max_node() + 1 ;
>>> +        }
>>> +#endif // HAVE_LIBNUMA
>>> +
>>> +        if(m_numNumaNodes>0) {
>>> +            m_picSymFreeListNuma = new FrameData*[m_numNumaNodes] ;
>>> +            for(int i=0; i<m_numNumaNodes; i++) {
>>> +                m_picSymFreeListNuma[i] = NULL ;
>>> +            }
>>> +        }
>>>      }
>>>
>>>      ~DPB();
>>> @@ -66,6 +90,17 @@
>>>
>>>      void recycleUnreferenced();
>>>
>>> +    bool isFreeEncDataAvailable() {
>>> +        if(m_picSymFreeList) {
>>> +            return true ;
>>> +        }
>>> +        for(int i=0; i<m_numNumaNodes; i++) {
>>> +            if(m_picSymFreeListNuma[i])
>>> +                return true ;
>>> +        }
>>> +        return false ;
>>> +    }
>>> +
>>>  protected:
>>>
>>>      void computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int
>>> maxDecPicBuffer);
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/encoder.cpp
>>> --- a/source/encoder/encoder.cpp        Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/encoder/encoder.cpp        Tue Aug 04 16:10:01 2015 +0000
>>> @@ -286,6 +286,11 @@
>>>
>>>  void Encoder::destroy()
>>>  {
>>> +    int numRefSameNuma = 0 ;
>>> +    int numRefDiffNuma = 0 ;
>>> +    int numReconSameNuma = 0 ;
>>> +    int numReconDiffNuma = 0 ;
>>> +
>>>      if (m_exportedPic)
>>>      {
>>>          ATOMIC_DEC(&m_exportedPic->m_countRefEncoders);
>>> @@ -296,6 +301,13 @@
>>>      {
>>>          if (m_frameEncoder[i])
>>>          {
>>> +            if(m_param->printNumaStats) {
>>> +                numRefSameNuma +=
>>> m_frameEncoder[i]->getNumRefFramesSameNuma() ;
>>> +                numRefDiffNuma +=
>>> m_frameEncoder[i]->getNumRefFramesDiffNuma() ;
>>> +                numReconSameNuma +=
>>> m_frameEncoder[i]->getNumReconFramesSameNuma() ;
>>> +                numReconDiffNuma +=
>>> m_frameEncoder[i]->getNumReconFramesDiffNuma() ;
>>> +            }
>>> +
>>>              m_frameEncoder[i]->destroy();
>>>              delete m_frameEncoder[i];
>>>          }
>>> @@ -323,6 +335,16 @@
>>>      X265_FREE(m_buOffsetY);
>>>      X265_FREE(m_buOffsetC);
>>>
>>> +    if(m_param && m_param->printNumaStats) {
>>> +        printf("Num new Encoder data alloc       = %d\n",
>>> m_numNewEncodeDataAlloc) ;
>>> +        printf("Num same node Encoder data reuse = %d\n",
>>> m_numSameNumaEncData) ;
>>> +        printf("Num diff node Encoder data reuse = %d\n",
>>> m_numDiffNumaEncData) ;
>>> +        printf("Num Ref frames in Same numa      = %d\n",
>>> numRefSameNuma) ;
>>> +        printf("Num Ref frames in Diff numa      = %d\n",
>>> numRefDiffNuma) ;
>>> +        printf("Num Recon frames in Same numa    = %d\n",
>>> numReconSameNuma) ;
>>> +        printf("Num Recon frames in Diff numa    = %d\n",
>>> numReconDiffNuma) ;
>>> +    }
>>> +
>>>      if (m_analysisFile)
>>>          fclose(m_analysisFile);
>>>
>>> @@ -511,6 +533,7 @@
>>>
>>>      FrameEncoder *curEncoder = m_frameEncoder[m_curEncoder];
>>>      m_curEncoder = (m_curEncoder + 1) % m_param->frameNumThreads;
>>> +
>>>      int ret = 0;
>>>
>>>      /* Normal operation is to wait for the current frame encoder to
>>> complete its current frame
>>> @@ -633,15 +656,49 @@
>>>          if (frameEnc && !pass)
>>>          {
>>>              /* give this frame a FrameData instance before encoding */
>>> -            if (m_dpb->m_picSymFreeList)
>>> +            // If NUMA aware allocation is enabled, try to preferably
>>> select a frame from this numa
>>> +            // node if available. If disabled, give any free node. If
>>> no free node, allocate new data
>>> +            if (m_dpb->isFreeEncDataAvailable())
>>>              {
>>> -                frameEnc->m_encData = m_dpb->m_picSymFreeList;
>>> -                m_dpb->m_picSymFreeList =
>>> m_dpb->m_picSymFreeList->m_freeListNext;
>>> -                frameEnc->reinit(m_sps);
>>> +                // Need to figure out which NUMA node this in frame is
>>> going to be
>>> +                // decoded on! try to allocate in data on that node.
>>> +                int threadNumaNode = curEncoder->m_pool->m_numaNode ;
>>> +                int dataNumaNode = -1 ;
>>> +                if(threadNumaNode!=-1) {
>>> +                    int checkingNumaNode = threadNumaNode ;
>>> +                    int numNumaNodes     = m_dpb->m_numNumaNodes ;
>>> +                    bool found           = false ;
>>> +                    for(int i=0; i<numNumaNodes;i++) {
>>> +
>>> if(m_dpb->m_picSymFreeListNuma[checkingNumaNode]) {
>>> +                            dataNumaNode = checkingNumaNode ;
>>> +                            frameEnc->m_encData =
>>> m_dpb->m_picSymFreeListNuma[dataNumaNode] ;
>>> +                            m_dpb->m_picSymFreeListNuma[dataNumaNode] =
>>> +
>>> m_dpb->m_picSymFreeListNuma[dataNumaNode]->m_freeListNext ;
>>> +                            frameEnc->reinit(m_sps) ;
>>> +                            // printf("Worker threads on %d, recon
>>> frame data on %d\n",
>>> +                            //    threadNumaNode, dataNumaNode) ;
>>> +                            found = true ;
>>> +                            break ;
>>> +                        }
>>> +                        checkingNumaNode = (checkingNumaNode+1) %
>>> numNumaNodes ;
>>> +                    }
>>> +                    X265_CHECK(found, "Should've found buffer for in
>>> frame!\n") ;
>>> +                } else {
>>> +                    frameEnc->m_encData = m_dpb->m_picSymFreeList;
>>> +                    m_dpb->m_picSymFreeList =
>>> m_dpb->m_picSymFreeList->m_freeListNext;
>>> +                    frameEnc->reinit(m_sps);
>>> +                    dataNumaNode = frameEnc->m_encData->m_numaNode ;
>>> +                }
>>> +                if(dataNumaNode == threadNumaNode) {
>>> +                    m_numSameNumaEncData ++ ;
>>> +                } else {
>>> +                    m_numDiffNumaEncData ++ ;
>>> +                }
>>>              }
>>>              else
>>>              {
>>> -                frameEnc->allocEncodeData(m_param, m_sps);
>>> +                m_numNewEncodeDataAlloc ++ ;
>>> +                frameEnc->allocEncodeData(m_param, m_sps,
>>> curEncoder->m_pool->m_numaNode);
>>>                  Slice* slice = frameEnc->m_encData->m_slice;
>>>                  slice->m_sps = &m_sps;
>>>                  slice->m_pps = &m_pps;
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/encoder.h
>>> --- a/source/encoder/encoder.h  Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/encoder/encoder.h  Tue Aug 04 16:10:01 2015 +0000
>>> @@ -133,6 +133,9 @@
>>>      bool               m_aborted;          // fatal error detected
>>>      bool               m_reconfigured;      // reconfigure of encoder
>>> detected
>>>
>>> +    uint32_t           m_numNewEncodeDataAlloc ;
>>> +    uint32_t           m_numSameNumaEncData ;
>>> +    uint32_t           m_numDiffNumaEncData ;
>>>      Encoder();
>>>      ~Encoder() {}
>>>
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/frameencoder.cpp
>>> --- a/source/encoder/frameencoder.cpp   Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/encoder/frameencoder.cpp   Tue Aug 04 16:10:01 2015 +0000
>>> @@ -60,6 +60,11 @@
>>>      m_ctuGeomMap = NULL;
>>>      m_localTldIdx = 0;
>>>      memset(&m_rce, 0, sizeof(RateControlEntry));
>>> +
>>> +    m_numRefFramesSameNuma = 0 ;
>>> +    m_numRefFrameDiffNuma = 0 ;
>>> +    m_numReconFramesSameNuma = 0 ;
>>> +    m_numReconFramesDiffNuma = 0 ;
>>>  }
>>>
>>>  void FrameEncoder::destroy()
>>> @@ -357,7 +362,15 @@
>>>              WeightParam *w = NULL;
>>>              if ((bUseWeightP || bUseWeightB) &&
>>> slice->m_weightPredTable[l][ref][0].bPresentFlag)
>>>                  w = slice->m_weightPredTable[l][ref];
>>> -
>>> m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);
>>> +            PicYuv* closestReconPic =
>>> slice->m_refPicList[l][ref]->m_reconPic ;
>>> +            m_mref[l][ref].init(closestReconPic, w, *m_param);
>>> +            if(m_param->printNumaStats) {
>>> +                if(m_pool->m_numaNode != closestReconPic->m_numaNode) {
>>> +                    m_numRefFrameDiffNuma ++ ;
>>> +                } else {
>>> +                    m_numRefFramesSameNuma ++ ;
>>> +                }
>>> +            }
>>>          }
>>>      }
>>>
>>> @@ -932,6 +945,13 @@
>>>
>>>          // Does all the CU analysis, returns best top level mode
>>> decision
>>>          Mode& best = tld.analysis.compressCTU(*ctu, *m_frame,
>>> m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
>>> +        if(m_param->printNumaStats) {
>>> +            if(m_pool->m_numaNode != m_frame->m_reconPic->m_numaNode) {
>>> +                m_numReconFramesDiffNuma ++ ;
>>> +            } else {
>>> +                m_numReconFramesSameNuma ++ ;
>>> +            }
>>> +        }
>>>
>>>          // take a sample of the current active worker count
>>>          ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/encoder/frameencoder.h
>>> --- a/source/encoder/frameencoder.h     Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/encoder/frameencoder.h     Tue Aug 04 16:10:01 2015 +0000
>>> @@ -206,7 +206,16 @@
>>>          WeightAnalysis operator=(const WeightAnalysis&);
>>>      };
>>>
>>> -protected:
>>> +    unsigned int             m_numRefFramesSameNuma ;
>>> +    unsigned int             m_numRefFrameDiffNuma ;
>>> +    unsigned int             m_numReconFramesSameNuma ;
>>> +    unsigned int             m_numReconFramesDiffNuma ;
>>> +    unsigned int             getNumRefFramesSameNuma() { return
>>> m_numRefFramesSameNuma ; }
>>> +    unsigned int             getNumRefFramesDiffNuma() { return
>>> m_numRefFrameDiffNuma ; }
>>> +    unsigned int             getNumReconFramesSameNuma() { return
>>> m_numReconFramesSameNuma ; }
>>> +    unsigned int             getNumReconFramesDiffNuma() { return
>>> m_numReconFramesDiffNuma ; }
>>> +
>>> +    protected:
>>>
>>>      bool initializeGeoms();
>>>
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/x265.h
>>> --- a/source/x265.h     Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/x265.h     Tue Aug 04 16:10:01 2015 +0000
>>> @@ -1172,6 +1172,11 @@
>>>       * picture average light level (or 0). */
>>>      const char* contentLightLevelInfo;
>>>
>>> +    /* Print NUMA statistics collected from the code on the console to
>>> show the
>>> +     * number of times the recon and ref pics were located on the same
>>> NUMA socket,
>>> +     * and on different sockets */
>>> +    int printNumaStats ;
>>> +
>>>  } x265_param;
>>>
>>>  /* x265_param_alloc:
>>> diff -r 0c1f9d982944 -r 0206efdac228 source/x265cli.h
>>> --- a/source/x265cli.h  Tue Aug 04 15:37:26 2015 +0000
>>> +++ b/source/x265cli.h  Tue Aug 04 16:10:01 2015 +0000
>>> @@ -218,6 +218,7 @@
>>>      { "no-temporal-layers",   no_argument, NULL, 0 },
>>>      { "qg-size",        required_argument, NULL, 0 },
>>>      { "recon-y4m-exec", required_argument, NULL, 0 },
>>> +    { "print-numa-stats", no_argument, NULL, 0 },
>>>      { 0, 0, 0, 0 },
>>>      { 0, 0, 0, 0 },
>>>      { 0, 0, 0, 0 },
>>> @@ -414,6 +415,7 @@
>>>      H1("-r/--recon <filename>            Reconstructed raw image YUV or
>>> Y4M output file name\n");
>>>      H1("   --recon-depth <integer>       Bit-depth of reconstructed raw
>>> image file. Defaults to input bit depth, or 8 if Y4M\n");
>>>      H1("   --recon-y4m-exec <string>     pipe reconstructed frames to
>>> Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");
>>> +    H1("   --print-numa-stats            print statistics related to
>>> socket information for ref and recon frames\n");
>>>      H1("\nExecutable return codes:\n");
>>>      H1("    0 - encode successful\n");
>>>      H1("    1 - unable to parse command line\n");
>>>
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150805/d16aec01/attachment-0001.html>


More information about the x265-devel mailing list