[x265] [PATCH 1 of 2 EXPERIMENTAL] slice: prep work for per-numa recon picture copies

Steve Borho steve at borho.org
Wed Aug 5 16:52:54 CEST 2015


On 08/05, Deepthi Nandakumar wrote:
> Steve,
> 
> allocRecon is now being called unconditionally, so a recon frame gets
> allocated for every single frame, as opposed to an available frame being
> chosen from the DPB freeList?

the recon is allocated only the first time that FrameData has been seen by
a frame encoder on that particular socket.  The number of
FrameData instances stays the same, but each one will have one recon
PicYuv allocated per active socket.  If your encoder is only using the
second socket, for instance, it will still only allocate one recon
PicYuv per FrameData (m_nodes[0].reconPic will remain NULL).

it seemed better to perform the 'already allocated' check within the
function; that way callers do not have to know about any of the NUMA
build option complexities.
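
for illustration, a rough sketch of the caller's view (simplified; 'frame',
'sps' and 'node' stand in for the real variables in the patch below, and
error handling is omitted):

    /* frame encoder worker thread, once per frame */
    int node = 0;                             /* later: m_pool->m_numaNode */
    frame->m_encData->allocRecon(sps, node);  /* no-op if this socket already has a recon */
    frame->m_encData->m_ownerNode = node;

    /* code running on the owning node */
    PicYuv* recon = frame->m_encData->getOutputRecon();

the repeated allocRecon() calls stay cheap because the initial pointer test
returns before the per-node lock is ever taken.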

> On Tue, Aug 4, 2015 at 9:03 AM, Steve Borho <steve at borho.org> wrote:
> 
> > # HG changeset patch
> > # User Steve Borho <steve at borho.org>
> > # Date 1438642558 18000
> > #      Mon Aug 03 17:55:58 2015 -0500
> > # Node ID 02e84edaa14399a3a68ade8617c63422f51a305b
> > # Parent  1f161d9c6e35e32998d38ebf5b6dec96f1ef43e2
> > slice: prep work for per-numa recon picture copies
> >
> > Rename slice.m_refPicList to m_refFrameList since it is an array of Frame
> > pointers, and make a new slice.m_refReconPicList array which points directly
> > to the motion reference PicYuv buffers (bypassing the Frame structure).
> >
> > The reconstructed pictures are now allocated by the frame encoder worker
> > thread, making them socket-local. The per-node structures devolve to a
> > single pointer dereference when NUMA support is not compiled in, minimizing
> > the impact when the feature is disabled.
> >
> > The shared offset buffers were moved from the top-level encoder into the
> > SPS structure so the FrameData functions could use them directly (avoiding
> > a major layering violation). The offset buffers are computed based on SPS
> > values, so it seems minimally ugly to keep them there.
> >
> > Later commits will make the PicYuv buffers per-NUMA node and copy
> > reconstructed pixels between nodes on demand.
> >
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/cudata.cpp
> > --- a/source/common/cudata.cpp  Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/cudata.cpp  Mon Aug 03 17:55:58 2015 -0500
> > @@ -1676,7 +1676,7 @@
> >          if (tempRefIdx != -1)
> >          {
> >              uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
> > -            const Frame* colPic =
> > m_slice->m_refPicList[m_slice->isInterB() &&
> > !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> > +            const Frame* colPic =
> > m_slice->m_refFrameList[m_slice->isInterB() &&
> > !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> >              const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
> >
> >              // Scale the vector
> > @@ -1857,7 +1857,7 @@
> >
> >  bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int
> > cuAddr, int partUnitIdx) const
> >  {
> > -    const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() &&
> > !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> > +    const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() &&
> > !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> >      const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
> >
> >      uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
> > @@ -1892,7 +1892,7 @@
> >  // Cache the collocated MV.
> >  bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx,
> > InterNeighbourMV *neighbour) const
> >  {
> > -    const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() &&
> > !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> > +    const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() &&
> > !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> >      const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
> >
> >      uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/deblock.cpp
> > --- a/source/common/deblock.cpp Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/deblock.cpp Mon Aug 03 17:55:58 2015 -0500
> > @@ -209,8 +209,8 @@
> >      const Slice* const sliceQ = cuQ->m_slice;
> >      const Slice* const sliceP = cuP->m_slice;
> >
> > -    const Frame* refP0 = sliceP->getRefPic(0, cuP->m_refIdx[0][partP]);
> > -    const Frame* refQ0 = sliceQ->getRefPic(0, cuQ->m_refIdx[0][partQ]);
> > +    const Frame* refP0 =
> > sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]];
> > +    const Frame* refQ0 =
> > sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]];
> >      const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
> >      const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
> >
> > @@ -221,8 +221,8 @@
> >      }
> >
> >      // (sliceQ->isInterB() || sliceP->isInterB())
> > -    const Frame* refP1 = sliceP->getRefPic(1, cuP->m_refIdx[1][partP]);
> > -    const Frame* refQ1 = sliceQ->getRefPic(1, cuQ->m_refIdx[1][partQ]);
> > +    const Frame* refP1 =
> > sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]];
> > +    const Frame* refQ1 =
> > sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]];
> >      const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
> >      const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
> >
> > @@ -366,7 +366,7 @@
> >
> >  void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx,
> > uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
> >  {
> > -    PicYuv* reconPic = cuQ->m_encData->m_reconPic;
> > +    PicYuv* reconPic = cuQ->m_encData->getOutputRecon();
> >      pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
> >      intptr_t stride = reconPic->m_stride;
> >      const PPS* pps = cuQ->m_slice->m_pps;
> > @@ -474,7 +474,7 @@
> >                  : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >>
> > cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
> >                 "invalid edge\n");
> >
> > -    PicYuv* reconPic = cuQ->m_encData->m_reconPic;
> > +    PicYuv* reconPic = cuQ->m_encData->getOutputRecon();
> >      intptr_t stride = reconPic->m_strideC;
> >      intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr,
> > absPartIdx);
> >      bool bCheckNoFilter = pps->bTransquantBypassEnabled;
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/frame.cpp
> > --- a/source/common/frame.cpp   Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/frame.cpp   Mon Aug 03 17:55:58 2015 -0500
> > @@ -35,7 +35,6 @@
> >      m_reconRowCount.set(0);
> >      m_countRefEncoders = 0;
> >      m_encData = NULL;
> > -    m_reconPic = NULL;
> >      m_next = NULL;
> >      m_prev = NULL;
> >      m_param = NULL;
> > @@ -54,26 +53,13 @@
> >  bool Frame::allocEncodeData(x265_param *param, const SPS& sps)
> >  {
> >      m_encData = new FrameData;
> > -    m_reconPic = new PicYuv;
> > -    m_encData->m_reconPic = m_reconPic;
> > -    bool ok = m_encData->create(param, sps) &&
> > m_reconPic->create(param->sourceWidth, param->sourceHeight,
> > param->internalCsp);
> > -    if (ok)
> > -    {
> > -        /* initialize right border of m_reconpicYuv as SAO may read
> > beyond the
> > -         * end of the picture accessing uninitialized pixels */
> > -        int maxHeight = sps.numCuInHeight * g_maxCUSize;
> > -        memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel) *
> > m_reconPic->m_stride * maxHeight);
> > -        memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) *
> > m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> > -        memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) *
> > m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> > -    }
> > -    return ok;
> > +    return m_encData->create(*param, sps);
> >  }
> >
> >  /* prepare to re-use a FrameData instance to encode a new picture */
> >  void Frame::reinit(const SPS& sps)
> >  {
> >      m_bChromaExtended = false;
> > -    m_reconPic = m_encData->m_reconPic;
> >      m_encData->reinit(sps);
> >  }
> >
> > @@ -93,12 +79,5 @@
> >          m_fencPic = NULL;
> >      }
> >
> > -    if (m_reconPic)
> > -    {
> > -        m_reconPic->destroy();
> > -        delete m_reconPic;
> > -        m_reconPic = NULL;
> > -    }
> > -
> >      m_lowres.destroy();
> >  }
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/frame.h
> > --- a/source/common/frame.h     Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/frame.h     Mon Aug 03 17:55:58 2015 -0500
> > @@ -41,10 +41,9 @@
> >  {
> >  public:
> >
> > -    /* These two items will be NULL until the Frame begins to be encoded,
> > at which point
> > -     * it will be assigned a FrameData instance, which comes with a
> > reconstructed image PicYuv */
> > +    /* will be NULL until the Frame begins to be encoded, at which point
> > it will
> > +     * be assigned a FrameData instance */
> >      FrameData*             m_encData;
> > -    PicYuv*                m_reconPic;
> >
> >      /* Data associated with x265_picture */
> >      PicYuv*                m_fencPic;
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/framedata.cpp
> > --- a/source/common/framedata.cpp       Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/framedata.cpp       Mon Aug 03 17:55:58 2015 -0500
> > @@ -22,24 +22,39 @@
> >
> >  *****************************************************************************/
> >
> >  #include "framedata.h"
> > +#include "threadpool.h"
> >  #include "picyuv.h"
> >
> >  using namespace X265_NS;
> >
> > +PerNodeRecon::~PerNodeRecon()
> > +{
> > +    if (reconPic)
> > +    {
> > +        reconPic->destroy();
> > +        delete reconPic;
> > +    }
> > +}
> > +
> >  FrameData::FrameData()
> >  {
> >      memset(this, 0, sizeof(*this));
> >  }
> >
> > -bool FrameData::create(x265_param *param, const SPS& sps)
> > +bool FrameData::create(const x265_param& param, const SPS& sps)
> >  {
> > -    m_param = param;
> > +    m_param = &param;
> >      m_slice  = new Slice;
> >      m_picCTU = new CUData[sps.numCUsInFrame];
> > +#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) ||
> > HAVE_LIBNUMA
> > +    m_nodes = new PerNodeRecon[ThreadPool::getNumaNodeCount()];
> > +#else
> > +    m_nodes = new PerNodeRecon[1];
> > +#endif
> >
> > -    m_cuMemPool.create(0, param->internalCsp, sps.numCUsInFrame);
> > +    m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame);
> >      for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
> > -        m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param->internalCsp,
> > ctuAddr);
> > +        m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param.internalCsp,
> > ctuAddr);
> >
> >      CHECKED_MALLOC(m_cuStat, RCStatCU, sps.numCUsInFrame);
> >      CHECKED_MALLOC(m_rowStat, RCStatRow, sps.numCuInHeight);
> > @@ -54,11 +69,51 @@
> >  {
> >      memset(m_cuStat, 0, sps.numCUsInFrame * sizeof(*m_cuStat));
> >      memset(m_rowStat, 0, sps.numCuInHeight * sizeof(*m_rowStat));
> > +#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) ||
> > HAVE_LIBNUMA
> > +    for (int i = 0; i < ThreadPool::getNumaNodeCount(); i++)
> > +        m_nodes[i].rows = 0;
> > +#else
> > +    m_nodes->rows = 0;
> > +#endif
> > +}
> > +
> > +bool FrameData::allocRecon(const SPS& sps, int node)
> > +{
> > +    if (m_nodes[node].reconPic)
> > +        return true;
> > +
> > +    ScopedLock s(m_nodes[node].copyLock);
> > +
> > +    if (m_nodes[node].reconPic)
> > +        return true;
> > +
> > +    PicYuv* reconPic = new PicYuv;
> > +    m_nodes[node].reconPic = reconPic;
> > +
> > +    int maxHeight = sps.numCuInHeight * g_maxCUSize;
> > +    if (reconPic->create(m_param->sourceWidth, m_param->sourceHeight,
> > m_param->internalCsp))
> > +    {
> > +        /* initialize right border of recon PicYuv as SAO may read beyond
> > the end
> > +         * of the picture accessing uninitialized pixels */
> > +        memset(reconPic->m_picOrg[0], 0, sizeof(pixel) *
> > reconPic->m_stride * maxHeight);
> > +        memset(reconPic->m_picOrg[1], 0, sizeof(pixel) *
> > reconPic->m_strideC * (maxHeight >> reconPic->m_vChromaShift));
> > +        memset(reconPic->m_picOrg[2], 0, sizeof(pixel) *
> > reconPic->m_strideC * (maxHeight >> reconPic->m_vChromaShift));
> > +
> > +        /* use pre-calculated cu/pu offsets cached in the SPS structure */
> > +        reconPic->m_cuOffsetC = sps.cuOffsetC;
> > +        reconPic->m_cuOffsetY = sps.cuOffsetY;
> > +        reconPic->m_buOffsetC = sps.buOffsetC;
> > +        reconPic->m_buOffsetY = sps.buOffsetY;
> > +        return true;
> > +    }
> > +
> > +    return false;
> >  }
> >
> >  void FrameData::destroy()
> >  {
> >      delete [] m_picCTU;
> > +    delete [] m_nodes;
> >      delete m_slice;
> >      delete m_saoParam;
> >
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/framedata.h
> > --- a/source/common/framedata.h Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/framedata.h Mon Aug 03 17:55:58 2015 -0500
> > @@ -27,6 +27,7 @@
> >  #include "common.h"
> >  #include "slice.h"
> >  #include "cudata.h"
> > +#include "threading.h"
> >
> >  namespace X265_NS {
> >  // private namespace
> > @@ -83,6 +84,21 @@
> >      }
> >  };
> >
> > +struct PerNodeRecon
> > +{
> > +    PicYuv*    reconPic;
> > +    Lock       copyLock;
> > +    int        rows;
> > +
> > +    PerNodeRecon()
> > +    {
> > +        rows = 0;
> > +        reconPic = NULL;
> > +    }
> > +
> > +    ~PerNodeRecon();
> > +};
> > +
> >  /* Per-frame data that is used during encodes and referenced while the
> > picture
> >   * is available for reference. A FrameData instance is attached to a
> > Frame as it
> >   * comes out of the lookahead. Frames which are not being encoded do not
> > have a
> > @@ -93,13 +109,14 @@
> >  class FrameData
> >  {
> >  public:
> > +    PerNodeRecon*  m_nodes;
> > +    int            m_ownerNode;        /* NUMA node of writing frame
> > encoder */
> >
> >      Slice*         m_slice;
> >      SAOParam*      m_saoParam;
> > -    x265_param*    m_param;
> > +    const x265_param* m_param;
> >
> >      FrameData*     m_freeListNext;
> > -    PicYuv*        m_reconPic;
> >      bool           m_bHasReferences;   /* used during DPB/RPS updates */
> >      int            m_frameEncoderID;   /* the ID of the FrameEncoder
> > encoding this frame */
> >      JobProvider*   m_jobProvider;
> > @@ -142,11 +159,23 @@
> >
> >      FrameData();
> >
> > -    bool create(x265_param *param, const SPS& sps);
> > +    bool create(const x265_param& param, const SPS& sps);
> > +    bool allocRecon(const SPS& sps, int node);
> >      void reinit(const SPS& sps);
> >      void destroy();
> >
> > -    CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
> > +    inline CUData* getPicCTU(uint32_t ctuAddr) { return
> > &m_picCTU[ctuAddr]; }
> > +
> > +#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) ||
> > HAVE_LIBNUMA
> > +    /* this function should only be called by worker threads which are
> > encoding this particular
> > +     * frame. workers which are using this frame as a motion reference
> > should use their slice's
> > +     * m_refReconPicList[][] array instead. Its pointers will always
> > reference a buffer which
> > +     * was allocated on their local socket */
> > +    /* TODO: X265_CHECK that current node == m_ownerNode */
> > +    inline PicYuv* getOutputRecon() { return
> > m_nodes[m_ownerNode].reconPic; }
> > +#else
> > +    inline PicYuv* getOutputRecon() { return m_nodes->reconPic; }
> > +#endif
> >  };
> >  }
> >
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/predict.cpp
> > --- a/source/common/predict.cpp Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/predict.cpp Mon Aug 03 17:55:58 2015 -0500
> > @@ -109,18 +109,18 @@
> >              ShortYuv& shortYuv = m_predShortYuv[0];
> >
> >              if (bLuma)
> > -                predInterLumaShort(pu, shortYuv,
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > +                predInterLumaShort(pu, shortYuv,
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> >              if (bChroma)
> > -                predInterChromaShort(pu, shortYuv,
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > +                predInterChromaShort(pu, shortYuv,
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> >
> >              addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
> >          }
> >          else
> >          {
> >              if (bLuma)
> > -                predInterLumaPixel(pu, predYuv,
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > +                predInterLumaPixel(pu, predYuv,
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> >              if (bChroma)
> > -                predInterChromaPixel(pu, predYuv,
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > +                predInterChromaPixel(pu, predYuv,
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> >          }
> >      }
> >      else
> > @@ -179,13 +179,13 @@
> >
> >              if (bLuma)
> >              {
> > -                predInterLumaShort(pu, m_predShortYuv[0],
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > -                predInterLumaShort(pu, m_predShortYuv[1],
> > *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> > +                predInterLumaShort(pu, m_predShortYuv[0],
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> > +                predInterLumaShort(pu, m_predShortYuv[1],
> > *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
> >              }
> >              if (bChroma)
> >              {
> > -                predInterChromaShort(pu, m_predShortYuv[0],
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > -                predInterChromaShort(pu, m_predShortYuv[1],
> > *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> > +                predInterChromaShort(pu, m_predShortYuv[0],
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> > +                predInterChromaShort(pu, m_predShortYuv[1],
> > *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
> >              }
> >
> >              if (pwp0 && pwp1 && (pwp0->bPresentFlag ||
> > pwp1->bPresentFlag))
> > @@ -203,18 +203,18 @@
> >                  ShortYuv& shortYuv = m_predShortYuv[0];
> >
> >                  if (bLuma)
> > -                    predInterLumaShort(pu, shortYuv,
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > +                    predInterLumaShort(pu, shortYuv,
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> >                  if (bChroma)
> > -                    predInterChromaShort(pu, shortYuv,
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > +                    predInterChromaShort(pu, shortYuv,
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> >
> >                  addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
> >              }
> >              else
> >              {
> >                  if (bLuma)
> > -                    predInterLumaPixel(pu, predYuv,
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > +                    predInterLumaPixel(pu, predYuv,
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> >                  if (bChroma)
> > -                    predInterChromaPixel(pu, predYuv,
> > *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> > +                    predInterChromaPixel(pu, predYuv,
> > *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> >              }
> >          }
> >          else
> > @@ -230,18 +230,18 @@
> >                  ShortYuv& shortYuv = m_predShortYuv[0];
> >
> >                  if (bLuma)
> > -                    predInterLumaShort(pu, shortYuv,
> > *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> > +                    predInterLumaShort(pu, shortYuv,
> > *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
> >                  if (bChroma)
> > -                    predInterChromaShort(pu, shortYuv,
> > *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> > +                    predInterChromaShort(pu, shortYuv,
> > *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
> >
> >                  addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
> >              }
> >              else
> >              {
> >                  if (bLuma)
> > -                    predInterLumaPixel(pu, predYuv,
> > *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> > +                    predInterLumaPixel(pu, predYuv,
> > *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
> >                  if (bChroma)
> > -                    predInterChromaPixel(pu, predYuv,
> > *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> > +                    predInterChromaPixel(pu, predYuv,
> > *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
> >              }
> >          }
> >      }
> > @@ -600,8 +600,9 @@
> >      int tuSize = 1 << intraNeighbors.log2TrSize;
> >      int tuSize2 = tuSize << 1;
> >
> > -    pixel* adiOrigin = cu.m_encData->m_reconPic->getLumaAddr(cu.m_cuAddr,
> > cuGeom.absPartIdx + puAbsPartIdx);
> > -    intptr_t picStride = cu.m_encData->m_reconPic->m_stride;
> > +    PicYuv* reconPic = cu.m_encData->getOutputRecon();
> > +    pixel* adiOrigin = reconPic->getLumaAddr(cu.m_cuAddr,
> > cuGeom.absPartIdx + puAbsPartIdx);
> > +    intptr_t picStride = reconPic->m_stride;
> >
> >      fillReferenceSamples(adiOrigin, picStride, intraNeighbors,
> > intraNeighbourBuf[0]);
> >
> > @@ -648,8 +649,9 @@
> >
> >  void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom&
> > cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors,
> > uint32_t chromaId)
> >  {
> > -    const pixel* adiOrigin =
> > cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr,
> > cuGeom.absPartIdx + puAbsPartIdx);
> > -    intptr_t picStride = cu.m_encData->m_reconPic->m_strideC;
> > +    PicYuv* reconPic = cu.m_encData->getOutputRecon();
> > +    const pixel* adiOrigin = reconPic->getChromaAddr(chromaId,
> > cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
> > +    intptr_t picStride = reconPic->m_strideC;
> >
> >      fillReferenceSamples(adiOrigin, picStride, intraNeighbors,
> > intraNeighbourBuf[0]);
> >
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/slice.cpp
> > --- a/source/common/slice.cpp   Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/slice.cpp   Mon Aug 03 17:55:58 2015 -0500
> > @@ -33,7 +33,9 @@
> >  {
> >      if (m_sliceType == I_SLICE)
> >      {
> > -        memset(m_refPicList, 0, sizeof(m_refPicList));
> > +        memset(m_refFrameList, 0, sizeof(m_refFrameList));
> > +        memset(m_refReconPicList, 0, sizeof(m_refReconPicList));
> > +        memset(m_refPOCList, 0, sizeof(m_refPOCList));
> >          m_numRefIdx[1] = m_numRefIdx[0] = 0;
> >          return;
> >      }
> > @@ -106,13 +108,13 @@
> >      {
> >          cIdx = rIdx % numPocTotalCurr;
> >          X265_CHECK(cIdx >= 0 && cIdx < numPocTotalCurr, "RPS index check
> > fail\n");
> > -        m_refPicList[0][rIdx] = rpsCurrList0[cIdx];
> > +        m_refFrameList[0][rIdx] = rpsCurrList0[cIdx];
> >      }
> >
> >      if (m_sliceType != B_SLICE)
> >      {
> >          m_numRefIdx[1] = 0;
> > -        memset(m_refPicList[1], 0, sizeof(m_refPicList[1]));
> > +        memset(m_refFrameList[1], 0, sizeof(m_refFrameList[1]));
> >      }
> >      else
> >      {
> > @@ -120,13 +122,13 @@
> >          {
> >              cIdx = rIdx % numPocTotalCurr;
> >              X265_CHECK(cIdx >= 0 && cIdx < numPocTotalCurr, "RPS index
> > check fail\n");
> > -            m_refPicList[1][rIdx] = rpsCurrList1[cIdx];
> > +            m_refFrameList[1][rIdx] = rpsCurrList1[cIdx];
> >          }
> >      }
> >
> >      for (int dir = 0; dir < 2; dir++)
> >          for (int numRefIdx = 0; numRefIdx < m_numRefIdx[dir]; numRefIdx++)
> > -            m_refPOCList[dir][numRefIdx] =
> > m_refPicList[dir][numRefIdx]->m_poc;
> > +            m_refPOCList[dir][numRefIdx] =
> > m_refFrameList[dir][numRefIdx]->m_poc;
> >  }
> >
> >  void Slice::disableWeights()
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/slice.h
> > --- a/source/common/slice.h     Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/common/slice.h     Mon Aug 03 17:55:58 2015 -0500
> > @@ -31,6 +31,7 @@
> >
> >  class Frame;
> >  class PicList;
> > +class PicYuv;
> >  class MotionReference;
> >
> >  enum SliceType
> > @@ -209,6 +210,13 @@
> >
> >  struct SPS
> >  {
> > +    /* cached PicYuv offset arrays, shared by all instances of
> > +    * PicYuv created by this encoder */
> > +    intptr_t* cuOffsetY;
> > +    intptr_t* cuOffsetC;
> > +    intptr_t* buOffsetY;
> > +    intptr_t* buOffsetC;
> > +
> >      int      chromaFormatIdc;        // use param
> >      uint32_t picWidthInLumaSamples;  // use param
> >      uint32_t picHeightInLumaSamples; // use param
> > @@ -242,6 +250,11 @@
> >
> >      Window   conformanceWindow;
> >      VUI      vuiParameters;
> > +
> > +    SPS()
> > +    {
> > +        memset(this, 0, sizeof(*this));
> > +    }
> >  };
> >
> >  struct PPS
> > @@ -321,7 +334,8 @@
> >      uint32_t    m_colRefIdx;       // never modified
> >
> >      int         m_numRefIdx[2];
> > -    Frame*      m_refPicList[2][MAX_NUM_REF + 1];
> > +    Frame*      m_refFrameList[2][MAX_NUM_REF + 1];
> > +    PicYuv*     m_refReconPicList[2][MAX_NUM_REF + 1];
> >      int         m_refPOCList[2][MAX_NUM_REF + 1];
> >
> >      uint32_t    m_maxNumMergeCand; // use param
> > @@ -332,14 +346,9 @@
> >          m_lastIDR = 0;
> >          m_sLFaseFlag = true;
> >          m_numRefIdx[0] = m_numRefIdx[1] = 0;
> > -        for (int i = 0; i < MAX_NUM_REF; i++)
> > -        {
> > -            m_refPicList[0][i] = NULL;
> > -            m_refPicList[1][i] = NULL;
> > -            m_refPOCList[0][i] = 0;
> > -            m_refPOCList[1][i] = 0;
> > -        }
> > -
> > +        memset(m_refFrameList, 0, sizeof(m_refFrameList));
> > +        memset(m_refReconPicList, 0, sizeof(m_refReconPicList));
> > +        memset(m_refPOCList, 0, sizeof(m_refPOCList));
> >          disableWeights();
> >      }
> >
> > @@ -347,8 +356,6 @@
> >
> >      void setRefPicList(PicList& picList);
> >
> > -    const Frame* getRefPic(int list, int refIdx) const { return refIdx >=
> > 0 ? m_refPicList[list][refIdx] : NULL; }
> > -
> >      bool getRapPicFlag() const
> >      {
> >          return m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp       Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/analysis.cpp       Mon Aug 03 17:55:58 2015 -0500
> > @@ -173,8 +173,8 @@
> >          if (!m_param->rdLevel)
> >          {
> >              /* In RD Level 0/1, copy source pixels into the reconstructed
> > block so
> > -            * they are available for intra predictions */
> > -            m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic,
> > ctu.m_cuAddr, 0);
> > +             * they are available for intra predictions */
> > +
> > m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_encData->getOutputRecon(),
> > ctu.m_cuAddr, 0);
> >
> >              compressInterCU_rd0_4(ctu, cuGeom, qp);
> >
> > @@ -337,7 +337,7 @@
> >      /* Copy best data to encData CTU and recon */
> >      md.bestMode->cu.copyToPic(depth);
> >      if (md.bestMode != &md.pred[PRED_SPLIT])
> > -        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic,
> > parentCTU.m_cuAddr, cuGeom.absPartIdx);
> > +
> > md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_encData->getOutputRecon(),
> > parentCTU.m_cuAddr, cuGeom.absPartIdx);
> >  }
> >
> >  void Analysis::PMODE::processTasks(int workerThreadId)
> > @@ -747,7 +747,7 @@
> >      /* Copy best data to encData CTU and recon */
> >      md.bestMode->cu.copyToPic(depth);
> >      if (md.bestMode != &md.pred[PRED_SPLIT])
> > -        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> > cuGeom.absPartIdx);
> > +
> > md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_encData->getOutputRecon(),
> > cuAddr, cuGeom.absPartIdx);
> >  }
> >
> >  uint32_t Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const
> > CUGeom& cuGeom, int32_t qp)
> > @@ -757,6 +757,8 @@
> >      ModeDepth& md = m_modeDepth[depth];
> >      md.bestMode = NULL;
> >
> > +    PicYuv& reconPic = *m_frame->m_encData->getOutputRecon();
> > +
> >      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> >      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
> >      uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
> > @@ -1051,7 +1053,7 @@
> >                          residualTransformQuantIntra(*md.bestMode, cuGeom,
> > 0, 0, tuDepthRange);
> >                          getBestIntraModeChroma(*md.bestMode, cuGeom);
> >                          residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
> > -
> > md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr,
> > cuGeom.absPartIdx); // TODO:
> > +                        md.bestMode->reconYuv.copyFromPicYuv(reconPic,
> > cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
> >                      }
> >                  }
> >              }
> > @@ -1107,7 +1109,7 @@
> >      X265_CHECK(md.bestMode->ok(), "best mode is not ok");
> >      md.bestMode->cu.copyToPic(depth);
> >      if (m_param->rdLevel)
> > -        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> > cuGeom.absPartIdx);
> > +        md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr,
> > cuGeom.absPartIdx);
> >
> >      return refMask;
> >  }
> > @@ -1356,7 +1358,7 @@
> >      /* Copy best data to encData CTU and recon */
> >      X265_CHECK(md.bestMode->ok(), "best mode is not ok");
> >      md.bestMode->cu.copyToPic(depth);
> > -    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic,
> > parentCTU.m_cuAddr, cuGeom.absPartIdx);
> > +
> > md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_encData->getOutputRecon(),
> > parentCTU.m_cuAddr, cuGeom.absPartIdx);
> >
> >      return refMask;
> >  }
> > @@ -1851,6 +1853,8 @@
> >
> >      cu.copyFromPic(ctu, cuGeom);
> >
> > +    PicYuv& reconPic = *m_frame->m_encData->getOutputRecon();
> > +
> >      Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
> >      if (cuGeom.depth)
> >          m_modeDepth[0].fencYuv.copyPartToYuv(fencYuv, absPartIdx);
> > @@ -1906,7 +1910,6 @@
> >          /* residualTransformQuantInter() wrote transformed residual back
> > into
> >           * resiYuv. Generate the recon pixels by adding it to the
> > prediction */
> >
> > -        PicYuv& reconPic = *m_frame->m_reconPic;
> >          if (cu.m_cbf[0][0])
> >              primitives.cu[sizeIdx].add_ps(reconPic.getLumaAddr(cu.m_cuAddr,
> > absPartIdx), reconPic.m_stride,
> >                                            predY, resiYuv.m_buf[0],
> > predYuv.m_size, resiYuv.m_size);
> > @@ -1969,7 +1972,7 @@
> >      if (m_slice->m_numRefIdx[0])
> >      {
> >          numRefs++;
> > -        const CUData& cu =
> > *m_slice->m_refPicList[0][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
> > +        const CUData& cu =
> > *m_slice->m_refFrameList[0][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
> >          previousQP = cu.m_qp[0];
> >          if (!cu.m_cuDepth[cuGeom.absPartIdx])
> >              return 0;
> > @@ -1983,7 +1986,7 @@
> >      if (m_slice->m_numRefIdx[1])
> >      {
> >          numRefs++;
> > -        const CUData& cu =
> > *m_slice->m_refPicList[1][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
> > +        const CUData& cu =
> > *m_slice->m_refFrameList[1][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
> >          if (!cu.m_cuDepth[cuGeom.absPartIdx])
> >              return 0;
> >          for (uint32_t i = 0; i < cuGeom.numPartitions; i += 4)
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/dpb.cpp
> > --- a/source/encoder/dpb.cpp    Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/dpb.cpp    Mon Aug 03 17:55:58 2015 -0500
> > @@ -47,16 +47,12 @@
> >          delete curFrame;
> >      }
> >
> > -    while (m_picSymFreeList)
> > +    while (m_frameDataFreeList)
> >      {
> > -        FrameData* next = m_picSymFreeList->m_freeListNext;
> > -        m_picSymFreeList->destroy();
> > -
> > -        m_picSymFreeList->m_reconPic->destroy();
> > -        delete m_picSymFreeList->m_reconPic;
> > -
> > -        delete m_picSymFreeList;
> > -        m_picSymFreeList = next;
> > +        FrameData* next = m_frameDataFreeList->m_freeListNext;
> > +        m_frameDataFreeList->destroy();
> > +        delete m_frameDataFreeList;
> > +        m_frameDataFreeList = next;
> >      }
> >  }
> >
> > @@ -79,10 +75,9 @@
> >              iterFrame = m_picList.first();
> >
> >              m_freeList.pushBack(*curFrame);
> > -            curFrame->m_encData->m_freeListNext = m_picSymFreeList;
> > -            m_picSymFreeList = curFrame->m_encData;
> > +            curFrame->m_encData->m_freeListNext = m_frameDataFreeList;
> > +            m_frameDataFreeList = curFrame->m_encData;
> >              curFrame->m_encData = NULL;
> > -            curFrame->m_reconPic = NULL;
> >          }
> >      }
> >  }
> > @@ -171,7 +166,7 @@
> >      {
> >          for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
> >          {
> > -            Frame *refpic = slice->m_refPicList[l][ref];
> > +            Frame *refpic = slice->m_refFrameList[l][ref];
> >              ATOMIC_INC(&refpic->m_countRefEncoders);
> >          }
> >      }
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/dpb.h
> > --- a/source/encoder/dpb.h      Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/dpb.h      Mon Aug 03 17:55:58 2015 -0500
> > @@ -46,14 +46,14 @@
> >      bool               m_bTemporalSublayer;
> >      PicList            m_picList;
> >      PicList            m_freeList;
> > -    FrameData*         m_picSymFreeList;
> > +    FrameData*         m_frameDataFreeList;
> >
> >      DPB(x265_param *param)
> >      {
> >          m_lastIDR = 0;
> >          m_pocCRA = 0;
> >          m_bRefreshPending = false;
> > -        m_picSymFreeList = NULL;
> > +        m_frameDataFreeList = NULL;
> >          m_maxRefL0 = param->maxNumReferences;
> >          m_maxRefL1 = param->bBPyramid ? 2 : 1;
> >          m_bOpenGOP = param->bOpenGOP;
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/encoder.cpp
> > --- a/source/encoder/encoder.cpp        Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/encoder.cpp        Mon Aug 03 17:55:58 2015 -0500
> > @@ -66,10 +66,6 @@
> >      m_outputCount = 0;
> >      m_param = NULL;
> >      m_latestParam = NULL;
> > -    m_cuOffsetY = NULL;
> > -    m_cuOffsetC = NULL;
> > -    m_buOffsetY = NULL;
> > -    m_buOffsetC = NULL;
> >      m_threadPool = NULL;
> >      m_analysisFile = NULL;
> >      for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)
> > @@ -318,10 +314,10 @@
> >          delete m_rateControl;
> >      }
> >
> > -    X265_FREE(m_cuOffsetY);
> > -    X265_FREE(m_cuOffsetC);
> > -    X265_FREE(m_buOffsetY);
> > -    X265_FREE(m_buOffsetC);
> > +    X265_FREE(m_sps.cuOffsetY);
> > +    X265_FREE(m_sps.cuOffsetC);
> > +    X265_FREE(m_sps.buOffsetY);
> > +    X265_FREE(m_sps.buOffsetC);
> >
> >      if (m_analysisFile)
> >          fclose(m_analysisFile);
> > @@ -416,12 +412,12 @@
> >                  /* the first PicYuv created is asked to generate the CU
> > and block unit offset
> >                   * arrays which are then shared with all subsequent
> > PicYuv (orig and recon)
> >                   * allocated by this top level encoder */
> > -                if (m_cuOffsetY)
> > +                if (m_sps.cuOffsetY)
> >                  {
> > -                    inFrame->m_fencPic->m_cuOffsetC = m_cuOffsetC;
> > -                    inFrame->m_fencPic->m_cuOffsetY = m_cuOffsetY;
> > -                    inFrame->m_fencPic->m_buOffsetC = m_buOffsetC;
> > -                    inFrame->m_fencPic->m_buOffsetY = m_buOffsetY;
> > +                    inFrame->m_fencPic->m_cuOffsetC = m_sps.cuOffsetC;
> > +                    inFrame->m_fencPic->m_cuOffsetY = m_sps.cuOffsetY;
> > +                    inFrame->m_fencPic->m_buOffsetC = m_sps.buOffsetC;
> > +                    inFrame->m_fencPic->m_buOffsetY = m_sps.buOffsetY;
> >                  }
> >                  else
> >                  {
> > @@ -435,10 +431,10 @@
> >                      }
> >                      else
> >                      {
> > -                        m_cuOffsetC = inFrame->m_fencPic->m_cuOffsetC;
> > -                        m_cuOffsetY = inFrame->m_fencPic->m_cuOffsetY;
> > -                        m_buOffsetC = inFrame->m_fencPic->m_buOffsetC;
> > -                        m_buOffsetY = inFrame->m_fencPic->m_buOffsetY;
> > +                        m_sps.cuOffsetC = inFrame->m_fencPic->m_cuOffsetC;
> > +                        m_sps.cuOffsetY = inFrame->m_fencPic->m_cuOffsetY;
> > +                        m_sps.buOffsetC = inFrame->m_fencPic->m_buOffsetC;
> > +                        m_sps.buOffsetY = inFrame->m_fencPic->m_buOffsetY;
> >                      }
> >                  }
> >              }
> > @@ -538,7 +534,7 @@
> >
> >              if (pic_out)
> >              {
> > -                PicYuv *recpic = outFrame->m_reconPic;
> > +                PicYuv *recpic = outFrame->m_encData->getOutputRecon();
> >                  pic_out->poc = slice->m_poc;
> >                  pic_out->bitDepth = X265_DEPTH;
> >                  pic_out->userData = outFrame->m_userData;
> > @@ -633,10 +629,10 @@
> >          if (frameEnc && !pass)
> >          {
> >              /* give this frame a FrameData instance before encoding */
> > -            if (m_dpb->m_picSymFreeList)
> > +            if (m_dpb->m_frameDataFreeList)
> >              {
> > -                frameEnc->m_encData = m_dpb->m_picSymFreeList;
> > -                m_dpb->m_picSymFreeList =
> > m_dpb->m_picSymFreeList->m_freeListNext;
> > +                frameEnc->m_encData = m_dpb->m_frameDataFreeList;
> > +                m_dpb->m_frameDataFreeList =
> > m_dpb->m_frameDataFreeList->m_freeListNext;
> >                  frameEnc->reinit(m_sps);
> >              }
> >              else
> > @@ -647,10 +643,6 @@
> >                  slice->m_pps = &m_pps;
> >                  slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
> >                  slice->m_endCUAddr =
> > slice->realEndAddress(m_sps.numCUsInFrame * NUM_4x4_PARTITIONS);
> > -                frameEnc->m_reconPic->m_cuOffsetC = m_cuOffsetC;
> > -                frameEnc->m_reconPic->m_cuOffsetY = m_cuOffsetY;
> > -                frameEnc->m_reconPic->m_buOffsetC = m_buOffsetC;
> > -                frameEnc->m_reconPic->m_buOffsetY = m_buOffsetY;
> >              }
> >
> >              curEncoder->m_rce.encodeOrder = m_encodedFrameNum++;
> > @@ -1054,7 +1046,7 @@
> >
> >  void Encoder::finishFrameStats(Frame* curFrame, FrameEncoder *curEncoder,
> > uint64_t bits, x265_frame_stats* frameStats)
> >  {
> > -    PicYuv* reconPic = curFrame->m_reconPic;
> > +    PicYuv *reconPic = curFrame->m_encData->getOutputRecon();
> >
> >      //===== calculate PSNR =====
> >      int width  = reconPic->m_picWidth -
> > m_sps.conformanceWindow.rightOffset;
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/encoder.h
> > --- a/source/encoder/encoder.h  Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/encoder.h  Mon Aug 03 17:55:58 2015 -0500
> > @@ -93,13 +93,6 @@
> >      int                m_numPools;
> >      int                m_curEncoder;
> >
> > -    /* cached PicYuv offset arrays, shared by all instances of
> > -     * PicYuv created by this encoder */
> > -    intptr_t*          m_cuOffsetY;
> > -    intptr_t*          m_cuOffsetC;
> > -    intptr_t*          m_buOffsetY;
> > -    intptr_t*          m_buOffsetC;
> > -
> >      /* Collect statistics globally */
> >      EncStats           m_analyzeAll;
> >      EncStats           m_analyzeI;
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/frameencoder.cpp
> > --- a/source/encoder/frameencoder.cpp   Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/frameencoder.cpp   Mon Aug 03 17:55:58 2015 -0500
> > @@ -329,6 +329,16 @@
> >      if (m_frame->m_lowres.bKeyframe && m_param->bRepeatHeaders)
> >          m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
> >
> > +#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) ||
> > HAVE_LIBNUMA
> > +    int numaNode = 0; // m_pool ? m_pool->m_numaNode : 0;
> > +#else
> > +    int numaNode = 0;
> > +#endif
> > +
> > +    /* Claim this frame as being encoded by this NUMA node */
> > +    m_frame->m_encData->allocRecon(m_top->m_sps, numaNode); /* TODO: bail
> > if failure */
> > +    m_frame->m_encData->m_ownerNode = numaNode;
> > +
> >      // Weighted Prediction parameters estimation.
> >      bool bUseWeightP = slice->m_sliceType == P_SLICE &&
> > slice->m_pps->bUseWeightPred;
> >      bool bUseWeightB = slice->m_sliceType == B_SLICE &&
> > slice->m_pps->bUseWeightedBiPred;
> > @@ -357,7 +367,9 @@
> >              WeightParam *w = NULL;
> >              if ((bUseWeightP || bUseWeightB) &&
> > slice->m_weightPredTable[l][ref][0].bPresentFlag)
> >                  w = slice->m_weightPredTable[l][ref];
> > -            m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic,
> > w, *m_param);
> > +
> > slice->m_refFrameList[l][ref]->m_encData->allocRecon(m_top->m_sps,
> > numaNode); /* TODO: bail if failure */
> > +            slice->m_refReconPicList[l][ref] =
> > slice->m_refFrameList[l][ref]->m_encData->m_nodes[numaNode].reconPic;
> > +            m_mref[l][ref].init(slice->m_refReconPicList[l][ref], w,
> > *m_param);
> >          }
> >      }
> >
> > @@ -477,7 +489,7 @@
> >      /* CQP and CRF (without capped VBV) doesn't use mid-frame statistics
> > to
> >       * tune RateControl parameters for other frames.
> >       * Hence, for these modes, update m_startEndOrder and unlock RC for
> > previous threads waiting in
> > -     * RateControlEnd here, after the slicecontexts are initialized. For
> > the rest - ABR
> > +     * RateControlEnd here, after the slice contexts are initialized. For
> > the rest - ABR
> >       * and VBV, unlock only after rateControlUpdateStats of this frame is
> > called */
> >      if (m_param->rc.rateControlMode != X265_RC_ABR &&
> > !m_top->m_rateControl->m_isVbv)
> >      {
> > @@ -501,12 +513,14 @@
> >              {
> >                  for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
> >                  {
> > -                    Frame *refpic = slice->m_refPicList[l][ref];
> > +                    Frame *refpic = slice->m_refFrameList[l][ref];
> >
> >                      uint32_t reconRowCount =
> > refpic->m_reconRowCount.get();
> >                      while ((reconRowCount != m_numRows) && (reconRowCount
> > < row + m_refLagRows))
> >                          reconRowCount =
> > refpic->m_reconRowCount.waitForChange(reconRowCount);
> >
> > +                    /* TODO: if refpic->m_encData->m_ownerNode !=
> > numaNode, copy rows */
> > +
> >                      if ((bUseWeightP || bUseWeightB) &&
> > m_mref[l][ref].isWeighted)
> >                          m_mref[l][ref].applyWeight(row + m_refLagRows,
> > m_numRows);
> >                  }
> > @@ -540,12 +554,14 @@
> >                      int list = l;
> >                      for (int ref = 0; ref < slice->m_numRefIdx[list];
> > ref++)
> >                      {
> > -                        Frame *refpic = slice->m_refPicList[list][ref];
> > +                        Frame *refpic = slice->m_refFrameList[list][ref];
> >
> >                          uint32_t reconRowCount =
> > refpic->m_reconRowCount.get();
> >                          while ((reconRowCount != m_numRows) &&
> > (reconRowCount < i + m_refLagRows))
> >                              reconRowCount =
> > refpic->m_reconRowCount.waitForChange(reconRowCount);
> >
> > +                        /* TODO: if refpic->m_encData->m_ownerNode !=
> > numaNode, copy rows */
> > +
> >                          if ((bUseWeightP || bUseWeightB) &&
> > m_mref[l][ref].isWeighted)
> >                              m_mref[list][ref].applyWeight(i +
> > m_refLagRows, m_numRows);
> >                      }
> > @@ -697,7 +713,7 @@
> >      {
> >          for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
> >          {
> > -            Frame *refpic = slice->m_refPicList[l][ref];
> > +            Frame *refpic = slice->m_refFrameList[l][ref];
> >              ATOMIC_DEC(&refpic->m_countRefEncoders);
> >          }
> >      }
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/framefilter.cpp
> > --- a/source/encoder/framefilter.cpp    Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/framefilter.cpp    Mon Aug 03 17:55:58 2015 -0500
> > @@ -160,7 +160,7 @@
> >
> >  void FrameFilter::processRowPost(int row)
> >  {
> > -    PicYuv *reconPic = m_frame->m_reconPic;
> > +    PicYuv *reconPic = m_frame->m_encData->getOutputRecon();
> >      const uint32_t numCols =
> > m_frame->m_encData->m_slice->m_sps->numCuInWidth;
> >      const uint32_t lineStartCUAddr = row * numCols;
> >      const int realH = getCUHeight(row);
> > @@ -233,10 +233,10 @@
> >      }
> >      if (m_param->bEnableSsim && m_ssimBuf)
> >      {
> > -        pixel *rec = m_frame->m_reconPic->m_picOrg[0];
> > +        pixel *rec = reconPic->m_picOrg[0];
> >          pixel *fenc = m_frame->m_fencPic->m_picOrg[0];
> > -        intptr_t stride1 = m_frame->m_fencPic->m_stride;
> > -        intptr_t stride2 = m_frame->m_reconPic->m_stride;
> > +        intptr_t stride1 = reconPic->m_stride;
> > +        intptr_t stride2 = m_frame->m_fencPic->m_stride;
> >          uint32_t bEnd = ((row + 1) == (this->m_numRows - 1));
> >          uint32_t bStart = (row == 0);
> >          uint32_t minPixY = row * g_maxCUSize - 4 * !bStart;
> > @@ -407,7 +407,7 @@
> >      int size = cu->m_log2CUSize[absPartIdx] - 2;
> >      uint32_t cuAddr = cu->m_cuAddr;
> >
> > -    PicYuv* reconPic = frame.m_reconPic;
> > +    PicYuv* reconPic = frame.m_encData->getOutputRecon();
> >      PicYuv* fencPic  = frame.m_fencPic;
> >
> >      pixel* dst = reconPic->getLumaAddr(cuAddr, absPartIdx);
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/ratecontrol.cpp
> > --- a/source/encoder/ratecontrol.cpp    Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/ratecontrol.cpp    Mon Aug 03 17:55:58 2015 -0500
> > @@ -1345,10 +1345,10 @@
> >      {
> >          /* B-frames don't have independent rate control, but rather get
> > the
> >           * average QP of the two adjacent P-frames + an offset */
> > -        Slice* prevRefSlice =
> > m_curSlice->m_refPicList[0][0]->m_encData->m_slice;
> > -        Slice* nextRefSlice =
> > m_curSlice->m_refPicList[1][0]->m_encData->m_slice;
> > -        double q0 = m_curSlice->m_refPicList[0][0]->m_encData->m_avgQpRc;
> > -        double q1 = m_curSlice->m_refPicList[1][0]->m_encData->m_avgQpRc;
> > +        Slice* prevRefSlice =
> > m_curSlice->m_refFrameList[0][0]->m_encData->m_slice;
> > +        Slice* nextRefSlice =
> > m_curSlice->m_refFrameList[1][0]->m_encData->m_slice;
> > +        double q0 =
> > m_curSlice->m_refFrameList[0][0]->m_encData->m_avgQpRc;
> > +        double q1 =
> > m_curSlice->m_refFrameList[1][0]->m_encData->m_avgQpRc;
> >          bool i0 = prevRefSlice->m_sliceType == I_SLICE;
> >          bool i1 = nextRefSlice->m_sliceType == I_SLICE;
> >          int dt0 = abs(m_curSlice->m_poc - prevRefSlice->m_poc);
> > @@ -1364,9 +1364,9 @@
> >                  q0 = q1;
> >              }
> >          }
> > -        if (prevRefSlice->m_sliceType == B_SLICE &&
> > IS_REFERENCED(m_curSlice->m_refPicList[0][0]))
> > +        if (prevRefSlice->m_sliceType == B_SLICE &&
> > IS_REFERENCED(m_curSlice->m_refFrameList[0][0]))
> >              q0 -= m_pbOffset / 2;
> > -        if (nextRefSlice->m_sliceType == B_SLICE &&
> > IS_REFERENCED(m_curSlice->m_refPicList[1][0]))
> > +        if (nextRefSlice->m_sliceType == B_SLICE &&
> > IS_REFERENCED(m_curSlice->m_refFrameList[1][0]))
> >              q1 -= m_pbOffset / 2;
> >          if (i0 && i1)
> >              q = (q0 + q1) / 2 + m_ipOffset;
> > @@ -1483,7 +1483,7 @@
> >               * Then bias the quant up or down if total size so far was
> > far from
> >               * the target.
> >               * Result: Depending on the value of rate_tolerance, there is
> > a
> > -             * tradeoff between quality and bitrate precision. But at
> > large
> > +             * trade-off between quality and bitrate precision. But at
> > large
> >               * tolerances, the bit distribution approaches that of 2pass.
> > */
> >
> >              double overflow = 1;
> > @@ -1832,7 +1832,7 @@
> >      double qScale = x265_qp2qScale(qpVbv);
> >      FrameData& curEncData = *curFrame->m_encData;
> >      int picType = curEncData.m_slice->m_sliceType;
> > -    Frame* refFrame = curEncData.m_slice->m_refPicList[0][0];
> > +    Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
> >
> >      uint32_t maxRows = curEncData.m_slice->m_sps->numCuInHeight;
> >      uint32_t maxCols = curEncData.m_slice->m_sps->numCuInWidth;
> > @@ -1921,7 +1921,7 @@
> >      updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost,
> > encodedBits);
> >      if (curEncData.m_slice->m_sliceType == P_SLICE)
> >      {
> > -        Frame* refFrame = curEncData.m_slice->m_refPicList[0][0];
> > +        Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
> >          if (qpVbv < refFrame->m_encData->m_rowStat[row].diagQp)
> >          {
> >              uint64_t intraRowSatdCost =
> > curEncData.m_rowStat[row].diagIntraSatd;
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/sao.cpp
> > --- a/source/encoder/sao.cpp    Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/sao.cpp    Mon Aug 03 17:55:58 2015 -0500
> > @@ -243,12 +243,13 @@
> >  void SAO::processSaoCu(int addr, int typeIdx, int plane)
> >  {
> >      int x, y;
> > -    const CUData* cu = m_frame->m_encData->getPicCTU(addr);
> > -    pixel* rec = m_frame->m_reconPic->getPlaneAddr(plane, addr);
> > -    intptr_t stride = plane ? m_frame->m_reconPic->m_strideC :
> > m_frame->m_reconPic->m_stride;
> > +    PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
> > +    pixel* rec = reconPic->getPlaneAddr(plane, addr);
> > +    intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
> >      uint32_t picWidth  = m_param->sourceWidth;
> >      uint32_t picHeight = m_param->sourceHeight;
> > -    int ctuWidth  = g_maxCUSize;
> > +    const CUData* cu = m_frame->m_encData->getPicCTU(addr);
> > +    int ctuWidth = g_maxCUSize;
> >      int ctuHeight = g_maxCUSize;
> >      uint32_t lpelx = cu->m_cuPelX;
> >      uint32_t tpely = cu->m_cuPelY;
> > @@ -572,7 +573,8 @@
> >  /* Process SAO all units */
> >  void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane)
> >  {
> > -    intptr_t stride = plane ? m_frame->m_reconPic->m_strideC :
> > m_frame->m_reconPic->m_stride;
> > +    PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
> > +    intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
> >      uint32_t picWidth  = m_param->sourceWidth;
> >      int ctuWidth  = g_maxCUSize;
> >      int ctuHeight = g_maxCUSize;
> > @@ -585,12 +587,12 @@
> >
> >      if (!idxY)
> >      {
> > -        pixel* rec = m_frame->m_reconPic->m_picOrg[plane];
> > +        pixel* rec = reconPic->m_picOrg[plane];
> >          memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
> >      }
> >
> >      int addr = idxY * m_numCuInWidth;
> > -    pixel* rec = plane ? m_frame->m_reconPic->getChromaAddr(plane, addr)
> > : m_frame->m_reconPic->getLumaAddr(addr);
> > +    pixel* rec = plane ? reconPic->getChromaAddr(plane, addr) :
> > reconPic->getLumaAddr(addr);
> >
> >      for (int i = 0; i < ctuHeight + 1; i++)
> >      {
> > @@ -635,7 +637,7 @@
> >          }
> >          else if (idxX != (m_numCuInWidth - 1))
> >          {
> > -            rec = plane ? m_frame->m_reconPic->getChromaAddr(plane, addr)
> > : m_frame->m_reconPic->getLumaAddr(addr);
> > +            rec = plane ? reconPic->getChromaAddr(plane, addr) :
> > reconPic->getLumaAddr(addr);
> >
> >              for (int i = 0; i < ctuHeight + 1; i++)
> >              {
> > @@ -671,12 +673,13 @@
> >  /* Calculate SAO statistics for current CTU without non-crossing slice */
> >  void SAO::calcSaoStatsCu(int addr, int plane)
> >  {
> > +    const PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
> >      const CUData* cu = m_frame->m_encData->getPicCTU(addr);
> >      const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
> > -    const pixel* rec0  = m_frame->m_reconPic->getPlaneAddr(plane, addr);
> > +    const pixel* rec0  = reconPic->getPlaneAddr(plane, addr);
> >      const pixel* fenc;
> >      const pixel* rec;
> > -    intptr_t stride = plane ? m_frame->m_reconPic->m_strideC :
> > m_frame->m_reconPic->m_stride;
> > +    intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
> >      uint32_t picWidth  = m_param->sourceWidth;
> >      uint32_t picHeight = m_param->sourceHeight;
> >      int ctuWidth  = g_maxCUSize;
> > @@ -825,9 +828,10 @@
> >
> >      int x, y;
> >      const CUData* cu = frame->m_encData->getPicCTU(addr);
> > +    const PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
> >      const pixel* fenc;
> >      const pixel* rec;
> > -    intptr_t stride = m_frame->m_reconPic->m_stride;
> > +    intptr_t stride = reconPic->m_stride;
> >      uint32_t picWidth  = m_param->sourceWidth;
> >      uint32_t picHeight = m_param->sourceHeight;
> >      int ctuWidth  = g_maxCUSize;
> > @@ -861,7 +865,7 @@
> >      {
> >          if (plane == 1)
> >          {
> > -            stride = frame->m_reconPic->m_strideC;
> > +            stride = reconPic->m_strideC;
> >              picWidth  >>= m_hChromaShift;
> >              picHeight >>= m_vChromaShift;
> >              ctuWidth  >>= m_hChromaShift;
> > @@ -881,7 +885,7 @@
> >          count = m_countPreDblk[addr][plane][SAO_BO];
> >
> >          const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane,
> > addr);
> > -        const pixel* rec0  = m_frame->m_reconPic->getPlaneAddr(plane,
> > addr);
> > +        const pixel* rec0 = reconPic->getPlaneAddr(plane, addr);
> >          fenc = fenc0;
> >          rec  = rec0;
> >
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/search.cpp
> > --- a/source/encoder/search.cpp Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/search.cpp Mon Aug 03 17:55:58 2015 -0500
> > @@ -446,8 +446,9 @@
> >      }
> >
> >      // set reconstruction for next intra prediction blocks if full TU
> > prediction won
> > -    pixel*   picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr,
> > cuGeom.absPartIdx + absPartIdx);
> > -    intptr_t picStride = m_frame->m_reconPic->m_stride;
> > +    PicYuv*  reconPic = m_frame->m_encData->getOutputRecon();
> > +    pixel*   picReconY = reconPic->getLumaAddr(cu.m_cuAddr,
> > cuGeom.absPartIdx + absPartIdx);
> > +    intptr_t picStride = reconPic->m_stride;
> >      primitives.cu[sizeIdx].copy_pp(picReconY, picStride, reconQt,
> > reconQtStride);
> >
> >      outCost.rdcost     += fullCost.rdcost;
> > @@ -611,8 +612,9 @@
> >      }
> >
> >      // set reconstruction for next intra prediction blocks
> > -    pixel*   picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr,
> > cuGeom.absPartIdx + absPartIdx);
> > -    intptr_t picStride = m_frame->m_reconPic->m_stride;
> > +    PicYuv*  reconPic = m_frame->m_encData->getOutputRecon();
> > +    pixel*   picReconY = reconPic->getLumaAddr(cu.m_cuAddr,
> > cuGeom.absPartIdx + absPartIdx);
> > +    intptr_t picStride = reconPic->m_stride;
> >      primitives.cu[sizeIdx].copy_pp(picReconY, picStride, reconQt,
> > reconQtStride);
> >
> >      outCost.rdcost += fullCost.rdcost;
> > @@ -661,8 +663,9 @@
> >          uint32_t sizeIdx   = log2TrSize - 2;
> >          primitives.cu[sizeIdx].calcresidual(fenc, pred, residual,
> > stride);
> >
> > -        pixel*   picReconY =
> > m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx +
> > absPartIdx);
> > -        intptr_t picStride = m_frame->m_reconPic->m_stride;
> > +        PicYuv*  reconPic = m_frame->m_encData->getOutputRecon();
> > +        pixel*   picReconY = reconPic->getLumaAddr(cu.m_cuAddr,
> > cuGeom.absPartIdx + absPartIdx);
> > +        intptr_t picStride = reconPic->m_stride;
> >
> >          uint32_t numSig = m_quant.transformNxN(cu, fenc, stride,
> > residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
> >          if (numSig)
> > @@ -821,8 +824,9 @@
> >              coeff_t* coeffC        = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
> >              pixel*   reconQt       = m_rqt[qtLayer].reconQtYuv.getChromaAddr(chromaId, absPartIdxC);
> >              uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize;
> > -            pixel*   picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> > -            intptr_t picStride = m_frame->m_reconPic->m_strideC;
> > +            PicYuv*  reconPic = m_frame->m_encData->getOutputRecon();
> > +            pixel*   picReconC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> > +            intptr_t picStride = reconPic->m_strideC;
> >
> >              uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
> >              if (chromaPredMode == DM_CHROMA_IDX)
> > @@ -998,8 +1002,9 @@
> >              cu.setCbfPartRange(bCbf << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
> >              cu.setTransformSkipPartRange(bTSkip, ttype, absPartIdxC, tuIterator.absPartIdxStep);
> >
> > -            pixel*   reconPicC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> > -            intptr_t picStride = m_frame->m_reconPic->m_strideC;
> > +            PicYuv*  reconPic = m_frame->m_encData->getOutputRecon();
> > +            pixel*   reconPicC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> > +            intptr_t picStride = reconPic->m_strideC;
> >              primitives.cu[sizeIdxC].copy_pp(reconPicC, picStride, reconQt, reconQtStride);
> >
> >              outDist += bDist;
> > @@ -1108,8 +1113,9 @@
> >              int16_t* residual = resiYuv.getChromaAddr(chromaId, absPartIdxC);
> >              uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
> >              coeff_t* coeffC        = cu.m_trCoeff[ttype] + coeffOffsetC;
> > -            pixel*   picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> > -            intptr_t picStride = m_frame->m_reconPic->m_strideC;
> > +            PicYuv*  reconPic = m_frame->m_encData->getOutputRecon();
> > +            pixel*   picReconC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> > +            intptr_t picStride = reconPic->m_strideC;
> >
> >              uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
> >              if (chromaPredMode == DM_CHROMA_IDX)
> > @@ -1591,10 +1597,11 @@
> >               * output recon picture, so it cannot proceed in parallel with anything else when doing INTRA_NXN. Also
> >               * it is not updating m_rdContexts[depth].cur for the later PUs which I suspect is slightly wrong. I think
> >               * that the contexts should be tracked through each PU */
> > -            pixel*   dst         = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> > -            uint32_t dststride   = m_frame->m_reconPic->m_stride;
> > -            const pixel*   src   = reconYuv->getLumaAddr(absPartIdx);
> > -            uint32_t srcstride   = reconYuv->m_size;
> > +            PicYuv*  reconPic  = m_frame->m_encData->getOutputRecon();
> > +            pixel*   dst       = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> > +            uint32_t dststride = reconPic->m_stride;
> > +            const pixel*   src = reconYuv->getLumaAddr(absPartIdx);
> > +            uint32_t srcstride = reconYuv->m_size;
> >              primitives.cu[log2TrSize - 2].copy_pp(dst, dststride, src, srcstride);
> >          }
> >      }
> > @@ -1757,15 +1764,16 @@
> >          if (!tuIterator.isLastSection())
> >          {
> >              uint32_t zorder    = cuGeom.absPartIdx + absPartIdxC;
> > -            uint32_t dststride = m_frame->m_reconPic->m_strideC;
> > +            PicYuv*  reconPic  = m_frame->m_encData->getOutputRecon();
> > +            uint32_t dststride = reconPic->m_strideC;
> >              const pixel* src;
> >              pixel* dst;
> >
> > -            dst = m_frame->m_reconPic->getCbAddr(cu.m_cuAddr, zorder);
> > +            dst = reconPic->getCbAddr(cu.m_cuAddr, zorder);
> >              src = reconYuv.getCbAddr(absPartIdxC);
> >              primitives.chroma[m_csp].cu[size].copy_pp(dst, dststride, src, reconYuv.m_csize);
> >
> > -            dst = m_frame->m_reconPic->getCrAddr(cu.m_cuAddr, zorder);
> > +            dst = reconPic->getCrAddr(cu.m_cuAddr, zorder);
> >              src = reconYuv.getCrAddr(absPartIdxC);
> >              primitives.chroma[m_csp].cu[size].copy_pp(dst, dststride, src, reconYuv.m_csize);
> >          }
> > @@ -1866,7 +1874,7 @@
> >  /* find the lowres motion vector from lookahead in middle of current PU */
> >  MV Search::getLowresMV(const CUData& cu, const PredictionUnit& pu, int list, int ref)
> >  {
> > -    int diffPoc = abs(m_slice->m_poc - m_slice->m_refPicList[list][ref]->m_poc);
> > +    int diffPoc = abs(m_slice->m_poc - m_slice->m_refPOCList[list][ref]);
> >      if (diffPoc > m_param->bframes + 1)
> >          /* poc difference is out of range for lookahead */
> >          return 0;
> > @@ -1906,7 +1914,7 @@
> >          else
> >          {
> >              cu.clipMv(mvCand);
> > -            predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand);
> > +            predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refReconPicList[list][ref], mvCand);
> >              costs[i] = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
> >          }
> >      }
> > @@ -2197,8 +2205,8 @@
> >              }
> >              else
> >              {
> > -                PicYuv* refPic0 = slice->m_refPicList[0][bestME[0].ref]->m_reconPic;
> > -                PicYuv* refPic1 = slice->m_refPicList[1][bestME[1].ref]->m_reconPic;
> > +                PicYuv* refPic0 = slice->m_refReconPicList[0][bestME[0].ref];
> > +                PicYuv* refPic1 = slice->m_refReconPicList[1][bestME[1].ref];
> >                  Yuv* bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv;
> >
> >                  /* Generate reference subpels */
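
Aside for archive readers: the search.cpp hunks above index two new arrays,
m_refReconPicList and m_refPOCList, next to the Frame-pointer list that the
patch renames to m_refFrameList (see the slicetype.cpp and weightPrediction.cpp
hunks below).  A minimal stand-alone sketch of how the three might be kept in
lock-step; only those three array names come from the patch, the surrounding
types and the syncRefLists() helper are invented for illustration:

    struct PicYuv { long m_stride, m_strideC; };
    struct Frame  { int m_poc; PicYuv* m_reconPic; };

    enum { MAX_NUM_REF = 16 };

    struct Slice
    {
        Frame*  m_refFrameList[2][MAX_NUM_REF];     /* Frame pointers */
        PicYuv* m_refReconPicList[2][MAX_NUM_REF];  /* motion reference buffers */
        int     m_refPOCList[2][MAX_NUM_REF];
        int     m_numRefIdx[2];

        /* invented helper: refresh the recon/POC arrays from the frame list so
         * call sites like getLowresMV() can index them directly */
        void syncRefLists()
        {
            for (int list = 0; list < 2; list++)
                for (int ref = 0; ref < m_numRefIdx[list]; ref++)
                {
                    m_refReconPicList[list][ref] = m_refFrameList[list][ref]->m_reconPic;
                    m_refPOCList[list][ref]      = m_refFrameList[list][ref]->m_poc;
                }
        }
    };

The sketch only illustrates the invariant these hunks rely on: each entry of
m_refReconPicList and m_refPOCList matches the corresponding m_refFrameList
entry.
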
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/slicetype.cpp
> > --- a/source/encoder/slicetype.cpp      Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/slicetype.cpp      Mon Aug 03 17:55:58 2015 -0500
> > @@ -714,16 +714,16 @@
> >
> >      case P_SLICE:
> >          b = p1 = poc - l0poc;
> > -        frames[p0] = &slice->m_refPicList[0][0]->m_lowres;
> > +        frames[p0] = &slice->m_refFrameList[0][0]->m_lowres;
> >          frames[b] = &curFrame->m_lowres;
> >          break;
> >
> >      case B_SLICE:
> >          b = poc - l0poc;
> >          p1 = b + l1poc - poc;
> > -        frames[p0] = &slice->m_refPicList[0][0]->m_lowres;
> > +        frames[p0] = &slice->m_refFrameList[0][0]->m_lowres;
> >          frames[b] = &curFrame->m_lowres;
> > -        frames[p1] = &slice->m_refPicList[1][0]->m_lowres;
> > +        frames[p1] = &slice->m_refFrameList[1][0]->m_lowres;
> >          break;
> >
> >      default:
> > diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/weightPrediction.cpp
> > --- a/source/encoder/weightPrediction.cpp       Mon Aug 03 14:56:21 2015 -0500
> > +++ b/source/encoder/weightPrediction.cpp       Mon Aug 03 17:55:58 2015 -0500
> > @@ -259,7 +259,7 @@
> >      for (int list = 0; list < cache.numPredDir; list++)
> >      {
> >          WeightParam *weights = wp[list][0];
> > -        Frame *refFrame = slice.m_refPicList[list][0];
> > +        Frame *refFrame = slice.m_refFrameList[list][0];
> >          Lowres& refLowres = refFrame->m_lowres;
> >          int diffPoc = abs(curPoc - refFrame->m_poc);
> >
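
Similarly, a throw-away sketch of the accessor shape the getOutputRecon() call
sites above compile against; only the getOutputRecon() name is from the patch,
the FrameData internals and node handling below are placeholders rather than
the real implementation:

    struct PicYuv;            /* call sites only ever hold a pointer */

    enum { MAX_NODES = 4 };   /* placeholder node count */

    struct FrameData
    {
        struct { PicYuv* reconPic; } m_nodes[MAX_NODES];

        PicYuv* getOutputRecon() const
        {
            /* placeholder selection: a NUMA-aware build would pick the node
             * of the calling frame encoder thread instead of hard-coding 0 */
            return m_nodes[0].reconPic;
        }
    };

Since every caller goes through the accessor and only sees a PicYuv*, the
number of recon buffers kept behind it is invisible to the code in these hunks.
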


-- 
Steve Borho

