[x265] [PATCH] cleanup: simplify slice ref lists; moving shared offset buffers into SPS

Steve Borho steve at borho.org
Wed Aug 12 09:53:28 CEST 2015


On 08/12, Pradeep wrote:
> # HG changeset patch
> # User Pradeep
> # Date 1439332317 0
> #      Tue Aug 11 22:31:57 2015 +0000
> # Node ID 1b2e5e8ccb850d83bfae57b5e2dec623330f558d
> # Parent  faecefdb2ae90c031dfda5c11d14b9ba7005257b
> cleanup: simplify slice ref lists; moving shared offset buffers into SPS
> 
> Stripping down Steve's experimental patches into separate patches
> 
> Part 1:
> Rename slice.m_refPicList to m_refFrameList since it is an array of Frame
> pointers, and make a new slice.m_refReconPicList array which points directly
> to the motion reference PicYuv buffers (bypassing the Frame structure)
> 
> The shared offset buffers were moved from the top-level encoder into the SPS
> structure so the FrameData functions could use them directly (avoiding a major
> layering violation). The offset buffers are computed based on SPS values, so it
> seems minimally ugly to keep them there.
> 
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/common/cudata.cpp
> --- a/source/common/cudata.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/common/cudata.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -1676,7 +1676,7 @@
>          if (tempRefIdx != -1)
>          {
>              uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
> -            const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> +            const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
>              const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
>  
>              // Scale the vector
> @@ -1857,7 +1857,7 @@
>  
>  bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
>  {
> -    const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> +    const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
>      const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
>  
>      uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
> @@ -1892,7 +1892,7 @@
>  // Cache the collocated MV.
>  bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const
>  {
> -    const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
> +    const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
>      const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
>  
>      uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/common/deblock.cpp
> --- a/source/common/deblock.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/common/deblock.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -209,8 +209,8 @@
>      const Slice* const sliceQ = cuQ->m_slice;
>      const Slice* const sliceP = cuP->m_slice;
>  
> -    const Frame* refP0 = sliceP->getRefPic(0, cuP->m_refIdx[0][partP]);
> -    const Frame* refQ0 = sliceQ->getRefPic(0, cuQ->m_refIdx[0][partQ]);
> +    const Frame* refP0 = sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]];
> +    const Frame* refQ0 = sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]];
>      const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
>      const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
>  
> @@ -221,8 +221,8 @@
>      }
>  
>      // (sliceQ->isInterB() || sliceP->isInterB())
> -    const Frame* refP1 = sliceP->getRefPic(1, cuP->m_refIdx[1][partP]);
> -    const Frame* refQ1 = sliceQ->getRefPic(1, cuQ->m_refIdx[1][partQ]);
> +    const Frame* refP1 = sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]];
> +    const Frame* refQ1 = sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]];
>      const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
>      const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
>  
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/common/frame.cpp
> --- a/source/common/frame.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/common/frame.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -56,7 +56,7 @@
>      m_encData = new FrameData;
>      m_reconPic = new PicYuv;
>      m_encData->m_reconPic = m_reconPic;
> -    bool ok = m_encData->create(param, sps) && m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp);
> +    bool ok = m_encData->create(*param, sps) && m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp);
>      if (ok)
>      {
>          /* initialize right border of m_reconpicYuv as SAO may read beyond the
> @@ -65,6 +65,12 @@
>          memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel) * m_reconPic->m_stride * maxHeight);
>          memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
>          memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
> +
> +        /* use pre-calculated cu/pu offsets cached in the SPS structure */
> +        m_reconPic->m_cuOffsetC = sps.cuOffsetC;
> +        m_reconPic->m_cuOffsetY = sps.cuOffsetY;
> +        m_reconPic->m_buOffsetC = sps.buOffsetC;
> +        m_reconPic->m_buOffsetY = sps.buOffsetY;
>      }
>      return ok;
>  }
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/common/framedata.cpp
> --- a/source/common/framedata.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/common/framedata.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -31,15 +31,15 @@
>      memset(this, 0, sizeof(*this));
>  }
>  
> -bool FrameData::create(x265_param *param, const SPS& sps)
> +bool FrameData::create(const x265_param& param, const SPS& sps)
>  {
> -    m_param = param;
> +    m_param = &param;
>      m_slice  = new Slice;
>      m_picCTU = new CUData[sps.numCUsInFrame];
>  
> -    m_cuMemPool.create(0, param->internalCsp, sps.numCUsInFrame);
> +    m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame);
>      for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
> -        m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param->internalCsp, ctuAddr);
> +        m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param.internalCsp, ctuAddr);
>  
>      CHECKED_MALLOC(m_cuStat, RCStatCU, sps.numCUsInFrame);
>      CHECKED_MALLOC(m_rowStat, RCStatRow, sps.numCuInHeight);
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/common/framedata.h
> --- a/source/common/framedata.h	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/common/framedata.h	Tue Aug 11 22:31:57 2015 +0000
> @@ -96,7 +96,7 @@
>  
>      Slice*         m_slice;
>      SAOParam*      m_saoParam;
> -    x265_param*    m_param;
> +    const x265_param* m_param;
>  
>      FrameData*     m_freeListNext;
>      PicYuv*        m_reconPic;
> @@ -142,11 +142,11 @@
>  
>      FrameData();
>  
> -    bool create(x265_param *param, const SPS& sps);
> +    bool create(const x265_param& param, const SPS& sps);
>      void reinit(const SPS& sps);
>      void destroy();
>  
> -    CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
> +    inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
>  };
>  }
>  
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/common/predict.cpp
> --- a/source/common/predict.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/common/predict.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -109,18 +109,18 @@
>              ShortYuv& shortYuv = m_predShortYuv[0];
>  
>              if (bLuma)
> -                predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> +                predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
>              if (bChroma)
> -                predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> +                predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
>  
>              addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
>          }
>          else
>          {
>              if (bLuma)
> -                predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> +                predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
>              if (bChroma)
> -                predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> +                predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
>          }
>      }
>      else
> @@ -179,13 +179,13 @@
>  
>              if (bLuma)
>              {
> -                predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> -                predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> +                predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> +                predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
>              }
>              if (bChroma)
>              {
> -                predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> -                predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> +                predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
> +                predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
>              }
>  
>              if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
> @@ -203,18 +203,18 @@
>                  ShortYuv& shortYuv = m_predShortYuv[0];
>  
>                  if (bLuma)
> -                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> +                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
>                  if (bChroma)
> -                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> +                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
>  
>                  addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
>              }
>              else
>              {
>                  if (bLuma)
> -                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> +                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
>                  if (bChroma)
> -                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
> +                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
>              }
>          }
>          else
> @@ -230,18 +230,18 @@
>                  ShortYuv& shortYuv = m_predShortYuv[0];
>  
>                  if (bLuma)
> -                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> +                    predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
>                  if (bChroma)
> -                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> +                    predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
>  
>                  addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
>              }
>              else
>              {
>                  if (bLuma)
> -                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> +                    predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
>                  if (bChroma)
> -                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
> +                    predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
>              }
>          }
>      }
> @@ -600,8 +600,9 @@
>      int tuSize = 1 << intraNeighbors.log2TrSize;
>      int tuSize2 = tuSize << 1;
>  
> -    pixel* adiOrigin = cu.m_encData->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
> -    intptr_t picStride = cu.m_encData->m_reconPic->m_stride;
> +    PicYuv* reconPic = cu.m_encData->m_reconPic;
> +    pixel* adiOrigin = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
> +    intptr_t picStride = reconPic->m_stride;
>  
>      fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
>  
> @@ -648,8 +649,9 @@
>  
>  void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId)
>  {
> -    const pixel* adiOrigin = cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
> -    intptr_t picStride = cu.m_encData->m_reconPic->m_strideC;
> +    PicYuv* reconPic = cu.m_encData->m_reconPic;
> +    const pixel* adiOrigin = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
> +    intptr_t picStride = reconPic->m_strideC;
>  
>      fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
>  
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/common/slice.cpp
> --- a/source/common/slice.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/common/slice.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -33,7 +33,9 @@
>  {
>      if (m_sliceType == I_SLICE)
>      {
> -        memset(m_refPicList, 0, sizeof(m_refPicList));
> +        memset(m_refFrameList, 0, sizeof(m_refFrameList));
> +        memset(m_refReconPicList, 0, sizeof(m_refReconPicList));
> +        memset(m_refPOCList, 0, sizeof(m_refPOCList));
>          m_numRefIdx[1] = m_numRefIdx[0] = 0;
>          return;
>      }
> @@ -106,13 +108,13 @@
>      {
>          cIdx = rIdx % numPocTotalCurr;
>          X265_CHECK(cIdx >= 0 && cIdx < numPocTotalCurr, "RPS index check fail\n");
> -        m_refPicList[0][rIdx] = rpsCurrList0[cIdx];
> +        m_refFrameList[0][rIdx] = rpsCurrList0[cIdx];
>      }
>  
>      if (m_sliceType != B_SLICE)
>      {
>          m_numRefIdx[1] = 0;
> -        memset(m_refPicList[1], 0, sizeof(m_refPicList[1]));
> +        memset(m_refFrameList[1], 0, sizeof(m_refFrameList[1]));
>      }
>      else
>      {
> @@ -120,13 +122,13 @@
>          {
>              cIdx = rIdx % numPocTotalCurr;
>              X265_CHECK(cIdx >= 0 && cIdx < numPocTotalCurr, "RPS index check fail\n");
> -            m_refPicList[1][rIdx] = rpsCurrList1[cIdx];
> +            m_refFrameList[1][rIdx] = rpsCurrList1[cIdx];
>          }
>      }
>  
>      for (int dir = 0; dir < 2; dir++)
>          for (int numRefIdx = 0; numRefIdx < m_numRefIdx[dir]; numRefIdx++)
> -            m_refPOCList[dir][numRefIdx] = m_refPicList[dir][numRefIdx]->m_poc;
> +            m_refPOCList[dir][numRefIdx] = m_refFrameList[dir][numRefIdx]->m_poc;
>  }
>  
>  void Slice::disableWeights()
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/common/slice.h
> --- a/source/common/slice.h	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/common/slice.h	Tue Aug 11 22:31:57 2015 +0000
> @@ -31,6 +31,7 @@
>  
>  class Frame;
>  class PicList;
> +class PicYuv;
>  class MotionReference;
>  
>  enum SliceType
> @@ -209,6 +210,13 @@
>  
>  struct SPS
>  {
> +    /* cached PicYuv offset arrays, shared by all instances of
> +    * PicYuv created by this encoder */

Whitespace nit: editors generally mangle multi-line comments when they are
pasted (the continuation line's asterisk has lost its alignment space).

> +    intptr_t* cuOffsetY;
> +    intptr_t* cuOffsetC;
> +    intptr_t* buOffsetY;
> +    intptr_t* buOffsetC;
> +
>      int      chromaFormatIdc;        // use param
>      uint32_t picWidthInLumaSamples;  // use param
>      uint32_t picHeightInLumaSamples; // use param
> @@ -242,6 +250,11 @@
>  
>      Window   conformanceWindow;
>      VUI      vuiParameters;
> +
> +    SPS()
> +    {
> +        memset(this, 0, sizeof(*this));
> +    }

These offset buffers are still created and initialized by
PicYuv::createOffsets(); we should move the alloc/init/free logic into
the SPS class (in a later patch).

>  };
>  
>  struct PPS
> @@ -321,7 +334,8 @@
>      uint32_t    m_colRefIdx;       // never modified
>      
>      int         m_numRefIdx[2];
> -    Frame*      m_refPicList[2][MAX_NUM_REF + 1];
> +    Frame*      m_refFrameList[2][MAX_NUM_REF + 1];
> +    PicYuv*     m_refReconPicList[2][MAX_NUM_REF + 1];
>      int         m_refPOCList[2][MAX_NUM_REF + 1];
>  
>      uint32_t    m_maxNumMergeCand; // use param
> @@ -332,14 +346,9 @@
>          m_lastIDR = 0;
>          m_sLFaseFlag = true;
>          m_numRefIdx[0] = m_numRefIdx[1] = 0;
> -        for (int i = 0; i < MAX_NUM_REF; i++)
> -        {
> -            m_refPicList[0][i] = NULL;
> -            m_refPicList[1][i] = NULL;
> -            m_refPOCList[0][i] = 0;
> -            m_refPOCList[1][i] = 0;
> -        }
> -
> +        memset(m_refFrameList, 0, sizeof(m_refFrameList));
> +        memset(m_refReconPicList, 0, sizeof(m_refReconPicList));
> +        memset(m_refPOCList, 0, sizeof(m_refPOCList));
>          disableWeights();
>      }
>  
> @@ -347,8 +356,6 @@
>  
>      void setRefPicList(PicList& picList);
>  
> -    const Frame* getRefPic(int list, int refIdx) const { return refIdx >= 0 ? m_refPicList[list][refIdx] : NULL; }
> -
>      bool getRapPicFlag() const
>      {
>          return m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/analysis.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -757,6 +757,8 @@
>      ModeDepth& md = m_modeDepth[depth];
>      md.bestMode = NULL;
>  
> +    PicYuv& reconPic = *m_frame->m_reconPic;
> +
>      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>      uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
> @@ -1051,7 +1053,7 @@
>                          residualTransformQuantIntra(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
>                          getBestIntraModeChroma(*md.bestMode, cuGeom);
>                          residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
> -                        md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
> +                        md.bestMode->reconYuv.copyFromPicYuv(reconPic, cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
>                      }
>                  }
>              }
> @@ -1107,7 +1109,7 @@
>      X265_CHECK(md.bestMode->ok(), "best mode is not ok");
>      md.bestMode->cu.copyToPic(depth);
>      if (m_param->rdLevel)
> -        md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
> +        md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx);
>  
>      return refMask;
>  }
> @@ -1851,6 +1853,8 @@
>  
>      cu.copyFromPic(ctu, cuGeom);
>  
> +    PicYuv& reconPic = *m_frame->m_reconPic;
> +
>      Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
>      if (cuGeom.depth)
>          m_modeDepth[0].fencYuv.copyPartToYuv(fencYuv, absPartIdx);
> @@ -1906,7 +1910,6 @@
>          /* residualTransformQuantInter() wrote transformed residual back into
>           * resiYuv. Generate the recon pixels by adding it to the prediction */
>  
> -        PicYuv& reconPic = *m_frame->m_reconPic;
>          if (cu.m_cbf[0][0])
>              primitives.cu[sizeIdx].add_ps(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
>                                            predY, resiYuv.m_buf[0], predYuv.m_size, resiYuv.m_size);
> @@ -1969,7 +1972,7 @@
>      if (m_slice->m_numRefIdx[0])
>      {
>          numRefs++;
> -        const CUData& cu = *m_slice->m_refPicList[0][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
> +        const CUData& cu = *m_slice->m_refFrameList[0][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
>          previousQP = cu.m_qp[0];
>          if (!cu.m_cuDepth[cuGeom.absPartIdx])
>              return 0;
> @@ -1983,7 +1986,7 @@
>      if (m_slice->m_numRefIdx[1])
>      {
>          numRefs++;
> -        const CUData& cu = *m_slice->m_refPicList[1][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
> +        const CUData& cu = *m_slice->m_refFrameList[1][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
>          if (!cu.m_cuDepth[cuGeom.absPartIdx])
>              return 0;
>          for (uint32_t i = 0; i < cuGeom.numPartitions; i += 4)
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/dpb.cpp
> --- a/source/encoder/dpb.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/dpb.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -47,16 +47,16 @@
>          delete curFrame;
>      }
>  
> -    while (m_picSymFreeList)
> +    while (m_frameDataFreeList)
>      {
> -        FrameData* next = m_picSymFreeList->m_freeListNext;
> -        m_picSymFreeList->destroy();
> +        FrameData* next = m_frameDataFreeList->m_freeListNext;
> +        m_frameDataFreeList->destroy();
>  
> -        m_picSymFreeList->m_reconPic->destroy();
> -        delete m_picSymFreeList->m_reconPic;
> +        m_frameDataFreeList->m_reconPic->destroy();
> +        delete m_frameDataFreeList->m_reconPic;
>  
> -        delete m_picSymFreeList;
> -        m_picSymFreeList = next;
> +        delete m_frameDataFreeList;
> +        m_frameDataFreeList = next;
>      }
>  }
>  
> @@ -79,8 +79,8 @@
>              iterFrame = m_picList.first();
>  
>              m_freeList.pushBack(*curFrame);
> -            curFrame->m_encData->m_freeListNext = m_picSymFreeList;
> -            m_picSymFreeList = curFrame->m_encData;
> +            curFrame->m_encData->m_freeListNext = m_frameDataFreeList;
> +            m_frameDataFreeList = curFrame->m_encData;
>              curFrame->m_encData = NULL;
>              curFrame->m_reconPic = NULL;
>          }
> @@ -171,7 +171,7 @@
>      {
>          for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
>          {
> -            Frame *refpic = slice->m_refPicList[l][ref];
> +            Frame *refpic = slice->m_refFrameList[l][ref];
>              ATOMIC_INC(&refpic->m_countRefEncoders);
>          }
>      }
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/dpb.h
> --- a/source/encoder/dpb.h	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/dpb.h	Tue Aug 11 22:31:57 2015 +0000
> @@ -46,14 +46,14 @@
>      bool               m_bTemporalSublayer;
>      PicList            m_picList;
>      PicList            m_freeList;
> -    FrameData*         m_picSymFreeList;
> +    FrameData*         m_frameDataFreeList;
>  
>      DPB(x265_param *param)
>      {
>          m_lastIDR = 0;
>          m_pocCRA = 0;
>          m_bRefreshPending = false;
> -        m_picSymFreeList = NULL;
> +        m_frameDataFreeList = NULL;
>          m_maxRefL0 = param->maxNumReferences;
>          m_maxRefL1 = param->bBPyramid ? 2 : 1;
>          m_bOpenGOP = param->bOpenGOP;
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/encoder.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -66,10 +66,6 @@
>      m_outputCount = 0;
>      m_param = NULL;
>      m_latestParam = NULL;
> -    m_cuOffsetY = NULL;
> -    m_cuOffsetC = NULL;
> -    m_buOffsetY = NULL;
> -    m_buOffsetC = NULL;
>      m_threadPool = NULL;
>      m_analysisFile = NULL;
>      for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)
> @@ -318,10 +314,10 @@
>          delete m_rateControl;
>      }
>  
> -    X265_FREE(m_cuOffsetY);
> -    X265_FREE(m_cuOffsetC);
> -    X265_FREE(m_buOffsetY);
> -    X265_FREE(m_buOffsetC);
> +    X265_FREE(m_sps.cuOffsetY);
> +    X265_FREE(m_sps.cuOffsetC);
> +    X265_FREE(m_sps.buOffsetY);
> +    X265_FREE(m_sps.buOffsetC);

These free calls should be in an SPS destructor.

>      if (m_analysisFile)
>          fclose(m_analysisFile);
> @@ -416,12 +412,12 @@
>                  /* the first PicYuv created is asked to generate the CU and block unit offset
>                   * arrays which are then shared with all subsequent PicYuv (orig and recon) 
>                   * allocated by this top level encoder */
> -                if (m_cuOffsetY)
> +                if (m_sps.cuOffsetY)
>                  {
> -                    inFrame->m_fencPic->m_cuOffsetC = m_cuOffsetC;
> -                    inFrame->m_fencPic->m_cuOffsetY = m_cuOffsetY;
> -                    inFrame->m_fencPic->m_buOffsetC = m_buOffsetC;
> -                    inFrame->m_fencPic->m_buOffsetY = m_buOffsetY;
> +                    inFrame->m_fencPic->m_cuOffsetC = m_sps.cuOffsetC;
> +                    inFrame->m_fencPic->m_cuOffsetY = m_sps.cuOffsetY;
> +                    inFrame->m_fencPic->m_buOffsetC = m_sps.buOffsetC;
> +                    inFrame->m_fencPic->m_buOffsetY = m_sps.buOffsetY;
>                  }
>                  else
>                  {
> @@ -435,10 +431,10 @@
>                      }
>                      else
>                      {
> -                        m_cuOffsetC = inFrame->m_fencPic->m_cuOffsetC;
> -                        m_cuOffsetY = inFrame->m_fencPic->m_cuOffsetY;
> -                        m_buOffsetC = inFrame->m_fencPic->m_buOffsetC;
> -                        m_buOffsetY = inFrame->m_fencPic->m_buOffsetY;
> +                        m_sps.cuOffsetC = inFrame->m_fencPic->m_cuOffsetC;
> +                        m_sps.cuOffsetY = inFrame->m_fencPic->m_cuOffsetY;
> +                        m_sps.buOffsetC = inFrame->m_fencPic->m_buOffsetC;
> +                        m_sps.buOffsetY = inFrame->m_fencPic->m_buOffsetY;
>                      }
>                  }
>              }
> @@ -633,10 +629,10 @@
>          if (frameEnc && !pass)
>          {
>              /* give this frame a FrameData instance before encoding */
> -            if (m_dpb->m_picSymFreeList)
> +            if (m_dpb->m_frameDataFreeList)
>              {
> -                frameEnc->m_encData = m_dpb->m_picSymFreeList;
> -                m_dpb->m_picSymFreeList = m_dpb->m_picSymFreeList->m_freeListNext;
> +                frameEnc->m_encData = m_dpb->m_frameDataFreeList;
> +                m_dpb->m_frameDataFreeList = m_dpb->m_frameDataFreeList->m_freeListNext;
>                  frameEnc->reinit(m_sps);
>              }
>              else
> @@ -647,10 +643,6 @@
>                  slice->m_pps = &m_pps;
>                  slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
>                  slice->m_endCUAddr = slice->realEndAddress(m_sps.numCUsInFrame * NUM_4x4_PARTITIONS);
> -                frameEnc->m_reconPic->m_cuOffsetC = m_cuOffsetC;
> -                frameEnc->m_reconPic->m_cuOffsetY = m_cuOffsetY;
> -                frameEnc->m_reconPic->m_buOffsetC = m_buOffsetC;
> -                frameEnc->m_reconPic->m_buOffsetY = m_buOffsetY;
>              }
>  
>              curEncoder->m_rce.encodeOrder = m_encodedFrameNum++;
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/encoder.h
> --- a/source/encoder/encoder.h	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/encoder.h	Tue Aug 11 22:31:57 2015 +0000
> @@ -93,13 +93,6 @@
>      int                m_numPools;
>      int                m_curEncoder;
>  
> -    /* cached PicYuv offset arrays, shared by all instances of
> -     * PicYuv created by this encoder */
> -    intptr_t*          m_cuOffsetY;
> -    intptr_t*          m_cuOffsetC;
> -    intptr_t*          m_buOffsetY;
> -    intptr_t*          m_buOffsetC;
> -
>      /* Collect statistics globally */
>      EncStats           m_analyzeAll;
>      EncStats           m_analyzeI;
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/frameencoder.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -35,10 +35,6 @@
>  #include "slicetype.h"
>  #include "nal.h"
>  

It seems you had some unrelated local changes in your working directory,
probably caused by an update that required a merge.

> -#if HAVE_LIBNUMA
> -#include <numa.h>
> -#endif
> -
>  namespace X265_NS {
>  void weightAnalyse(Slice& slice, Frame& frame, x265_param& param);
>  
> @@ -333,28 +329,6 @@
>      if (m_frame->m_lowres.bKeyframe && m_param->bRepeatHeaders)
>          m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
>  
> -<<<<<<< local
> -#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) || HAVE_LIBNUMA
> -    int numaNode = m_pool ? m_pool->m_numaNode : 0;
> -#else
> -    int numaNode = 0;
> -#endif
> -
> -    /* Claim this frame as being encoded by this NUMA node */
> -    // FIXME - Trying to see if copying recon to all NUMA nodes is better
> -    // m_frame->m_encData->allocRecon(m_top->m_sps, numaNode); /* TODO: bail if failure */
> -    for (int i = 0; i < ThreadPool::getNumaNodeCount(); i++)
> -    {
> -        // TODO - Fix for non-numa and windows
> -        numa_set_preferred(i) ;
> -        m_frame->m_encData->allocRecon(m_top->m_sps, i); /* TODO: bail if failure */
> -    }
> -    // TODO - Fix for non-numa and windows
> -    numa_set_preferred(numaNode) ;
> -    m_frame->m_encData->m_ownerNode = numaNode;
> -
> -=======
> ->>>>>>> other
>      // Weighted Prediction parameters estimation.
>      bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred;
>      bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred;
> @@ -383,7 +357,8 @@
>              WeightParam *w = NULL;
>              if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag)
>                  w = slice->m_weightPredTable[l][ref];
> -            m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);
> +            slice->m_refReconPicList[l][ref] = slice->m_refFrameList[l][ref]->m_reconPic;
> +            m_mref[l][ref].init(slice->m_refReconPicList[l][ref], w, *m_param);
>          }
>      }
>  
> @@ -503,7 +478,7 @@
>      /* CQP and CRF (without capped VBV) doesn't use mid-frame statistics to 
>       * tune RateControl parameters for other frames.
>       * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
> -     * RateControlEnd here, after the slicecontexts are initialized. For the rest - ABR
> +     * RateControlEnd here, after the slice contexts are initialized. For the rest - ABR
>       * and VBV, unlock only after rateControlUpdateStats of this frame is called */
>      if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)
>      {
> @@ -527,7 +502,7 @@
>              {
>                  for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
>                  {
> -                    Frame *refpic = slice->m_refPicList[l][ref];
> +                    Frame *refpic = slice->m_refFrameList[l][ref];
>  
>                      uint32_t reconRowCount = refpic->m_reconRowCount.get();
>                      while ((reconRowCount != m_numRows) && (reconRowCount < row + m_refLagRows))
> @@ -566,7 +541,7 @@
>                      int list = l;
>                      for (int ref = 0; ref < slice->m_numRefIdx[list]; ref++)
>                      {
> -                        Frame *refpic = slice->m_refPicList[list][ref];
> +                        Frame *refpic = slice->m_refFrameList[list][ref];
>  
>                          uint32_t reconRowCount = refpic->m_reconRowCount.get();
>                          while ((reconRowCount != m_numRows) && (reconRowCount < i + m_refLagRows))
> @@ -723,7 +698,7 @@
>      {
>          for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
>          {
> -            Frame *refpic = slice->m_refPicList[l][ref];
> +            Frame *refpic = slice->m_refFrameList[l][ref];
>              ATOMIC_DEC(&refpic->m_countRefEncoders);
>          }
>      }
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/framefilter.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -233,10 +233,10 @@
>      }
>      if (m_param->bEnableSsim && m_ssimBuf)
>      {
> -        pixel *rec = m_frame->m_reconPic->m_picOrg[0];
> +        pixel *rec = reconPic->m_picOrg[0];
>          pixel *fenc = m_frame->m_fencPic->m_picOrg[0];
>          intptr_t stride1 = m_frame->m_fencPic->m_stride;
> -        intptr_t stride2 = m_frame->m_reconPic->m_stride;
> +        intptr_t stride2 = reconPic->m_stride;
>          uint32_t bEnd = ((row + 1) == (this->m_numRows - 1));
>          uint32_t bStart = (row == 0);
>          uint32_t minPixY = row * g_maxCUSize - 4 * !bStart;

Here you'll notice the original code had a bug: rec and stride1 are used
together, but rec is from reconPic and stride1 comes from fencPic. The
bug is harmless since all PicYuv instances allocated by an encoder
should have the same stride.

> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/ratecontrol.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -1345,10 +1345,10 @@
>      {
>          /* B-frames don't have independent rate control, but rather get the
>           * average QP of the two adjacent P-frames + an offset */
> -        Slice* prevRefSlice = m_curSlice->m_refPicList[0][0]->m_encData->m_slice;
> -        Slice* nextRefSlice = m_curSlice->m_refPicList[1][0]->m_encData->m_slice;
> -        double q0 = m_curSlice->m_refPicList[0][0]->m_encData->m_avgQpRc;
> -        double q1 = m_curSlice->m_refPicList[1][0]->m_encData->m_avgQpRc;
> +        Slice* prevRefSlice = m_curSlice->m_refFrameList[0][0]->m_encData->m_slice;
> +        Slice* nextRefSlice = m_curSlice->m_refFrameList[1][0]->m_encData->m_slice;
> +        double q0 = m_curSlice->m_refFrameList[0][0]->m_encData->m_avgQpRc;
> +        double q1 = m_curSlice->m_refFrameList[1][0]->m_encData->m_avgQpRc;
>          bool i0 = prevRefSlice->m_sliceType == I_SLICE;
>          bool i1 = nextRefSlice->m_sliceType == I_SLICE;
>          int dt0 = abs(m_curSlice->m_poc - prevRefSlice->m_poc);
> @@ -1364,9 +1364,9 @@
>                  q0 = q1;
>              }
>          }
> -        if (prevRefSlice->m_sliceType == B_SLICE && IS_REFERENCED(m_curSlice->m_refPicList[0][0]))
> +        if (prevRefSlice->m_sliceType == B_SLICE && IS_REFERENCED(m_curSlice->m_refFrameList[0][0]))
>              q0 -= m_pbOffset / 2;
> -        if (nextRefSlice->m_sliceType == B_SLICE && IS_REFERENCED(m_curSlice->m_refPicList[1][0]))
> +        if (nextRefSlice->m_sliceType == B_SLICE && IS_REFERENCED(m_curSlice->m_refFrameList[1][0]))
>              q1 -= m_pbOffset / 2;
>          if (i0 && i1)
>              q = (q0 + q1) / 2 + m_ipOffset;
> @@ -1483,7 +1483,7 @@
>               * Then bias the quant up or down if total size so far was far from
>               * the target.
>               * Result: Depending on the value of rate_tolerance, there is a
> -             * tradeoff between quality and bitrate precision. But at large
> +             * trade-off between quality and bitrate precision. But at large
>               * tolerances, the bit distribution approaches that of 2pass. */
>  
>              double overflow = 1;
> @@ -1832,7 +1832,7 @@
>      double qScale = x265_qp2qScale(qpVbv);
>      FrameData& curEncData = *curFrame->m_encData;
>      int picType = curEncData.m_slice->m_sliceType;
> -    Frame* refFrame = curEncData.m_slice->m_refPicList[0][0];
> +    Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
>  
>      uint32_t maxRows = curEncData.m_slice->m_sps->numCuInHeight;
>      uint32_t maxCols = curEncData.m_slice->m_sps->numCuInWidth;
> @@ -1921,7 +1921,7 @@
>      updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost, encodedBits);
>      if (curEncData.m_slice->m_sliceType == P_SLICE)
>      {
> -        Frame* refFrame = curEncData.m_slice->m_refPicList[0][0];
> +        Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
>          if (qpVbv < refFrame->m_encData->m_rowStat[row].diagQp)
>          {
>              uint64_t intraRowSatdCost = curEncData.m_rowStat[row].diagIntraSatd;
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/sao.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -243,12 +243,13 @@
>  void SAO::processSaoCu(int addr, int typeIdx, int plane)
>  {
>      int x, y;
> -    const CUData* cu = m_frame->m_encData->getPicCTU(addr);
> -    pixel* rec = m_frame->m_reconPic->getPlaneAddr(plane, addr);
> -    intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
> +    PicYuv* reconPic = m_frame->m_reconPic;
> +    pixel* rec = reconPic->getPlaneAddr(plane, addr);
> +    intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
>      uint32_t picWidth  = m_param->sourceWidth;
>      uint32_t picHeight = m_param->sourceHeight;
> -    int ctuWidth  = g_maxCUSize;
> +    const CUData* cu = m_frame->m_encData->getPicCTU(addr);
> +    int ctuWidth = g_maxCUSize;
>      int ctuHeight = g_maxCUSize;
>      uint32_t lpelx = cu->m_cuPelX;
>      uint32_t tpely = cu->m_cuPelY;
> @@ -572,7 +573,8 @@
>  /* Process SAO all units */
>  void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane)
>  {
> -    intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
> +    PicYuv* reconPic = m_frame->m_reconPic;
> +    intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
>      uint32_t picWidth  = m_param->sourceWidth;
>      int ctuWidth  = g_maxCUSize;
>      int ctuHeight = g_maxCUSize;
> @@ -585,12 +587,12 @@
>  
>      if (!idxY)
>      {
> -        pixel* rec = m_frame->m_reconPic->m_picOrg[plane];
> +        pixel* rec = reconPic->m_picOrg[plane];
>          memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
>      }
>  
>      int addr = idxY * m_numCuInWidth;
> -    pixel* rec = plane ? m_frame->m_reconPic->getChromaAddr(plane, addr) : m_frame->m_reconPic->getLumaAddr(addr);
> +    pixel* rec = plane ? reconPic->getChromaAddr(plane, addr) : reconPic->getLumaAddr(addr);
>  
>      for (int i = 0; i < ctuHeight + 1; i++)
>      {
> @@ -635,7 +637,7 @@
>          }
>          else if (idxX != (m_numCuInWidth - 1))
>          {
> -            rec = plane ? m_frame->m_reconPic->getChromaAddr(plane, addr) : m_frame->m_reconPic->getLumaAddr(addr);
> +            rec = plane ? reconPic->getChromaAddr(plane, addr) : reconPic->getLumaAddr(addr);
>  
>              for (int i = 0; i < ctuHeight + 1; i++)
>              {
> @@ -671,12 +673,13 @@
>  /* Calculate SAO statistics for current CTU without non-crossing slice */
>  void SAO::calcSaoStatsCu(int addr, int plane)
>  {
> +    const PicYuv* reconPic = m_frame->m_reconPic;
>      const CUData* cu = m_frame->m_encData->getPicCTU(addr);
>      const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
> -    const pixel* rec0  = m_frame->m_reconPic->getPlaneAddr(plane, addr);
> +    const pixel* rec0  = reconPic->getPlaneAddr(plane, addr);
>      const pixel* fenc;
>      const pixel* rec;
> -    intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
> +    intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
>      uint32_t picWidth  = m_param->sourceWidth;
>      uint32_t picHeight = m_param->sourceHeight;
>      int ctuWidth  = g_maxCUSize;
> @@ -825,9 +828,10 @@
>  
>      int x, y;
>      const CUData* cu = frame->m_encData->getPicCTU(addr);
> +    const PicYuv* reconPic = m_frame->m_reconPic;
>      const pixel* fenc;
>      const pixel* rec;
> -    intptr_t stride = m_frame->m_reconPic->m_stride;
> +    intptr_t stride = reconPic->m_stride;
>      uint32_t picWidth  = m_param->sourceWidth;
>      uint32_t picHeight = m_param->sourceHeight;
>      int ctuWidth  = g_maxCUSize;
> @@ -861,7 +865,7 @@
>      {
>          if (plane == 1)
>          {
> -            stride = frame->m_reconPic->m_strideC;
> +            stride = reconPic->m_strideC;
>              picWidth  >>= m_hChromaShift;
>              picHeight >>= m_vChromaShift;
>              ctuWidth  >>= m_hChromaShift;
> @@ -881,7 +885,7 @@
>          count = m_countPreDblk[addr][plane][SAO_BO];
>  
>          const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
> -        const pixel* rec0  = m_frame->m_reconPic->getPlaneAddr(plane, addr);
> +        const pixel* rec0 = reconPic->getPlaneAddr(plane, addr);
>          fenc = fenc0;
>          rec  = rec0;
>  
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/search.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -446,8 +446,9 @@
>      }
>  
>      // set reconstruction for next intra prediction blocks if full TU prediction won
> -    pixel*   picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> -    intptr_t picStride = m_frame->m_reconPic->m_stride;
> +    PicYuv*  reconPic = m_frame->m_reconPic;
> +    pixel*   picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> +    intptr_t picStride = reconPic->m_stride;
>      primitives.cu[sizeIdx].copy_pp(picReconY, picStride, reconQt, reconQtStride);
>  
>      outCost.rdcost     += fullCost.rdcost;
> @@ -611,8 +612,9 @@
>      }
>  
>      // set reconstruction for next intra prediction blocks
> -    pixel*   picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> -    intptr_t picStride = m_frame->m_reconPic->m_stride;
> +    PicYuv*  reconPic = m_frame->m_reconPic;
> +    pixel*   picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> +    intptr_t picStride = reconPic->m_stride;
>      primitives.cu[sizeIdx].copy_pp(picReconY, picStride, reconQt, reconQtStride);
>  
>      outCost.rdcost += fullCost.rdcost;
> @@ -661,8 +663,9 @@
>          uint32_t sizeIdx   = log2TrSize - 2;
>          primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
>  
> -        pixel*   picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> -        intptr_t picStride = m_frame->m_reconPic->m_stride;
> +        PicYuv*  reconPic = m_frame->m_reconPic;
> +        pixel*   picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> +        intptr_t picStride = reconPic->m_stride;
>  
>          uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
>          if (numSig)
> @@ -821,8 +824,9 @@
>              coeff_t* coeffC        = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
>              pixel*   reconQt       = m_rqt[qtLayer].reconQtYuv.getChromaAddr(chromaId, absPartIdxC);
>              uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize;
> -            pixel*   picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> -            intptr_t picStride = m_frame->m_reconPic->m_strideC;
> +            PicYuv*  reconPic = m_frame->m_reconPic;
> +            pixel*   picReconC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> +            intptr_t picStride = reconPic->m_strideC;
>  
>              uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
>              if (chromaPredMode == DM_CHROMA_IDX)
> @@ -998,8 +1002,9 @@
>              cu.setCbfPartRange(bCbf << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
>              cu.setTransformSkipPartRange(bTSkip, ttype, absPartIdxC, tuIterator.absPartIdxStep);
>  
> -            pixel*   reconPicC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> -            intptr_t picStride = m_frame->m_reconPic->m_strideC;
> +            PicYuv*  reconPic = m_frame->m_reconPic;
> +            pixel*   reconPicC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> +            intptr_t picStride = reconPic->m_strideC;
>              primitives.cu[sizeIdxC].copy_pp(reconPicC, picStride, reconQt, reconQtStride);
>  
>              outDist += bDist;
> @@ -1108,8 +1113,9 @@
>              int16_t* residual = resiYuv.getChromaAddr(chromaId, absPartIdxC);
>              uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
>              coeff_t* coeffC        = cu.m_trCoeff[ttype] + coeffOffsetC;
> -            pixel*   picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> -            intptr_t picStride = m_frame->m_reconPic->m_strideC;
> +            PicYuv*  reconPic = m_frame->m_reconPic;
> +            pixel*   picReconC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
> +            intptr_t picStride = reconPic->m_strideC;
>  
>              uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
>              if (chromaPredMode == DM_CHROMA_IDX)
> @@ -1591,10 +1597,11 @@
>               * output recon picture, so it cannot proceed in parallel with anything else when doing INTRA_NXN. Also
>               * it is not updating m_rdContexts[depth].cur for the later PUs which I suspect is slightly wrong. I think
>               * that the contexts should be tracked through each PU */
> -            pixel*   dst         = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> -            uint32_t dststride   = m_frame->m_reconPic->m_stride;
> -            const pixel*   src   = reconYuv->getLumaAddr(absPartIdx);
> -            uint32_t srcstride   = reconYuv->m_size;
> +            PicYuv*  reconPic = m_frame->m_reconPic;
> +            pixel*   dst       = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
> +            uint32_t dststride = reconPic->m_stride;
> +            const pixel*   src = reconYuv->getLumaAddr(absPartIdx);
> +            uint32_t srcstride = reconYuv->m_size;
>              primitives.cu[log2TrSize - 2].copy_pp(dst, dststride, src, srcstride);
>          }
>      }
> @@ -1757,15 +1764,16 @@
>          if (!tuIterator.isLastSection())
>          {
>              uint32_t zorder    = cuGeom.absPartIdx + absPartIdxC;
> -            uint32_t dststride = m_frame->m_reconPic->m_strideC;
> +            PicYuv*  reconPic  = m_frame->m_reconPic;
> +            uint32_t dststride = reconPic->m_strideC;
>              const pixel* src;
>              pixel* dst;
>  
> -            dst = m_frame->m_reconPic->getCbAddr(cu.m_cuAddr, zorder);
> +            dst = reconPic->getCbAddr(cu.m_cuAddr, zorder);
>              src = reconYuv.getCbAddr(absPartIdxC);
>              primitives.chroma[m_csp].cu[size].copy_pp(dst, dststride, src, reconYuv.m_csize);
>  
> -            dst = m_frame->m_reconPic->getCrAddr(cu.m_cuAddr, zorder);
> +            dst = reconPic->getCrAddr(cu.m_cuAddr, zorder);
>              src = reconYuv.getCrAddr(absPartIdxC);
>              primitives.chroma[m_csp].cu[size].copy_pp(dst, dststride, src, reconYuv.m_csize);
>          }
> @@ -1866,7 +1874,7 @@
>  /* find the lowres motion vector from lookahead in middle of current PU */
>  MV Search::getLowresMV(const CUData& cu, const PredictionUnit& pu, int list, int ref)
>  {
> -    int diffPoc = abs(m_slice->m_poc - m_slice->m_refPicList[list][ref]->m_poc);
> +    int diffPoc = abs(m_slice->m_poc - m_slice->m_refPOCList[list][ref]);
>      if (diffPoc > m_param->bframes + 1)
>          /* poc difference is out of range for lookahead */
>          return 0;
> @@ -1906,7 +1914,7 @@
>          else
>          {
>              cu.clipMv(mvCand);
> -            predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand);
> +            predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refReconPicList[list][ref], mvCand);
>              costs[i] = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
>          }
>      }
> @@ -2197,8 +2205,8 @@
>              }
>              else
>              {
> -                PicYuv* refPic0 = slice->m_refPicList[0][bestME[0].ref]->m_reconPic;
> -                PicYuv* refPic1 = slice->m_refPicList[1][bestME[1].ref]->m_reconPic;
> +                PicYuv* refPic0 = slice->m_refReconPicList[0][bestME[0].ref];
> +                PicYuv* refPic1 = slice->m_refReconPicList[1][bestME[1].ref];
>                  Yuv* bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv;
>  
>                  /* Generate reference subpels */
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/slicetype.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -714,16 +714,16 @@
>  
>      case P_SLICE:
>          b = p1 = poc - l0poc;
> -        frames[p0] = &slice->m_refPicList[0][0]->m_lowres;
> +        frames[p0] = &slice->m_refFrameList[0][0]->m_lowres;
>          frames[b] = &curFrame->m_lowres;
>          break;
>  
>      case B_SLICE:
>          b = poc - l0poc;
>          p1 = b + l1poc - poc;
> -        frames[p0] = &slice->m_refPicList[0][0]->m_lowres;
> +        frames[p0] = &slice->m_refFrameList[0][0]->m_lowres;
>          frames[b] = &curFrame->m_lowres;
> -        frames[p1] = &slice->m_refPicList[1][0]->m_lowres;
> +        frames[p1] = &slice->m_refFrameList[1][0]->m_lowres;
>          break;
>  
>      default:
> diff -r faecefdb2ae9 -r 1b2e5e8ccb85 source/encoder/weightPrediction.cpp
> --- a/source/encoder/weightPrediction.cpp	Tue Aug 11 14:28:23 2015 +0000
> +++ b/source/encoder/weightPrediction.cpp	Tue Aug 11 22:31:57 2015 +0000
> @@ -259,7 +259,7 @@
>      for (int list = 0; list < cache.numPredDir; list++)
>      {
>          WeightParam *weights = wp[list][0];
> -        Frame *refFrame = slice.m_refPicList[list][0];
> +        Frame *refFrame = slice.m_refFrameList[list][0];
>          Lowres& refLowres = refFrame->m_lowres;
>          int diffPoc = abs(curPoc - refFrame->m_poc);

-- 
Steve Borho


More information about the x265-devel mailing list