[x265] [PATCH] analysis: cleanup intra analysis functions

Steve Borho steve at borho.org
Fri Oct 9 17:34:17 CEST 2015


On 10/09, Deepthi Nandakumar wrote:
> Thanks, looks really good, a couple of comments below.
> 
> On Thu, Oct 8, 2015 at 6:24 PM, <kavitha at multicorewareinc.com> wrote:
> 
> > # HG changeset patch
> > # User Kavitha Sampath <kavitha at multicorewareinc.com>
> > # Date 1444301894 -19800
> > #      Thu Oct 08 16:28:14 2015 +0530
> > # Node ID bd8087bfd93f7564df436a27c22f7e44f6803c4a
> > # Parent  55a4a9b920ff0385fd0b65c73c4c3f63b8a8cc65
> > analysis: cleanup intra analysis functions
> >
> > During the analysis-load mode, the predicted luma and chroma modes saved in
> > shared buffers are no longer passed explicitly to intra analysis functions.
> > Instead the predicted modes are copied to parentCTU, thereby enabling the
> > intra analysis functions to decide implicitly if modes should be reused.
> >
> > diff -r 55a4a9b920ff -r bd8087bfd93f source/common/cudata.cpp
> > --- a/source/common/cudata.cpp  Thu Oct 08 15:40:43 2015 +0530
> > +++ b/source/common/cudata.cpp  Thu Oct 08 16:28:14 2015 +0530
> > @@ -199,6 +199,7 @@
> >      m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
> >      m_log2CUSize         = charBuf; charBuf += m_numPartitions;
> >      m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
> > +    m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
> >      m_tqBypass           = charBuf; charBuf += m_numPartitions;
> >      m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
> >      m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
> > @@ -216,7 +217,6 @@
> >      m_cbf[0]             = charBuf; charBuf += m_numPartitions;
> >      m_cbf[1]             = charBuf; charBuf += m_numPartitions;
> >      m_cbf[2]             = charBuf; charBuf += m_numPartitions;
> > -    m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
> >
> >      X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions *
> > BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
> >
> > @@ -246,7 +246,8 @@
> >      /* sequential memsets */
> >      m_partSet((uint8_t*)m_qp, (uint8_t)qp);
> >      m_partSet(m_log2CUSize,   (uint8_t)g_maxLog2CUSize);
> > -    m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
> > +    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
> > +    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
> >      m_partSet(m_tqBypass,
> >  (uint8_t)frame.m_encData->m_param->bLossless);
> >      if (m_slice->m_sliceType != I_SLICE)
> >      {
> > @@ -257,7 +258,7 @@
> >      X265_CHECK(!(frame.m_encData->m_param->bLossless &&
> > !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without
> > TQbypass in PPS\n");
> >
> >      /* initialize the remaining CU data in one memset */
> > -    memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
> > +    memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
> >
> >      uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
> >      m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr -
> > 1) : NULL;
> > @@ -284,14 +285,15 @@
> >      m_partSet((uint8_t*)m_qp, (uint8_t)qp);
> >
> >      m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
> > -    m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
> >      m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
> >      m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
> >      m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
> >      m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
> > +    memcpy(m_lumaIntraDir, ctu.m_lumaIntraDir + cuGeom.absPartIdx,
> > cuGeom.numPartitions);
> > +    memcpy(m_chromaIntraDir, ctu.m_chromaIntraDir + cuGeom.absPartIdx,
> > cuGeom.numPartitions);
> >
> >      /* initialize the remaining CU data in one memset */
> > -    memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
> > +    memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
> >  }
> >
> >  /* Copy the results of a sub-part (split) CU to the parent CU */
> > diff -r 55a4a9b920ff -r bd8087bfd93f source/common/cudata.h
> > --- a/source/common/cudata.h    Thu Oct 08 15:40:43 2015 +0530
> > +++ b/source/common/cudata.h    Thu Oct 08 16:28:14 2015 +0530
> > @@ -287,6 +287,12 @@
> >      const CUData* getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t
> > curPartUnitIdx, uint32_t partUnitOffset) const;
> >      const CUData* getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t
> > curPartUnitIdx, uint32_t partUnitOffset) const;
> >
> > +    inline void resetIntraModes(bool resetLuma, bool resetChroma)
> > +    {
> > +        if (resetLuma) m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
> > +        if (resetChroma) m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
> > +    }
> > +
> >  protected:
> >
> >      template<typename T>
> > diff -r 55a4a9b920ff -r bd8087bfd93f source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp       Thu Oct 08 15:40:43 2015 +0530
> > +++ b/source/encoder/analysis.cpp       Thu Oct 08 16:28:14 2015 +0530
> > @@ -72,7 +72,6 @@
> >
> >  Analysis::Analysis()
> >  {
> > -    m_reuseIntraDataCTU = NULL;
> >      m_reuseInterDataCTU = NULL;
> >      m_reuseRef = NULL;
> >      m_reuseBestMergeCand = NULL;
> > @@ -141,17 +140,12 @@
> >      m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic,
> > ctu.m_cuAddr, 0);
> >
> >      uint32_t numPartition = ctu.m_numPartitions;
> > -    if (m_param->analysisMode)
> > +    if (m_param->analysisMode && m_slice->m_sliceType != I_SLICE)
> >      {
> > -        if (m_slice->m_sliceType == I_SLICE)
> > -            m_reuseIntraDataCTU =
> > (analysis_intra_data*)m_frame->m_analysisData.intraData;
> > -        else
> > -        {
> > -            int numPredDir = m_slice->isInterP() ? 1 : 2;
> > -            m_reuseInterDataCTU =
> > (analysis_inter_data*)m_frame->m_analysisData.interData;
> > -            m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr *
> > X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> > -            m_reuseBestMergeCand =
> > &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
> > -        }
> > +        int numPredDir = m_slice->isInterP() ? 1 : 2;
> > +        m_reuseInterDataCTU =
> > (analysis_inter_data*)m_frame->m_analysisData.interData;
> > +        m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr *
> > X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> > +        m_reuseBestMergeCand =
> > &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
> >      }
> >
> >      ProfileCUScope(ctu, totalCTUTime, totalCTUs);
> > @@ -159,14 +153,22 @@
> >      uint32_t zOrder = 0;
> >      if (m_slice->m_sliceType == I_SLICE)
> >      {
> > +        analysis_intra_data* intraDataCTU =
> > (analysis_intra_data*)m_frame->m_analysisData.intraData;
> > +        if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> > +        {
> > +            memcpy(ctu.m_cuDepth, &intraDataCTU->depth[ctu.m_cuAddr *
> > numPartition], sizeof(uint8_t) * numPartition);
> > +            memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[ctu.m_cuAddr
> > * numPartition], sizeof(uint8_t) * numPartition);
> > +            memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr
> > * numPartition], sizeof(char) * numPartition);
> > +            memcpy(ctu.m_chromaIntraDir,
> > &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
> > numPartition);
> > +        }
> >          compressIntraCU(ctu, cuGeom, zOrder, qp);
> > -        if (m_param->analysisMode == X265_ANALYSIS_SAVE &&
> > m_frame->m_analysisData.intraData)
> > +        if (m_param->analysisMode == X265_ANALYSIS_SAVE && intraDataCTU)
> >          {
> >              CUData* bestCU = &m_modeDepth[0].bestMode->cu;
> > -            memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr *
> > numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
> > -            memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr *
> > numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
> > -            memcpy(&m_reuseIntraDataCTU->partSizes[ctu.m_cuAddr *
> > numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
> > -            memcpy(&m_reuseIntraDataCTU->chromaModes[ctu.m_cuAddr *
> > numPartition], bestCU->m_chromaIntraDir, sizeof(uint8_t) * numPartition);
> > +            memcpy(&intraDataCTU->depth[ctu.m_cuAddr * numPartition],
> > bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
> > +            memcpy(&intraDataCTU->modes[ctu.m_cuAddr * numPartition],
> > bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
> > +            memcpy(&intraDataCTU->partSizes[ctu.m_cuAddr * numPartition],
> > bestCU->m_partSize, sizeof(uint8_t) * numPartition);
> > +            memcpy(&intraDataCTU->chromaModes[ctu.m_cuAddr *
> > numPartition], bestCU->m_chromaIntraDir, sizeof(uint8_t) * numPartition);
> >          }
> >      }
> >      else
> > @@ -213,8 +215,7 @@
> >          md.pred[PRED_LOSSLESS].initCosts();
> >          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
> >          PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
> > -        uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
> > -        checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
> > +        checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size);
> >          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> >      }
> >      else
> > @@ -236,42 +237,37 @@
> >      bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> >      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
> >
> > -    if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> > +    bool bAlreadyDecided = parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] !=
> > (uint8_t)ALL_IDX;
> > +    bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
> > +
> > +    // stop recursion if we reach the depth of previous analysis decision
> > +    mightSplit &= !(bAlreadyDecided && bDecidedDepth);
> > +
> > +    if (bAlreadyDecided)
> >      {
> > -        uint8_t* reuseDepth  =
> > &m_reuseIntraDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
> > -        uint8_t* reuseModes  =
> > &m_reuseIntraDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
> > -        char* reusePartSizes =
> > &m_reuseIntraDataCTU->partSizes[parentCTU.m_cuAddr *
> > parentCTU.m_numPartitions];
> > -        uint8_t* reuseChromaModes =
> > &m_reuseIntraDataCTU->chromaModes[parentCTU.m_cuAddr *
> > parentCTU.m_numPartitions];
> > -
> > -        if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder ==
> > cuGeom.absPartIdx)
> > +        if (bDecidedDepth)
> >          {
> > -            PartSize size = (PartSize)reusePartSizes[zOrder];
> > -            Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] :
> > md.pred[PRED_INTRA_NxN];
> > +            Mode& mode = md.pred[0];
> >
> 
> need to use the correct enum PRED_INTRA above.

It doesn't matter which mode instance we use, since only one mode is
ever going to be evaluated. This was done deliberately as a hint for
when we do the same for inter blocks. We don't want to go hunting for
which of the 14 Mode instances to use for every CU.

> 
> > +            md.bestMode = &mode;
> >              mode.cu.initSubCU(parentCTU, cuGeom, qp);
> > -            checkIntra(mode, cuGeom, size, &reuseModes[zOrder],
> > &reuseChromaModes[zOrder]);
> > -            checkBestMode(mode, depth);
> > -
> >
> 
> So, instead of copying the dirs in initSubCU and then resetting them
> everywhere else, can't we just copy the dirs into the subCU if
> bAlreadyDecided is true right here?

hmm, perhaps.

-- 
Steve Borho


More information about the x265-devel mailing list