[x265] [PATCH] analysis: cleanup intra analysis functions
Steve Borho
steve at borho.org
Fri Oct 9 17:34:17 CEST 2015
On 10/09, Deepthi Nandakumar wrote:
> Thanks, looks really good, a couple of comments below.
>
> On Thu, Oct 8, 2015 at 6:24 PM, <kavitha at multicorewareinc.com> wrote:
>
> > # HG changeset patch
> > # User Kavitha Sampath <kavitha at multicorewareinc.com>
> > # Date 1444301894 -19800
> > # Thu Oct 08 16:28:14 2015 +0530
> > # Node ID bd8087bfd93f7564df436a27c22f7e44f6803c4a
> > # Parent 55a4a9b920ff0385fd0b65c73c4c3f63b8a8cc65
> > analysis: cleanup intra analysis functions
> >
> > During the analysis-load mode, the predicted luma and chroma modes saved in
> > shared buffers are no longer passed explicitly to intra analysis functions.
> > Instead the predicted modes are copied to parentCTU, thereby enabling the
> > intra analysis functions to decide implicitly if modes should be reused.
> >
> > diff -r 55a4a9b920ff -r bd8087bfd93f source/common/cudata.cpp
> > --- a/source/common/cudata.cpp Thu Oct 08 15:40:43 2015 +0530
> > +++ b/source/common/cudata.cpp Thu Oct 08 16:28:14 2015 +0530
> > @@ -199,6 +199,7 @@
> > m_qp = (int8_t*)charBuf; charBuf += m_numPartitions;
> > m_log2CUSize = charBuf; charBuf += m_numPartitions;
> > m_lumaIntraDir = charBuf; charBuf += m_numPartitions;
> > + m_chromaIntraDir = charBuf; charBuf += m_numPartitions;
> > m_tqBypass = charBuf; charBuf += m_numPartitions;
> > m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
> > m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
> > @@ -216,7 +217,6 @@
> > m_cbf[0] = charBuf; charBuf += m_numPartitions;
> > m_cbf[1] = charBuf; charBuf += m_numPartitions;
> > m_cbf[2] = charBuf; charBuf += m_numPartitions;
> > - m_chromaIntraDir = charBuf; charBuf += m_numPartitions;
> >
> > X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions *
> > BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
> >
> > @@ -246,7 +246,8 @@
> > /* sequential memsets */
> > m_partSet((uint8_t*)m_qp, (uint8_t)qp);
> > m_partSet(m_log2CUSize, (uint8_t)g_maxLog2CUSize);
> > - m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
> > + m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
> > + m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
> > m_partSet(m_tqBypass,
> > (uint8_t)frame.m_encData->m_param->bLossless);
> > if (m_slice->m_sliceType != I_SLICE)
> > {
> > @@ -257,7 +258,7 @@
> > X265_CHECK(!(frame.m_encData->m_param->bLossless &&
> > !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without
> > TQbypass in PPS\n");
> >
> > /* initialize the remaining CU data in one memset */
> > - memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
> > + memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
> >
> > uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
> > m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr -
> > 1) : NULL;
> > @@ -284,14 +285,15 @@
> > m_partSet((uint8_t*)m_qp, (uint8_t)qp);
> >
> > m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize);
> > - m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
> > m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless);
> > m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
> > m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
> > m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
> > + memcpy(m_lumaIntraDir, ctu.m_lumaIntraDir + cuGeom.absPartIdx,
> > cuGeom.numPartitions);
> > + memcpy(m_chromaIntraDir, ctu.m_chromaIntraDir + cuGeom.absPartIdx,
> > cuGeom.numPartitions);
> >
> > /* initialize the remaining CU data in one memset */
> > - memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
> > + memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
> > }
> >
> > /* Copy the results of a sub-part (split) CU to the parent CU */
> > diff -r 55a4a9b920ff -r bd8087bfd93f source/common/cudata.h
> > --- a/source/common/cudata.h Thu Oct 08 15:40:43 2015 +0530
> > +++ b/source/common/cudata.h Thu Oct 08 16:28:14 2015 +0530
> > @@ -287,6 +287,12 @@
> > const CUData* getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t
> > curPartUnitIdx, uint32_t partUnitOffset) const;
> > const CUData* getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t
> > curPartUnitIdx, uint32_t partUnitOffset) const;
> >
> > + inline void resetIntraModes(bool resetLuma, bool resetChroma)
> > + {
> > + if (resetLuma) m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
> > + if (resetChroma) m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
> > + }
> > +
> > protected:
> >
> > template<typename T>
> > diff -r 55a4a9b920ff -r bd8087bfd93f source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp Thu Oct 08 15:40:43 2015 +0530
> > +++ b/source/encoder/analysis.cpp Thu Oct 08 16:28:14 2015 +0530
> > @@ -72,7 +72,6 @@
> >
> > Analysis::Analysis()
> > {
> > - m_reuseIntraDataCTU = NULL;
> > m_reuseInterDataCTU = NULL;
> > m_reuseRef = NULL;
> > m_reuseBestMergeCand = NULL;
> > @@ -141,17 +140,12 @@
> > m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic,
> > ctu.m_cuAddr, 0);
> >
> > uint32_t numPartition = ctu.m_numPartitions;
> > - if (m_param->analysisMode)
> > + if (m_param->analysisMode && m_slice->m_sliceType != I_SLICE)
> > {
> > - if (m_slice->m_sliceType == I_SLICE)
> > - m_reuseIntraDataCTU =
> > (analysis_intra_data*)m_frame->m_analysisData.intraData;
> > - else
> > - {
> > - int numPredDir = m_slice->isInterP() ? 1 : 2;
> > - m_reuseInterDataCTU =
> > (analysis_inter_data*)m_frame->m_analysisData.interData;
> > - m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr *
> > X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> > - m_reuseBestMergeCand =
> > &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
> > - }
> > + int numPredDir = m_slice->isInterP() ? 1 : 2;
> > + m_reuseInterDataCTU =
> > (analysis_inter_data*)m_frame->m_analysisData.interData;
> > + m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr *
> > X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> > + m_reuseBestMergeCand =
> > &m_reuseInterDataCTU->bestMergeCand[ctu.m_cuAddr * CUGeom::MAX_GEOMS];
> > }
> >
> > ProfileCUScope(ctu, totalCTUTime, totalCTUs);
> > @@ -159,14 +153,22 @@
> > uint32_t zOrder = 0;
> > if (m_slice->m_sliceType == I_SLICE)
> > {
> > + analysis_intra_data* intraDataCTU =
> > (analysis_intra_data*)m_frame->m_analysisData.intraData;
> > + if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> > + {
> > + memcpy(ctu.m_cuDepth, &intraDataCTU->depth[ctu.m_cuAddr *
> > numPartition], sizeof(uint8_t) * numPartition);
> > + memcpy(ctu.m_lumaIntraDir, &intraDataCTU->modes[ctu.m_cuAddr
> > * numPartition], sizeof(uint8_t) * numPartition);
> > + memcpy(ctu.m_partSize, &intraDataCTU->partSizes[ctu.m_cuAddr
> > * numPartition], sizeof(char) * numPartition);
> > + memcpy(ctu.m_chromaIntraDir,
> > &intraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], sizeof(uint8_t) *
> > numPartition);
> > + }
> > compressIntraCU(ctu, cuGeom, zOrder, qp);
> > - if (m_param->analysisMode == X265_ANALYSIS_SAVE &&
> > m_frame->m_analysisData.intraData)
> > + if (m_param->analysisMode == X265_ANALYSIS_SAVE && intraDataCTU)
> > {
> > CUData* bestCU = &m_modeDepth[0].bestMode->cu;
> > - memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr *
> > numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
> > - memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr *
> > numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
> > - memcpy(&m_reuseIntraDataCTU->partSizes[ctu.m_cuAddr *
> > numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
> > - memcpy(&m_reuseIntraDataCTU->chromaModes[ctu.m_cuAddr *
> > numPartition], bestCU->m_chromaIntraDir, sizeof(uint8_t) * numPartition);
> > + memcpy(&intraDataCTU->depth[ctu.m_cuAddr * numPartition],
> > bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
> > + memcpy(&intraDataCTU->modes[ctu.m_cuAddr * numPartition],
> > bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
> > + memcpy(&intraDataCTU->partSizes[ctu.m_cuAddr * numPartition],
> > bestCU->m_partSize, sizeof(uint8_t) * numPartition);
> > + memcpy(&intraDataCTU->chromaModes[ctu.m_cuAddr *
> > numPartition], bestCU->m_chromaIntraDir, sizeof(uint8_t) * numPartition);
> > }
> > }
> > else
> > @@ -213,8 +215,7 @@
> > md.pred[PRED_LOSSLESS].initCosts();
> > md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
> > PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
> > - uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
> > - checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
> > + checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size);
> > checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> > }
> > else
> > @@ -236,42 +237,37 @@
> > bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> > bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
> >
> > - if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> > + bool bAlreadyDecided = parentCTU.m_lumaIntraDir[cuGeom.absPartIdx] !=
> > (uint8_t)ALL_IDX;
> > + bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
> > +
> > + // stop recursion if we reach the depth of previous analysis decision
> > + mightSplit &= !(bAlreadyDecided && bDecidedDepth);
> > +
> > + if (bAlreadyDecided)
> > {
> > - uint8_t* reuseDepth =
> > &m_reuseIntraDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
> > - uint8_t* reuseModes =
> > &m_reuseIntraDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
> > - char* reusePartSizes =
> > &m_reuseIntraDataCTU->partSizes[parentCTU.m_cuAddr *
> > parentCTU.m_numPartitions];
> > - uint8_t* reuseChromaModes =
> > &m_reuseIntraDataCTU->chromaModes[parentCTU.m_cuAddr *
> > parentCTU.m_numPartitions];
> > -
> > - if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder ==
> > cuGeom.absPartIdx)
> > + if (bDecidedDepth)
> > {
> > - PartSize size = (PartSize)reusePartSizes[zOrder];
> > - Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] :
> > md.pred[PRED_INTRA_NxN];
> > + Mode& mode = md.pred[0];
> >
>
> need to use the correct enum PRED_INTRA above.
It doesn't matter which Mode instance we use, since only one mode is
ever going to be evaluated. This was done deliberately as a hint for
when we do the same for inter blocks. We don't want to go hunting for
which of the 14 Mode instances to use for every CU.
>
> > + md.bestMode = &mode;
> > mode.cu.initSubCU(parentCTU, cuGeom, qp);
> > - checkIntra(mode, cuGeom, size, &reuseModes[zOrder],
> > &reuseChromaModes[zOrder]);
> > - checkBestMode(mode, depth);
> > -
> >
>
> So, instead of copying the dirs in initSubCU and then resetting them
> everywhere else, can't we just copy the dirs into the subCU if
> bAlreadyDecided is true right here?
Hmm, perhaps.
--
Steve Borho
More information about the x265-devel mailing list