[x265] [PATCH] TComDataCU: replace getZorderIdxInCU() with encodeIdx of CU structure
Santhoshini Sekar
santhoshini at multicorewareinc.com
Mon Sep 22 07:25:11 CEST 2014
Please ignore the above patch.
On Mon, Sep 22, 2014 at 9:06 AM, <santhoshini at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Santhoshini Sekar <santhoshini at multicorewareinc.com>
> # Date 1411356953 -19800
> # Mon Sep 22 09:05:53 2014 +0530
> # Node ID f70fd79cb3e1a0cb60b1c7ea5aac9a52922703c3
> # Parent c8f53398f8ceb9e536c2f1569fe4a2a2756aa014
> TComDataCU: replace getZorderIdxInCU() with encodeIdx of CU structure
>
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/Lib/TLibCommon/TComDataCU.cpp
> --- a/source/Lib/TLibCommon/TComDataCU.cpp Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon Sep 22 09:05:53 2014 +0530
> @@ -387,7 +387,7 @@
> }
>
> // initialize Sub partition
> -void TComDataCU::initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t
> depth, int qp)
> +void TComDataCU::initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t
> depth, int qp, CU* cuData)
> {
> X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");
> uint8_t log2CUSize = g_maxLog2CUSize - depth;
> @@ -396,7 +396,7 @@
> m_pic = cu->m_pic;
> m_slice = cu->m_slice;
> m_cuAddr = cu->getAddr();
> - m_absIdxInLCU = cu->getZorderIdxInCU() + partOffset;
> + m_absIdxInLCU = cuData->encodeIdx * 4 + partOffset;
>
> m_cuPelX = cu->getCUPelX() + ((partUnitIdx & 1) <<
> log2CUSize);
> m_cuPelY = cu->getCUPelY() + ((partUnitIdx >> 1) <<
> log2CUSize);
> @@ -453,7 +453,7 @@
> m_cuAboveRight = cu->getCUAboveRight();
> }
>
> -void TComDataCU::copyToSubCU(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth)
> +void TComDataCU::copyToSubCU(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth, CU* cuData)
> {
> X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");
>
> @@ -462,7 +462,7 @@
> m_pic = cu->m_pic;
> m_slice = cu->m_slice;
> m_cuAddr = cu->getAddr();
> - m_absIdxInLCU = cu->getZorderIdxInCU() + partOffset;
> + m_absIdxInLCU = cuData->encodeIdx * 4 + partOffset;
>
> m_cuPelX = cu->getCUPelX() + ((partUnitIdx & 1) <<
> (g_maxLog2CUSize - depth));
> m_cuPelY = cu->getCUPelY() + ((partUnitIdx >> 1) <<
> (g_maxLog2CUSize - depth));
> @@ -1067,9 +1067,9 @@
> }
> else
> {
> - if (getZorderIdxInCU() > 0)
> + if (m_CULocalData->encodeIdx *4 > 0)
> {
> - return
> m_pic->getCU(getAddr())->getLastCodedQP(getZorderIdxInCU());
> + return
> m_pic->getCU(getAddr())->getLastCodedQP(m_CULocalData->encodeIdx *4);
> }
> else if (getAddr() > 0 &&
> !(m_slice->m_pps->bEntropyCodingSyncEnabled &&
> getAddr() %
> m_pic->getFrameWidthInCU() == 0))
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/Lib/TLibCommon/TComDataCU.h Mon Sep 22 09:05:53 2014 +0530
> @@ -273,9 +273,9 @@
>
> void initCU(Frame* pic, uint32_t cuAddr);
> void initEstData();
> - void initSubCU(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth, int qp);
> + void initSubCU(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth, int qp, CU* cuData);
>
> - void copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx,
> uint32_t depth);
> + void copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx,
> uint32_t depth, CU* cuData);
> void copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx,
> uint32_t depth, bool isRDObasedAnalysis = true);
>
> void copyToPic(uint32_t depth);
> @@ -288,8 +288,6 @@
>
> uint32_t& getAddr() { return m_cuAddr; }
>
> - uint32_t& getZorderIdxInCU() { return
> m_absIdxInLCU; }
> -
> uint32_t getSCUAddr() const { return (m_cuAddr <<
> g_maxFullDepth * 2) + m_absIdxInLCU; }
>
>
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/Lib/TLibCommon/TComPattern.cpp
> --- a/source/Lib/TLibCommon/TComPattern.cpp Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/Lib/TLibCommon/TComPattern.cpp Mon Sep 22 09:05:53 2014 +0530
> @@ -50,7 +50,7 @@
> //
> ====================================================================================================================
>
> void TComPattern::initAdiPattern(TComDataCU* cu, uint32_t
> zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf,
> - pixel* refAbove, pixel* refLeft, pixel*
> refAboveFlt, pixel* refLeftFlt, int dirMode)
> + pixel* refAbove, pixel* refLeft, pixel*
> refAboveFlt, pixel* refLeftFlt, int dirMode, CU* cuData)
> {
> pixel* roiOrigin;
> pixel* adiTemp;
> @@ -63,7 +63,7 @@
> uint32_t tuSize = intraNeighbors.tuSize;
> uint32_t tuSize2 = tuSize << 1;
>
> - roiOrigin = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(),
> cu->getZorderIdxInCU() + zOrderIdxInPart);
> + roiOrigin = cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(),
> cuData->encodeIdx * 4 + zOrderIdxInPart);
> adiTemp = adiBuf;
>
> fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
> @@ -163,7 +163,7 @@
> }
> }
>
> -void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t
> zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, uint32_t chromaId)
> +void TComPattern::initAdiPatternChroma(TComDataCU* cu, uint32_t
> zOrderIdxInPart, uint32_t partDepth, pixel* adiBuf, uint32_t chromaId, CU*
> cuData)
> {
> pixel* roiOrigin;
> pixel* adiTemp;
> @@ -175,7 +175,7 @@
> initIntraNeighbors(cu, zOrderIdxInPart, partDepth, false,
> &intraNeighbors);
> uint32_t tuSize = intraNeighbors.tuSize;
>
> - roiOrigin = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId,
> cu->getAddr(), cu->getZorderIdxInCU() + zOrderIdxInPart);
> + roiOrigin = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId,
> cu->getAddr(), cuData->encodeIdx * 4 + zOrderIdxInPart);
> adiTemp = getAdiChromaBuf(chromaId, tuSize, adiBuf);
>
> fillReferenceSamples(roiOrigin, picStride, adiTemp, intraNeighbors);
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/Lib/TLibCommon/TComPattern.h
> --- a/source/Lib/TLibCommon/TComPattern.h Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/Lib/TLibCommon/TComPattern.h Mon Sep 22 09:05:53 2014 +0530
> @@ -53,6 +53,7 @@
>
> class TComDataCU;
>
> +struct CU;
> struct IntraNeighbors
> {
> int numIntraNeighbor;
> @@ -84,11 +85,12 @@
> /// set parameters from pixel buffers for accessing neighboring pixels
> static void initAdiPattern(TComDataCU* cu, uint32_t zOrderIdxInPart,
> uint32_t partDepth, pixel* adiBuf,
> pixel* refAbove, pixel* refLeft,
> - pixel* refAboveFlt, pixel* refLeftFlt, int
> dirMode);
> + pixel* refAboveFlt, pixel* refLeftFlt, int
> dirMode,
> + CU* cuData);
>
> /// set chroma parameters from CU data for accessing ADI data
> static void initAdiPatternChroma(TComDataCU* cu, uint32_t
> zOrderIdxInPart, uint32_t partDepth,
> - pixel* adiBuf, uint32_t chromaId);
> + pixel* adiBuf, uint32_t chromaId,
> CU* cuData);
>
> static void initIntraNeighbors(TComDataCU* cu, uint32_t
> zOrderIdxInPart, uint32_t partDepth, bool isLuma, IntraNeighbors
> *IntraNeighbors);
>
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/encoder/analysis.cpp Mon Sep 22 09:05:53 2014 +0530
> @@ -424,7 +424,7 @@
> //PPAScopeEvent(CompressIntraCU + depth);
> Frame* pic = outBestCU->m_pic;
> uint32_t cuAddr = outBestCU->getAddr();
> - uint32_t absPartIdx = outBestCU->getZorderIdxInCU();
> + uint32_t absPartIdx = cu->encodeIdx * 4;
>
> if (depth == 0)
> // get original YUV data from picture
> @@ -469,10 +469,10 @@
> {
> CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx +
> partUnitIdx;
> int qp = outTempCU->getQP(0);
> - subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp); // clear sub partition datas or init.
> + subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp, cu); // clear sub partition datas or init.
> if (child_cu->flags & CU::PRESENT)
> {
> - subTempPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp); // clear sub partition datas or init.
> + subTempPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp, cu); // clear sub partition datas or init.
> if (0 == partUnitIdx) //initialize RD with previous depth
> buffer
>
> m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> else
> @@ -560,15 +560,15 @@
> int32_t ctuToDepthIndex = g_maxCUDepth - 1;
>
> if (depth)
> - m_origYuv[0]->copyPartToYuv(m_origYuv[depth],
> outBestCU->getZorderIdxInCU());
> + m_origYuv[0]->copyPartToYuv(m_origYuv[depth], cu->encodeIdx * 4);
> else
> - m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(),
> outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
> + m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(),
> outBestCU->getAddr(), cu->encodeIdx * 4);
>
> Slice* slice = outTempCU->m_slice;
> int32_t cu_split_flag = !(cu->flags & CU::LEAF);
> int32_t cu_unsplit_flag = !(cu->flags & CU::SPLIT_MANDATORY);
>
> - if (cu_unsplit_flag && ((zOrder == outBestCU->getZorderIdxInCU()) &&
> (depth == sharedDepth[zOrder])))
> + if (cu_unsplit_flag && ((zOrder == cu->encodeIdx * 4) && (depth ==
> sharedDepth[zOrder])))
> {
> m_quant.setQPforQuant(outTempCU);
> checkIntra(outBestCU, outTempCU,
> (PartSize)sharedPartSizes[zOrder], cu, &sharedModes[zOrder]);
> @@ -602,10 +602,10 @@
> {
> CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx +
> partUnitIdx;
> int qp = outTempCU->getQP(0);
> - subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp); // clear sub partition datas or init.
> + subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp, cu); // clear sub partition datas or init.
> if (child_cu->flags & CU::PRESENT)
> {
> - subTempPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp); // clear sub partition datas or init.
> + subTempPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp, cu); // clear sub partition datas or init.
>
> if (partUnitIdx) // initialize RD with previous depth
> buffer
>
> m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
> @@ -668,7 +668,7 @@
> outBestCU->copyToPic(depth);
> if (!cu_unsplit_flag)
> return;
> - m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(),
> outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
> + m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(),
> outBestCU->getAddr(), cu->encodeIdx * 4);
>
> #if CHECKED_BUILD || _DEBUG
> X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best
> partition size\n");
> @@ -696,11 +696,11 @@
> outTempCU->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);
>
> if (sharedModes)
> - sharedEstIntraPredQT(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth],
> tuDepthRange, sharedModes);
> + sharedEstIntraPredQT(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth],
> tuDepthRange, sharedModes, cu);
> else
> - estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth],
> m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange);
> + estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth],
> m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange, cu);
>
> - estIntraPredChromaQT(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
> + estIntraPredChromaQT(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], cu);
>
> m_entropyCoder->resetBits();
> if (outTempCU->m_slice->m_pps->bTransquantBypassEnabled)
> @@ -731,11 +731,11 @@
> checkBestMode(outBestCU, outTempCU, depth);
> }
>
> -void Analysis::compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cu_t,
> int bInsidePicture, uint32_t PartitionIndex, uint32_t minDepth)
> +void Analysis::compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU
> *cuData, int bInsidePicture, uint32_t PartitionIndex, uint32_t minDepth)
> {
> Frame* pic = outTempCU->m_pic;
> uint32_t cuAddr = outTempCU->getAddr();
> - uint32_t absPartIdx = outTempCU->getZorderIdxInCU();
> + uint32_t absPartIdx = cuData->encodeIdx * 4;
>
> if (depth == 0)
> // get original YUV data from picture
> @@ -753,8 +753,8 @@
> #endif
>
> Slice* slice = outTempCU->m_slice;
> - int cu_split_flag = !(cu_t->flags & CU::LEAF);
> - int cu_unsplit_flag = !(cu_t->flags & CU::SPLIT_MANDATORY);
> + int cu_split_flag = !(cuData->flags & CU::LEAF);
> + int cu_unsplit_flag = !(cuData->flags & CU::SPLIT_MANDATORY);
>
> if (depth == 0 && m_param->rdLevel == 0)
> {
> @@ -810,16 +810,16 @@
> }
> else
> {
> - m_interCU_2Nx2N[depth]->initSubCU(cu, PartitionIndex,
> depth, qp);
> - m_interCU_2NxN[depth]->initSubCU(cu, PartitionIndex,
> depth, qp);
> - m_interCU_Nx2N[depth]->initSubCU(cu, PartitionIndex,
> depth, qp);
> - m_intraInInterCU[depth]->initSubCU(cu, PartitionIndex,
> depth, qp);
> - m_mergeCU[depth]->initSubCU(cu, PartitionIndex, depth,
> qp);
> - m_bestMergeCU[depth]->initSubCU(cu, PartitionIndex,
> depth, qp);
> + m_interCU_2Nx2N[depth]->initSubCU(cu, PartitionIndex,
> depth, qp, cuData);
> + m_interCU_2NxN[depth]->initSubCU(cu, PartitionIndex,
> depth, qp, cuData);
> + m_interCU_Nx2N[depth]->initSubCU(cu, PartitionIndex,
> depth, qp, cuData);
> + m_intraInInterCU[depth]->initSubCU(cu, PartitionIndex,
> depth, qp, cuData);
> + m_mergeCU[depth]->initSubCU(cu, PartitionIndex, depth,
> qp, cuData);
> + m_bestMergeCU[depth]->initSubCU(cu, PartitionIndex,
> depth, qp, cuData);
> }
>
> /* Compute Merge Cost */
> - checkMerge2Nx2N_rd0_4(m_bestMergeCU[depth], m_mergeCU[depth],
> m_modePredYuv[3][depth], m_bestMergeRecoYuv[depth]);
> + checkMerge2Nx2N_rd0_4(m_bestMergeCU[depth], m_mergeCU[depth],
> m_modePredYuv[3][depth], m_bestMergeRecoYuv[depth], cuData);
> bool earlyskip = false;
> if (m_param->rdLevel >= 1)
> earlyskip = (m_param->bEnableEarlySkip &&
> m_bestMergeCU[depth]->isSkipped(0));
> @@ -828,7 +828,7 @@
> {
> /* Compute 2Nx2N mode costs */
> {
> - checkInter_rd0_4(m_interCU_2Nx2N[depth],
> m_modePredYuv[0][depth], SIZE_2Nx2N);
> + checkInter_rd0_4(m_interCU_2Nx2N[depth],
> m_modePredYuv[0][depth], SIZE_2Nx2N, cuData);
> /* Choose best mode; initialise outBestCU to 2Nx2N */
> outBestCU = m_interCU_2Nx2N[depth];
> std::swap(m_bestPredYuv[depth],
> m_modePredYuv[0][depth]);
> @@ -837,8 +837,8 @@
> /* Compute Rect costs */
> if (m_param->bEnableRectInter)
> {
> - checkInter_rd0_4(m_interCU_Nx2N[depth],
> m_modePredYuv[1][depth], SIZE_Nx2N);
> - checkInter_rd0_4(m_interCU_2NxN[depth],
> m_modePredYuv[2][depth], SIZE_2NxN);
> + checkInter_rd0_4(m_interCU_Nx2N[depth],
> m_modePredYuv[1][depth], SIZE_Nx2N, cuData);
> + checkInter_rd0_4(m_interCU_2NxN[depth],
> m_modePredYuv[2][depth], SIZE_2NxN, cuData);
> if (m_interCU_Nx2N[depth]->m_sa8dCost <
> outBestCU->m_sa8dCost)
> {
> outBestCU = m_interCU_Nx2N[depth];
> @@ -857,12 +857,12 @@
> int numPart = outBestCU->getNumPartInter();
> for (int partIdx = 0; partIdx < numPart; partIdx++)
> {
> - prepMotionCompensation(outBestCU, partIdx);
> + prepMotionCompensation(outBestCU, partIdx,
> cuData);
> motionCompensation(outBestCU,
> m_bestPredYuv[depth], REF_PIC_LIST_X, false, true);
> }
>
> encodeResAndCalcRdInterCU(outBestCU,
> m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
> - m_bestResiYuv[depth],
> m_bestRecoYuv[depth]);
> + m_bestResiYuv[depth], m_bestRecoYuv[depth],
> cuData);
> uint64_t bestMergeCost = m_rdCost.m_psyRd ?
> m_bestMergeCU[depth]->m_totalPsyCost : m_bestMergeCU[depth]->m_totalRDCost;
> uint64_t bestCost = m_rdCost.m_psyRd ?
> outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
> if (bestMergeCost < bestCost)
> @@ -890,12 +890,12 @@
> }
> if (bdoIntra)
> {
> - checkIntraInInter_rd0_4(m_intraInInterCU[depth],
> SIZE_2Nx2N);
> + checkIntraInInter_rd0_4(m_intraInInterCU[depth],
> SIZE_2Nx2N, cuData);
> uint64_t intraInInterCost, bestCost;
> if (m_param->rdLevel > 2)
> {
> encodeIntraInInter(m_intraInInterCU[depth],
> m_origYuv[depth], m_modePredYuv[5][depth],
> - m_tmpResiYuv[depth],
> m_tmpRecoYuv[depth]);
> + m_tmpResiYuv[depth],
> m_tmpRecoYuv[depth], cuData);
> intraInInterCost = m_rdCost.m_psyRd ?
> m_intraInInterCU[depth]->m_totalPsyCost :
> m_intraInInterCU[depth]->m_totalRDCost;
> bestCost = m_rdCost.m_psyRd ?
> outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
> }
> @@ -927,17 +927,17 @@
> int numPart = outBestCU->getNumPartInter();
> for (int partIdx = 0; partIdx < numPart;
> partIdx++)
> {
> - prepMotionCompensation(outBestCU, partIdx);
> + prepMotionCompensation(outBestCU, partIdx,
> cuData);
> motionCompensation(outBestCU,
> m_bestPredYuv[depth], REF_PIC_LIST_X, false, true);
> }
>
> encodeResAndCalcRdInterCU(outBestCU,
> m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth],
> - m_bestResiYuv[depth],
> m_bestRecoYuv[depth]);
> + m_bestResiYuv[depth],
> m_bestRecoYuv[depth], cuData);
>
> m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
> }
> else if (outBestCU->getPredictionMode(0) ==
> MODE_INTRA)
> {
> - encodeIntraInInter(outBestCU, m_origYuv[depth],
> m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);
> + encodeIntraInInter(outBestCU, m_origYuv[depth],
> m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], cuData);
>
> m_rdEntropyCoders[depth][CI_TEMP_BEST].store(m_rdEntropyCoders[depth][CI_NEXT_BEST]);
> }
> }
> @@ -954,15 +954,15 @@
> int numPart = outBestCU->getNumPartInter();
> for (int partIdx = 0; partIdx < numPart;
> partIdx++)
> {
> - prepMotionCompensation(outBestCU, partIdx);
> + prepMotionCompensation(outBestCU, partIdx,
> cuData);
> motionCompensation(outBestCU,
> m_bestPredYuv[depth], REF_PIC_LIST_X, false, true);
> }
>
> m_tmpResiYuv[depth]->subtract(m_origYuv[depth],
> m_bestPredYuv[depth], outBestCU->getLog2CUSize(0));
> - generateCoeffRecon(outBestCU, m_origYuv[depth],
> m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);
> + generateCoeffRecon(outBestCU, m_origYuv[depth],
> m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], cuData);
> }
> else
> - generateCoeffRecon(outBestCU, m_origYuv[depth],
> m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth]);
> + generateCoeffRecon(outBestCU, m_origYuv[depth],
> m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestRecoYuv[depth], cuData);
> }
> else if (m_param->rdLevel == 0)
> {
> @@ -971,7 +971,7 @@
> int numPart = outBestCU->getNumPartInter();
> for (int partIdx = 0; partIdx < numPart;
> partIdx++)
> {
> - prepMotionCompensation(outBestCU, partIdx);
> + prepMotionCompensation(outBestCU, partIdx,
> cuData);
> motionCompensation(outBestCU,
> m_bestPredYuv[depth], REF_PIC_LIST_X, false, true);
> }
> }
> @@ -1081,10 +1081,10 @@
> TComDataCU* subTempPartCU = m_tempCU[nextDepth];
> for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
> {
> - CU *child_cu = cuPicsym->m_CULocalData + cu_t->childIdx +
> partUnitIdx;
> + CU *child_cu = cuPicsym->m_CULocalData + cuData->childIdx +
> partUnitIdx;
>
> TComDataCU* subBestPartCU = NULL;
> - subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp); // clear sub partition datas or init.
> + subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp, cuData); // clear sub partition datas or init.
>
> if (child_cu->flags & CU::PRESENT)
> {
> @@ -1202,7 +1202,7 @@
> outBestCU->copyToPic(depth);
>
> if (m_param->rdLevel == 0 && depth == 0)
> - encodeResidue(outBestCU, outBestCU, 0, 0);
> + encodeResidue(outBestCU, outBestCU, 0, 0, cuData);
> else if (m_param->rdLevel != 0)
> {
> /* Copy Yuv data to picture Yuv */
> @@ -1244,7 +1244,7 @@
>
> Frame* pic = outBestCU->m_pic;
> uint32_t cuAddr = outBestCU->getAddr();
> - uint32_t absPartIdx = outBestCU->getZorderIdxInCU();
> + uint32_t absPartIdx = cu->encodeIdx * 4;
>
> if (depth == 0)
> // get original YUV data from picture
> @@ -1270,14 +1270,14 @@
> if (slice->m_sliceType != I_SLICE)
> {
> // by Merge for inter_2Nx2N
> - checkMerge2Nx2N_rd5_6(outBestCU, outTempCU,
> &earlyDetectionSkipMode, m_bestPredYuv[depth], m_bestRecoYuv[depth]);
> + checkMerge2Nx2N_rd5_6(outBestCU, outTempCU,
> &earlyDetectionSkipMode, m_bestPredYuv[depth], m_bestRecoYuv[depth], cu);
>
> outTempCU->initEstData();
>
> if (!m_param->bEnableEarlySkip)
> {
> // 2Nx2N, NxN
> - checkInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N);
> + checkInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N, cu);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode)
> doNotBlockPu = outBestCU->getQtRootCbf(0) != 0;
> @@ -1296,7 +1296,7 @@
> {
> if (depth == g_maxCUDepth && doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU, SIZE_NxN);
> + checkInter_rd5_6(outBestCU, outTempCU, SIZE_NxN,
> cu);
> outTempCU->initEstData();
> }
> }
> @@ -1306,14 +1306,14 @@
> // 2NxN, Nx2N
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU, SIZE_Nx2N);
> + checkInter_rd5_6(outBestCU, outTempCU, SIZE_Nx2N,
> cu);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode &&
> outBestCU->getPartitionSize(0) == SIZE_Nx2N)
> doNotBlockPu = outBestCU->getQtRootCbf(0) !=
> 0;
> }
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxN);
> + checkInter_rd5_6(outBestCU, outTempCU, SIZE_2NxN,
> cu);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode &&
> outBestCU->getPartitionSize(0) == SIZE_2NxN)
> doNotBlockPu = outBestCU->getQtRootCbf(0) !=
> 0;
> @@ -1333,14 +1333,14 @@
> {
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_2NxnU);
> + checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_2NxnU, cu);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode &&
> outBestCU->getPartitionSize(0) == SIZE_2NxnU)
> doNotBlockPu = outBestCU->getQtRootCbf(0)
> != 0;
> }
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_2NxnD);
> + checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_2NxnD, cu);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode &&
> outBestCU->getPartitionSize(0) == SIZE_2NxnD)
> doNotBlockPu = outBestCU->getQtRootCbf(0)
> != 0;
> @@ -1350,14 +1350,14 @@
> {
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_2NxnU, true);
> + checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_2NxnU, cu, true);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode &&
> outBestCU->getPartitionSize(0) == SIZE_2NxnU)
> doNotBlockPu = outBestCU->getQtRootCbf(0)
> != 0;
> }
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_2NxnD, true);
> + checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_2NxnD, cu, true);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode &&
> outBestCU->getPartitionSize(0) == SIZE_2NxnD)
> doNotBlockPu = outBestCU->getQtRootCbf(0)
> != 0;
> @@ -1369,14 +1369,14 @@
> {
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_nLx2N);
> + checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_nLx2N, cu);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode &&
> outBestCU->getPartitionSize(0) == SIZE_nLx2N)
> doNotBlockPu = outBestCU->getQtRootCbf(0)
> != 0;
> }
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_nRx2N);
> + checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_nRx2N, cu);
> outTempCU->initEstData();
> }
> }
> @@ -1384,14 +1384,14 @@
> {
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_nLx2N, true);
> + checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_nLx2N, cu, true);
> outTempCU->initEstData();
> if (m_param->bEnableCbfFastMode &&
> outBestCU->getPartitionSize(0) == SIZE_nLx2N)
> doNotBlockPu = outBestCU->getQtRootCbf(0)
> != 0;
> }
> if (doNotBlockPu)
> {
> - checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_nRx2N, true);
> + checkInter_rd5_6(outBestCU, outTempCU,
> SIZE_nRx2N, cu, true);
> outTempCU->initEstData();
> }
> }
> @@ -1404,14 +1404,14 @@
> outBestCU->getCbf(0, TEXT_CHROMA_U) != 0 ||
> outBestCU->getCbf(0, TEXT_CHROMA_V) != 0) && doIntra)
> {
> - checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N);
> + checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N,
> cu);
> outTempCU->initEstData();
>
> if (depth == g_maxCUDepth)
> {
> if (cu->log2CUSize >
> slice->m_sps->quadtreeTULog2MinSize)
> {
> - checkIntraInInter_rd5_6(outBestCU, outTempCU,
> SIZE_NxN);
> + checkIntraInInter_rd5_6(outBestCU, outTempCU,
> SIZE_NxN, cu);
> outTempCU->initEstData();
> }
> }
> @@ -1445,11 +1445,11 @@
> CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx +
> partUnitIdx;
>
> int qp = outTempCU->getQP(0);
> - subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp); // clear sub partition datas or init.
> + subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth,
> qp, cu); // clear sub partition datas or init.
>
> if (child_cu->flags & CU::PRESENT)
> {
> - subTempPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp); // clear sub partition datas or init.
> + subTempPartCU->initSubCU(outTempCU, partUnitIdx,
> nextDepth, qp, cu); // clear sub partition datas or init.
>
> if (0 == partUnitIdx) //initialize RD with previous depth
> buffer
>
> m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> @@ -1525,7 +1525,7 @@
> #endif
> }
>
> -void Analysis::checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, TComYuv*& bestPredYuv, TComYuv*& yuvReconBest)
> +void Analysis::checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, TComYuv*& bestPredYuv, TComYuv*& yuvReconBest, CU* cuData)
> {
> X265_CHECK(outTempCU->m_slice->m_sliceType != I_SLICE, "Evaluating
> merge in I slice\n");
> TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length
> for mv of both lists
> @@ -1561,7 +1561,7 @@
>
> // do MC only for Luma part
> /* Set CU parameters for motion compensation */
> - prepMotionCompensation(outTempCU, 0);
> + prepMotionCompensation(outTempCU, 0, cuData);
> motionCompensation(outTempCU, m_tmpPredYuv[depth],
> REF_PIC_LIST_X, true, false);
> uint32_t bitsCand = getTUBits(mergeCand, maxNumMergeCand);
> outTempCU->m_totalBits = bitsCand;
> @@ -1600,7 +1600,7 @@
> int numPart = outBestCU->getNumPartInter();
> for (int partIdx = 0; partIdx < numPart; partIdx++)
> {
> - prepMotionCompensation(outBestCU, partIdx);
> + prepMotionCompensation(outBestCU, partIdx, cuData);
> motionCompensation(outBestCU, bestPredYuv,
> REF_PIC_LIST_X, false, true);
> }
>
> @@ -1615,7 +1615,7 @@
> }
>
> // Encode with residue
> - encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth]);
> + encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth], cuData);
>
> uint64_t tempCost = m_rdCost.m_psyRd ?
> outTempCU->m_totalPsyCost : outTempCU->m_totalRDCost;
> uint64_t bestCost = m_rdCost.m_psyRd ?
> outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
> @@ -1629,7 +1629,7 @@
> }
> }
>
> -void Analysis::checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, bool *earlyDetectionSkipMode, TComYuv*& outBestPredYuv,
> TComYuv*& rpcYuvReconBest)
> +void Analysis::checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, bool *earlyDetectionSkipMode, TComYuv*& outBestPredYuv,
> TComYuv*& rpcYuvReconBest, CU* cuData)
> {
> X265_CHECK(outTempCU->m_slice->m_sliceType != I_SLICE, "I slice not
> expected\n");
> TComMvField mvFieldNeighbours[MRG_MAX_NUM_CANDS][2]; // double length
> for mv of both lists
> @@ -1674,7 +1674,7 @@
>
> outTempCU->getCUMvField(REF_PIC_LIST_1)->setAllMvField(mvFieldNeighbours[mergeCand][1],
> SIZE_2Nx2N, 0, 0); // interprets depth relative to outTempCU level
>
> // do MC
> - prepMotionCompensation(outTempCU, 0);
> + prepMotionCompensation(outTempCU, 0, cuData);
> motionCompensation(outTempCU, m_tmpPredYuv[depth],
> REF_PIC_LIST_X, true, true);
> // estimate residual and encode everything
> if (noResidual)
> @@ -1688,7 +1688,8 @@
> m_tmpPredYuv[depth],
> m_tmpResiYuv[depth],
> m_bestResiYuv[depth],
> - m_tmpRecoYuv[depth]);
> + m_tmpRecoYuv[depth],
> + cuData);
>
>
> /* Todo: Fix the satd cost estimates. Why is merge
> being chosen in high motion areas: estimated distortion is too low? */
> @@ -1733,7 +1734,7 @@
> }
> }
>
> -void Analysis::checkInter_rd0_4(TComDataCU* outTempCU, TComYuv*
> outPredYuv, PartSize partSize, bool bUseMRG)
> +void Analysis::checkInter_rd0_4(TComDataCU* outTempCU, TComYuv*
> outPredYuv, PartSize partSize, CU* cuData, bool bUseMRG)
> {
> uint32_t depth = outTempCU->getDepth(0);
>
> @@ -1743,7 +1744,7 @@
>
> // do motion compensation only for Luma since luma cost alone is
> calculated
> outTempCU->m_totalBits = 0;
> - if (predInterSearch(outTempCU, outPredYuv, bUseMRG, false))
> + if (predInterSearch(outTempCU, outPredYuv, bUseMRG, false, cuData))
> {
> int sizeIdx = outTempCU->getLog2CUSize(0) - 2;
> uint32_t distortion =
> primitives.sa8d[sizeIdx](m_origYuv[depth]->getLumaAddr(),
> m_origYuv[depth]->getStride(),
> @@ -1758,7 +1759,7 @@
> }
> }
>
> -void Analysis::checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, PartSize partSize, bool bUseMRG)
> +void Analysis::checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, PartSize partSize, CU* cuData, bool bUseMRG)
> {
> uint32_t depth = outTempCU->getDepth(0);
>
> @@ -1767,15 +1768,15 @@
> outTempCU->setPredModeSubParts(MODE_INTER, 0, depth);
> outTempCU->setCUTransquantBypassSubParts(!!m_param->bLossless, 0,
> depth);
>
> - if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true))
> + if (predInterSearch(outTempCU, m_tmpPredYuv[depth], bUseMRG, true,
> cuData))
> {
> - encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth]);
> + encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth],
> m_tmpRecoYuv[depth], cuData);
> checkDQP(outTempCU);
> checkBestMode(outBestCU, outTempCU, depth);
> }
> }
>
> -void Analysis::checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize)
> +void Analysis::checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize,
> CU* cuData)
> {
> uint32_t depth = cu->getDepth(0);
>
> @@ -1789,7 +1790,7 @@
> const uint32_t partOffset = 0;
>
> // Reference sample smoothing
> - TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf,
> m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX);
> + TComPattern::initAdiPattern(cu, partOffset, initTrDepth, m_predBuf,
> m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX, cuData);
>
> pixel* fenc = m_origYuv[depth]->getLumaAddr();
> uint32_t stride = m_modePredYuv[5][depth]->getStride();
> @@ -1941,7 +1942,7 @@
> cu->setLumaIntraDirSubParts(bmode, partOffset, depth + initTrDepth);
> }
>
> -void Analysis::checkIntraInInter_rd5_6(TComDataCU*& outBestCU,
> TComDataCU*& outTempCU, PartSize partSize)
> +void Analysis::checkIntraInInter_rd5_6(TComDataCU*& outBestCU,
> TComDataCU*& outTempCU, PartSize partSize, CU* cuData)
> {
> uint32_t depth = outTempCU->getDepth(0);
>
> @@ -1956,9 +1957,9 @@
> uint32_t tuDepthRange[2];
> outTempCU->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);
>
> - estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth],
> m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange);
> + estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth],
> m_tmpResiYuv[depth], m_tmpRecoYuv[depth], tuDepthRange, cuData);
>
> - estIntraPredChromaQT(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
> + estIntraPredChromaQT(outTempCU, m_origYuv[depth],
> m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], cuData);
>
> m_entropyCoder->resetBits();
> if (outTempCU->m_slice->m_pps->bTransquantBypassEnabled)
> @@ -1994,7 +1995,7 @@
> checkBestMode(outBestCU, outTempCU, depth);
> }
>
> -void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv)
> +void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv, CU* cuData)
> {
> uint64_t puCost = 0;
> uint32_t puBits = 0;
> @@ -2009,7 +2010,7 @@
> uint32_t tuDepthRange[2];
> cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);
>
> - uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
> predYuv, outResiYuv, false, puCost, puBits, tuDepthRange);
> + uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv,
> predYuv, outResiYuv, false, puCost, puBits, tuDepthRange, cuData);
> xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv);
>
> //=== update PU data ====
> @@ -2018,7 +2019,7 @@
> //===== set distortion (rate and r-d costs are determined later) =====
> cu->m_totalDistortion = puDistY;
>
> - estIntraPredChromaQT(cu, fencYuv, predYuv, outResiYuv, outReconYuv);
> + estIntraPredChromaQT(cu, fencYuv, predYuv, outResiYuv, outReconYuv,
> cuData);
> m_entropyCoder->resetBits();
> if (cu->m_slice->m_pps->bTransquantBypassEnabled)
>
> m_entropyCoder->codeCUTransquantBypassFlag(cu->getCUTransquantBypass(0));
> @@ -2050,7 +2051,7 @@
> cu->m_totalRDCost = m_rdCost.calcRdCost(cu->m_totalDistortion,
> cu->m_totalBits);
> }
>
> -void Analysis::encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t
> absPartIdx, uint32_t depth)
> +void Analysis::encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t
> absPartIdx, uint32_t depth, CU* cuData)
> {
> Frame* pic = cu->m_pic;
>
> @@ -2063,10 +2064,11 @@
> uint32_t xmax = slice->m_sps->picWidthInLumaSamples -
> lcu->getCUPelX();
> uint32_t ymax = slice->m_sps->picHeightInLumaSamples -
> lcu->getCUPelY();
> for (uint32_t partUnitIdx = 0; partUnitIdx < 4;
> partUnitIdx++, absPartIdx += qNumParts)
> {
> + CU *child_cu = cu->m_CULocalData + cuData->childIdx +
> partUnitIdx;
> if (g_zscanToPelX[absPartIdx] < xmax &&
> g_zscanToPelY[absPartIdx] < ymax)
> {
> - subTempPartCU->copyToSubCU(cu, partUnitIdx, nextDepth);
> - encodeResidue(lcu, subTempPartCU, absPartIdx, nextDepth);
> + subTempPartCU->copyToSubCU(cu, partUnitIdx, nextDepth,
> child_cu);
> + encodeResidue(lcu, subTempPartCU, absPartIdx, nextDepth,
> child_cu);
> }
> }
>
> @@ -2108,7 +2110,7 @@
> uint32_t tuDepthRange[2];
> cu->getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0);
> // Residual encoding
> - residualTransformQuantInter(cu, 0, m_origYuv[0],
> m_tmpResiYuv[depth], cu->getDepth(0), tuDepthRange);
> + residualTransformQuantInter(cu, 0, m_origYuv[0],
> m_tmpResiYuv[depth], cu->getDepth(0), tuDepthRange, cuData);
> checkDQP(cu);
>
> if (lcu->getMergeFlag(absPartIdx) && cu->getPartitionSize(0)
> == SIZE_2Nx2N && !cu->getQtRootCbf(0))
> @@ -2168,7 +2170,7 @@
> else
> {
> m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
> - generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth],
> m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
> + generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth],
> m_tmpResiYuv[depth], m_tmpRecoYuv[depth], cuData);
> checkDQP(cu);
> m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr,
> absPartIdx);
> cu->copyCodedToPic(depth);
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/encoder/analysis.h Mon Sep 22 09:05:53 2014 +0530
> @@ -113,21 +113,21 @@
> void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU,
> PartSize partSize, CU *cu, uint8_t* sharedModes);
> void compressSharedIntraCTU(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, uint32_t depth, TComDataCU* cuPicsym, CU *cu, uint8_t*
> sharedDepth, char* sharedPartSizes, uint8_t* sharedModes, uint32_t &zOrder);
>
> - void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cu_t,
> + void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cuData,
> int bInsidePicture, uint32_t
> partitionIndex, uint32_t minDepth);
> void compressInterCU_rd5_6(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, uint32_t depth, TComDataCU* cuPicsym, CU *cu,
> PartSize parentSize = SIZE_NONE);
> - void checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv);
> + void checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv, CU* cuData);
> void checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, bool *earlyDetectionSkipMode,
> - TComYuv*& outBestPredYuv, TComYuv*&
> rpcYuvReconBest);
> - void checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYUV,
> PartSize partSize, bool bUseMRG = false);
> - void checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU,
> PartSize partSize, bool bUseMRG = false);
> - void checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize);
> - void checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, PartSize partSize);
> + TComYuv*& outBestPredYuv, TComYuv*&
> rpcYuvReconBest, CU* cuData);
> + void checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYUV,
> PartSize partSize, CU* cuData, bool bUseMRG = false);
> + void checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU,
> PartSize partSize, CU* cuData, bool bUseMRG = false);
> + void checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize, CU*
> cuData);
> + void checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*&
> outTempCU, PartSize partSize, CU* cuData);
>
> void checkBestMode(TComDataCU*& outBestCU, TComDataCU*& outTempCU,
> uint32_t depth);
> - void encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv);
> - void encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t
> absPartIdx, uint32_t depth);
> + void encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv, CU* cuData);
> + void encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t
> absPartIdx, uint32_t depth, CU* cuData);
> void checkDQP(TComDataCU* cu);
> void deriveTestModeAMP(TComDataCU* bestCU, PartSize parentSize, bool
> &bTestAMP_Hor, bool &bTestAMP_Ver,
> bool &bTestMergeAMP_Hor, bool
> &bTestMergeAMP_Ver);
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/predict.cpp
> --- a/source/encoder/predict.cpp Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/encoder/predict.cpp Mon Sep 22 09:05:53 2014 +0530
> @@ -170,12 +170,12 @@
> return false;
> }
>
> -void Predict::prepMotionCompensation(TComDataCU* cu, int partIdx)
> +void Predict::prepMotionCompensation(TComDataCU* cu, int partIdx, CU*
> cuData)
> {
> m_slice = cu->m_slice;
> cu->getPartIndexAndSize(partIdx, m_partAddr, m_width, m_height);
> m_cuAddr = cu->getAddr();
> - m_zOrderIdxinCU = cu->getZorderIdxInCU();
> + m_zOrderIdxinCU = cuData->encodeIdx * 4;
>
> m_mvField[0] = cu->getCUMvField(REF_PIC_LIST_0);
> m_mvField[1] = cu->getCUMvField(REF_PIC_LIST_1);
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/predict.h
> --- a/source/encoder/predict.h Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/encoder/predict.h Mon Sep 22 09:05:53 2014 +0530
> @@ -88,7 +88,7 @@
> void initTempBuff(int csp);
>
> // prepMotionCompensation needs to be called to prepare MC with
> CU-relevant data */
> - void prepMotionCompensation(TComDataCU* cu, int partIdx);
> + void prepMotionCompensation(TComDataCU* cu, int partIdx, CU* cuData);
> void motionCompensation(TComDataCU* cu, TComYuv* predYuv, int
> picList, bool bLuma, bool bChroma);
>
> // Angular Intra
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/encoder/search.cpp Mon Sep 22 09:05:53 2014 +0530
> @@ -288,14 +288,13 @@
>
> /* returns distortion */
> uint32_t Search::xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv,
> - ShortYuv* resiYuv, int16_t* reconQt,
> uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf)
> + ShortYuv* resiYuv, int16_t* reconQt,
> uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf, CU* cuData)
> {
> uint32_t stride = fencYuv->getStride();
> pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
> pixel* pred = predYuv->getLumaAddr(absPartIdx);
> int16_t* residual = resiYuv->getLumaAddr(absPartIdx);
> -
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();
> bool useTransformSkip = !!cu->getTransformSkip(absPartIdx,
> TEXT_LUMA);
> @@ -338,7 +337,7 @@
> }
>
> uint32_t Search::xIntraCodingChromaBlk(TComDataCU* cu, uint32_t
> absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, int16_t*
> reconQt,
> - uint32_t reconQtStride, coeff_t*
> coeff, uint32_t& cbf, uint32_t chromaId, uint32_t log2TrSizeC)
> + uint32_t reconQtStride, coeff_t*
> coeff, uint32_t& cbf, uint32_t chromaId, uint32_t log2TrSizeC, CU* cuData)
> {
> TextType ttype = (TextType)chromaId;
> uint32_t stride = fencYuv->getCStride();
> @@ -346,7 +345,7 @@
> pixel* pred = predYuv->getChromaAddr(chromaId, absPartIdx);
> int16_t* residual = resiYuv->getChromaAddr(chromaId, absPartIdx);
>
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();
> bool useTransformSkipC = !!cu->getTransformSkip(absPartIdx,
> ttype);
> @@ -394,7 +393,7 @@
>
> /* returns distortion. TODO reorder params */
> uint32_t Search::xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,
> - ShortYuv* resiYuv, bool
> bAllowRQTSplit, uint64_t& rdCost, uint32_t& rdBits, uint32_t depthRange[2])
> + ShortYuv* resiYuv, bool
> bAllowRQTSplit, uint64_t& rdCost, uint32_t& rdBits, uint32_t depthRange[2],
> CU* cuData)
> {
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> @@ -454,7 +453,7 @@
>
> // init availability pattern
> uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
> - TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf,
> m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
> + TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf,
> m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode, cuData);
>
> // get prediction signal
> predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
> @@ -496,11 +495,11 @@
> cu->setCUTransquantBypassSubParts(bIsLossLess,
> absPartIdx, fullDepth);
>
> // code luma block with given intra prediction mode and
> store Cbf
> - singleDistYTmp = xIntraCodingLumaBlk(cu, absPartIdx,
> log2TrSize, fencYuv, predYuv, resiYuv, recon, reconStride, coeff,
> singleCbfYTmp);
> + singleDistYTmp = xIntraCodingLumaBlk(cu, absPartIdx,
> log2TrSize, fencYuv, predYuv, resiYuv, recon, reconStride, coeff,
> singleCbfYTmp, cuData);
> singlePsyEnergyYTmp = 0;
> if (m_rdCost.m_psyRd)
> {
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> singlePsyEnergyYTmp = m_rdCost.psyCost(log2TrSize -
> 2, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
>
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder),
> cu->m_pic->getPicYuvRec()->getStride());
> }
> @@ -540,7 +539,7 @@
>
> if (bestModeId == firstCheckId)
> {
> - xLoadIntraResultQT(cu, absPartIdx, log2TrSize, reconQt,
> reconQtStride);
> + xLoadIntraResultQT(cu, absPartIdx, log2TrSize, reconQt,
> reconQtStride, cuData);
> cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA,
> absPartIdx, fullDepth);
>
> m_entropyCoder->load(m_rdEntropyCoders[fullDepth][CI_TEMP_BEST]);
> }
> @@ -557,10 +556,10 @@
>
> // code luma block with given intra prediction mode and store
> Cbf
> cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx,
> fullDepth);
> - singleDistY = xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize,
> fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffY, singleCbfY);
> + singleDistY = xIntraCodingLumaBlk(cu, absPartIdx, log2TrSize,
> fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffY, singleCbfY,
> cuData);
> if (m_rdCost.m_psyRd)
> {
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> singlePsyEnergyY = m_rdCost.psyCost(log2TrSize - 2,
> fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(),
> zorder), cu->m_pic->getPicYuvRec()->getStride());
> }
> @@ -600,7 +599,7 @@
> for (uint32_t part = 0; part < 4; part++, absPartIdxSub +=
> qPartsDiv)
> {
> cu->m_psyEnergy = 0;
> - splitDistY += xRecurIntraCodingQT(cu, trDepth + 1,
> absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost,
> splitBits, depthRange);
> + splitDistY += xRecurIntraCodingQT(cu, trDepth + 1,
> absPartIdxSub, fencYuv, predYuv, resiYuv, bAllowRQTSplit, splitCost,
> splitBits, depthRange, cuData);
> splitPsyEnergyY += cu->m_psyEnergy;
> splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth +
> 1);
> }
> @@ -641,7 +640,7 @@
>
> // set reconstruction for next intra prediction blocks
> uint32_t qtLayer = log2TrSize - 2;
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> int16_t* reconQt =
> m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
> X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE,
> "width is not max CU size\n");
> const uint32_t reconQtStride = MAX_CU_SIZE;
> @@ -659,7 +658,7 @@
> }
>
> void Search::residualTransformQuantIntra(TComDataCU* cu, uint32_t
> trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,
> - ShortYuv* resiYuv, TComYuv*
> reconYuv, uint32_t depthRange[2])
> + ShortYuv* resiYuv, TComYuv*
> reconYuv, uint32_t depthRange[2], CU* cuData)
> {
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
> @@ -689,14 +688,14 @@
> uint32_t coeffOffsetY = absPartIdx << LOG2_UNIT_SIZE * 2;
> coeff_t* coeff = cu->getCoeffY() + coeffOffsetY;
>
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride =
> cu->m_pic->getPicYuvRec()->getStride();
>
> bool useTransformSkip = !!cu->getTransformSkip(absPartIdx,
> TEXT_LUMA);
>
> // init availability pattern
> - TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf,
> m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
> + TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf,
> m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode, cuData);
> // get prediction signal
> predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize);
>
> @@ -745,7 +744,7 @@
>
> for (uint32_t part = 0; part < 4; part++, absPartIdxSub +=
> qPartsDiv)
> {
> - residualTransformQuantIntra(cu, trDepth + 1, absPartIdxSub,
> fencYuv, predYuv, resiYuv, reconYuv, depthRange);
> + residualTransformQuantIntra(cu, trDepth + 1, absPartIdxSub,
> fencYuv, predYuv, resiYuv, reconYuv, depthRange, cuData);
> splitCbfY |= cu->getCbf(absPartIdxSub, TEXT_LUMA, trDepth +
> 1);
> }
>
> @@ -781,24 +780,24 @@
> }
> }
>
> -void Search::xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t log2TrSize, int16_t* reconQt, uint32_t reconQtStride)
> +void Search::xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t log2TrSize, int16_t* reconQt, uint32_t reconQtStride, CU* cuData)
> {
> // copy reconstruction
> int sizeIdx = log2TrSize - 2;
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();
> primitives.square_copy_sp[sizeIdx](reconIPred, reconIPredStride,
> reconQt, reconQtStride);
> }
>
> void Search::xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t
> absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId,
> - int16_t* reconQt, uint32_t
> reconQtStride)
> + int16_t* reconQt, uint32_t
> reconQtStride, CU* cuData)
> {
> X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
>
> // copy reconstruction
> int sizeIdxC = log2TrSizeC - 2;
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();
> primitives.square_copy_sp[sizeIdxC](reconIPred, reconIPredStride,
> reconQt, reconQtStride);
> @@ -841,7 +840,7 @@
> }
>
> /* returns distortion */
> -uint32_t Search::xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t
> trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv*
> resiYuv)
> +uint32_t Search::xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t
> trDepth, uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv*
> resiYuv, CU* cuData)
> {
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> uint32_t trMode = cu->getTransformIdx(absPartIdx);
> @@ -899,7 +898,7 @@
> pixel* pred = predYuv->getChromaAddr(chromaId,
> absPartIdxC);
>
> // init availability pattern
> - TComPattern::initAdiPatternChroma(cu, absPartIdxC,
> trDepthC, m_predBuf, chromaId);
> + TComPattern::initAdiPatternChroma(cu, absPartIdxC,
> trDepthC, m_predBuf, chromaId, cuData);
> pixel* chromaPred =
> TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
>
> uint32_t chromaPredMode =
> cu->getChromaIntraDir(absPartIdxC);
> @@ -943,7 +942,7 @@
>
> cu->setTransformSkipPartRange(chromaModeId,
> (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
>
> - singleDistCTmp = xIntraCodingChromaBlk(cu,
> absPartIdxC, fencYuv, predYuv, resiYuv, recon, reconStride, coeff,
> singleCbfCTmp, chromaId, log2TrSizeC);
> + singleDistCTmp = xIntraCodingChromaBlk(cu,
> absPartIdxC, fencYuv, predYuv, resiYuv, recon, reconStride, coeff,
> singleCbfCTmp, chromaId, log2TrSizeC, cuData);
> cu->setCbfPartRange(singleCbfCTmp << trDepth,
> (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
>
> if (chromaModeId == 1 && !singleCbfCTmp)
> @@ -954,7 +953,7 @@
> uint32_t bitsTmp = singleCbfCTmp ?
> xGetIntraBitsChroma(cu, absPartIdxC, log2TrSizeC, chromaId, coeff) : 0;
> if (m_rdCost.m_psyRd)
> {
> - uint32_t zorder = cu->getZorderIdxInCU()
> + absPartIdxC;
> + uint32_t zorder = cuData->encodeIdx * 4 +
> absPartIdxC;
> singlePsyEnergyTmp =
> m_rdCost.psyCost(log2TrSizeC - 2, fencYuv->getChromaAddr(chromaId,
> absPartIdxC), fencYuv->getCStride(),
>
> cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder),
> cu->m_pic->getPicYuvRec()->getCStride());
> singleCostTmp =
> m_rdCost.calcPsyRdCost(singleDistCTmp, bitsTmp, singlePsyEnergyTmp);
> @@ -979,7 +978,7 @@
>
> if (bestModeId == firstCheckId)
> {
> - xLoadIntraResultChromaQT(cu, absPartIdxC,
> log2TrSizeC, chromaId, reconQt, reconQtStride);
> + xLoadIntraResultChromaQT(cu, absPartIdxC,
> log2TrSizeC, chromaId, reconQt, reconQtStride, cuData);
> cu->setCbfPartRange(singleCbfC << trDepth,
> (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
>
> m_entropyCoder->load(m_rdEntropyCoders[fullDepth][CI_TEMP_BEST]);
> }
> @@ -1000,10 +999,10 @@
> else
> {
> cu->setTransformSkipPartRange(0, (TextType)chromaId,
> absPartIdxC, tuIterator.absPartIdxStep);
> - outDist += xIntraCodingChromaBlk(cu, absPartIdxC,
> fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffC, singleCbfC,
> chromaId, log2TrSizeC);
> + outDist += xIntraCodingChromaBlk(cu, absPartIdxC,
> fencYuv, predYuv, resiYuv, reconQt, reconQtStride, coeffC, singleCbfC,
> chromaId, log2TrSizeC, cuData);
> if (m_rdCost.m_psyRd)
> {
> - uint32_t zorder = cu->getZorderIdxInCU() +
> absPartIdxC;
> + uint32_t zorder = cuData->encodeIdx * 4 +
> absPartIdxC;
> singlePsyEnergyTmp = m_rdCost.psyCost(log2TrSizeC
> - 2, fencYuv->getChromaAddr(chromaId, absPartIdxC), fencYuv->getCStride(),
>
> cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder),
> cu->m_pic->getPicYuvRec()->getCStride());
> }
> @@ -1027,7 +1026,7 @@
> uint32_t absPartIdxSub = absPartIdx;
> for (uint32_t part = 0; part < 4; part++, absPartIdxSub +=
> qPartsDiv)
> {
> - outDist += xRecurIntraChromaCodingQT(cu, trDepth + 1,
> absPartIdxSub, fencYuv, predYuv, resiYuv);
> + outDist += xRecurIntraChromaCodingQT(cu, trDepth + 1,
> absPartIdxSub, fencYuv, predYuv, resiYuv, cuData);
> splitPsyEnergy += cu->m_psyEnergy;
> splitCbfU |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_U, trDepth
> + 1);
> splitCbfV |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_V, trDepth
> + 1);
> @@ -1091,7 +1090,7 @@
> }
>
> void Search::residualQTIntraChroma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx,
> - TComYuv* fencYuv, TComYuv* predYuv,
> ShortYuv* resiYuv, TComYuv* reconYuv)
> + TComYuv* fencYuv, TComYuv* predYuv,
> ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData)
> {
> uint32_t fullDepth = cu->getDepth(0) + trDepth;
> uint32_t trMode = cu->getTransformIdx(absPartIdx);
> @@ -1135,7 +1134,7 @@
> pixel* recon =
> reconYuv->getChromaAddr(chromaId, absPartIdxC);
> uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE
> * 2 - (hChromaShift + vChromaShift));
> coeff_t* coeff = cu->getCoeff(ttype) +
> coeffOffsetC;
> - uint32_t zorder = cu->getZorderIdxInCU() +
> absPartIdxC;
> + uint32_t zorder = cuData->encodeIdx * 4 +
> absPartIdxC;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
> uint32_t reconIPredStride =
> cu->m_pic->getPicYuvRec()->getCStride();
>
> @@ -1149,7 +1148,7 @@
> chromaPredMode = cu->getLumaIntraDir((m_csp ==
> X265_CSP_I444) ? absPartIdxC : 0);
> chromaPredMode = (m_csp == X265_CSP_I422) ?
> g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
> // init availability pattern
> - TComPattern::initAdiPatternChroma(cu, absPartIdxC,
> trDepthC, m_predBuf, chromaId);
> + TComPattern::initAdiPatternChroma(cu, absPartIdxC,
> trDepthC, m_predBuf, chromaId, cuData);
> pixel* chromaPred =
> TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
>
> // get prediction signal
> @@ -1197,7 +1196,7 @@
> uint32_t absPartIdxSub = absPartIdx;
> for (uint32_t part = 0; part < 4; part++, absPartIdxSub +=
> qPartsDiv)
> {
> - residualQTIntraChroma(cu, trDepth + 1, absPartIdxSub,
> fencYuv, predYuv, resiYuv, reconYuv);
> + residualQTIntraChroma(cu, trDepth + 1, absPartIdxSub,
> fencYuv, predYuv, resiYuv, reconYuv, cuData);
> splitCbfU |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_U, trDepth
> + 1);
> splitCbfV |= cu->getCbf(absPartIdxSub, TEXT_CHROMA_V, trDepth
> + 1);
> }
> @@ -1210,7 +1209,7 @@
> }
> }
>
> -void Search::estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2])
> +void Search::estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], CU*
> cuData)
> {
> uint32_t depth = cu->getDepth(0);
> uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
> @@ -1227,7 +1226,7 @@
> for (uint32_t pu = 0; pu < numPU; pu++, partOffset += qNumParts)
> {
> // Reference sample smoothing
> - TComPattern::initAdiPattern(cu, partOffset, initTrDepth,
> m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX);
> + TComPattern::initAdiPattern(cu, partOffset, initTrDepth,
> m_predBuf, m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, ALL_IDX,
> cuData);
>
> // determine set of modes to be tested (using prediction signal
> only)
> pixel* fenc = fencYuv->getLumaAddr(partOffset);
> @@ -1346,7 +1345,7 @@
> m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> cu->setLumaIntraDirSubParts(rdModeList[i], partOffset, depth
> + initTrDepth);
> cost = bits = 0;
> - xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv,
> predYuv, resiYuv, false, cost, bits, depthRange);
> + xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv,
> predYuv, resiYuv, false, cost, bits, depthRange, cuData);
> COPY2_IF_LT(bcost, cost, bmode, rdModeList[i]);
> }
>
> @@ -1355,14 +1354,14 @@
> m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
>
> // update distortion (rate and r-d costs are determined later)
> - cu->m_totalDistortion += xRecurIntraCodingQT(cu, initTrDepth,
> partOffset, fencYuv, predYuv, resiYuv, true, cost, bits, depthRange);
> + cu->m_totalDistortion += xRecurIntraCodingQT(cu, initTrDepth,
> partOffset, fencYuv, predYuv, resiYuv, true, cost, bits, depthRange,
> cuData);
>
> xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);
>
> // set reconstruction for next intra prediction blocks
> if (pu != numPU - 1)
> {
> - uint32_t zorder = cu->getZorderIdxInCU() + partOffset;
> + uint32_t zorder = cuData->encodeIdx * 4 + partOffset;
> pixel* dst =
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> pixel* src = reconYuv->getLumaAddr(partOffset);
> primitives.square_copy_pp[log2TrSize - 2](dst, dststride,
> src, srcstride);
> @@ -1386,7 +1385,7 @@
> x265_emms();
> }
>
> -void Search::sharedEstIntraPredQT(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t
> depthRange[2], uint8_t* sharedModes)
> +void Search::sharedEstIntraPredQT(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t
> depthRange[2], uint8_t* sharedModes, CU* cuData)
> {
> uint32_t depth = cu->getDepth(0);
> uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
> @@ -1409,12 +1408,12 @@
> m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
>
> // update overall distortion (rate and r-d costs are determined
> later)
> - cu->m_totalDistortion += xRecurIntraCodingQT(cu, initTrDepth,
> partOffset, fencYuv, predYuv, resiYuv, true, puCost, bits, depthRange);
> + cu->m_totalDistortion += xRecurIntraCodingQT(cu, initTrDepth,
> partOffset, fencYuv, predYuv, resiYuv, true, puCost, bits, depthRange,
> cuData);
> xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv);
>
> if (pu != numPU - 1)
> {
> - uint32_t zorder = cu->getZorderIdxInCU() + partOffset;
> + uint32_t zorder = cuData->encodeIdx * 4 + partOffset;
> pixel* dst =
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> pixel* src = reconYuv->getLumaAddr(partOffset);
> primitives.luma_copy_pp[log2TrSize - 2](dst, dststride, src,
> srcstride);
> @@ -1441,7 +1440,7 @@
> m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> }
>
> -void Search::getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv)
> +void Search::getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, CU* cuData)
> {
> uint32_t bestMode = 0;
> uint64_t bestCost = MAX_INT64;
> @@ -1461,8 +1460,8 @@
> int32_t sizeIdx = log2TrSizeC - 2;
> pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
>
> - TComPattern::initAdiPatternChroma(cu, 0, 0, m_predBuf, 1);
> - TComPattern::initAdiPatternChroma(cu, 0, 0, m_predBuf, 2);
> + TComPattern::initAdiPatternChroma(cu, 0, 0, m_predBuf, 1, cuData);
> + TComPattern::initAdiPatternChroma(cu, 0, 0, m_predBuf, 2, cuData);
> cu->getAllowedChromaDir(0, modeList);
>
> // check chroma modes
> @@ -1494,7 +1493,7 @@
> cu->setChromIntraDirSubParts(bestMode, 0, cu->getDepth(0));
> }
>
> -void Search::estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv)
> +void Search::estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv,CU* cuData)
> {
> uint32_t depth = cu->getDepth(0);
> uint32_t initTrDepth = (cu->getPartitionSize(0) != SIZE_2Nx2N) &&
> (cu->getChromaFormat() == X265_CSP_I444 ? 1 : 0);
> @@ -1529,7 +1528,7 @@
> // chroma coding
> cu->setChromIntraDirSubParts(modeList[mode], absPartIdxC,
> depth + initTrDepth);
>
> - uint32_t dist = xRecurIntraChromaCodingQT(cu, initTrDepth,
> absPartIdxC, fencYuv, predYuv, resiYuv);
> + uint32_t dist = xRecurIntraChromaCodingQT(cu, initTrDepth,
> absPartIdxC, fencYuv, predYuv, resiYuv, cuData);
>
> if (cu->m_slice->m_pps->bTransformSkipEnabled)
>
> m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> @@ -1556,7 +1555,7 @@
>
> if (!tuIterator.isLastSection())
> {
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdxC;
> uint32_t dststride =
> cu->m_pic->getPicYuvRec()->getCStride();
> uint32_t srcstride = reconYuv->getCStride();
> pixel *src, *dst;
> @@ -1602,7 +1601,7 @@
> }
>
> /* estimation of best merge coding */
> -uint32_t Search::mergeEstimation(TComDataCU* cu, int puIdx, MergeData& m)
> +uint32_t Search::mergeEstimation(TComDataCU* cu, int puIdx, MergeData& m,
> CU* cuData)
> {
> X265_CHECK(cu->getPartitionSize(0) != SIZE_2Nx2N, "merge tested on
> non-2Nx2N partition\n");
>
> @@ -1636,7 +1635,7 @@
> cu->getCUMvField(REF_PIC_LIST_1)->m_mv[m.absPartIdx] =
> m.mvFieldNeighbours[mergeCand][1].mv;
> cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] =
> (char)m.mvFieldNeighbours[mergeCand][1].refIdx;
>
> - prepMotionCompensation(cu, puIdx);
> + prepMotionCompensation(cu, puIdx, cuData);
> motionCompensation(cu, &m_predTempYuv, REF_PIC_LIST_X, true,
> false);
> uint32_t costCand =
> m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx),
> m_predTempYuv.getStride());
> uint32_t bitsCand = getTUBits(mergeCand, m.maxNumMergeCand);
> @@ -1658,7 +1657,7 @@
>
> /* search of the best candidate for inter prediction
> * returns true if predYuv was filled with a motion compensated
> prediction */
> -bool Search::predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool
> bMergeOnly, bool bChroma)
> +bool Search::predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool
> bMergeOnly, bool bChroma, CU* cuData)
> {
> MV amvpCand[2][MAX_NUM_REF][AMVP_NUM_CANDS];
> MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 1];
> @@ -1687,7 +1686,7 @@
> int roiWidth, roiHeight;
> cu->getPartIndexAndSize(partIdx, partAddr, roiWidth, roiHeight);
>
> - pixel* pu = fenc->getLumaAddr(cu->getAddr(),
> cu->getZorderIdxInCU() + partAddr);
> + pixel* pu = fenc->getLumaAddr(cu->getAddr(), cuData->encodeIdx *
> 4 + partAddr);
> m_me.setSourcePU(pu - fenc->getLumaAddr(), roiWidth, roiHeight);
>
> uint32_t mrgCost = MAX_UINT;
> @@ -1698,7 +1697,7 @@
> merge.absPartIdx = partAddr;
> merge.width = roiWidth;
> merge.height = roiHeight;
> - mrgCost = mergeEstimation(cu, partIdx, merge);
> + mrgCost = mergeEstimation(cu, partIdx, merge, cuData);
>
> if (bMergeOnly && cu->getLog2CUSize(0) > 3)
> {
> @@ -1716,7 +1715,7 @@
>
> cu->getCUMvField(REF_PIC_LIST_1)->setAllMvField(merge.mvField[1],
> partSize, partAddr, 0, partIdx);
> totalmebits += merge.bits;
>
> - prepMotionCompensation(cu, partIdx);
> + prepMotionCompensation(cu, partIdx, cuData);
> motionCompensation(cu, predYuv, REF_PIC_LIST_X, true,
> bChroma);
> continue;
> }
> @@ -1758,7 +1757,7 @@
>
> cu->clipMv(mvCand);
>
> - prepMotionCompensation(cu, partIdx);
> + prepMotionCompensation(cu, partIdx, cuData);
>
> predInterLumaBlk(slice->m_refPicList[l][ref]->getPicYuvRec(),
> &m_predTempYuv, &mvCand);
> uint32_t cost =
> m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
> cost = (uint32_t)m_rdCost.calcRdSADCost(cost,
> MVP_IDX_BITS);
> @@ -1806,7 +1805,7 @@
> TComPicYuv *refPic0 =
> slice->m_refPicList[0][list[0].ref]->getPicYuvRec();
> TComPicYuv *refPic1 =
> slice->m_refPicList[1][list[1].ref]->getPicYuvRec();
>
> - prepMotionCompensation(cu, partIdx);
> + prepMotionCompensation(cu, partIdx, cuData);
> predInterLumaBlk(refPic0, &m_predYuv[0], &list[0].mv);
> predInterLumaBlk(refPic1, &m_predYuv[1], &list[1].mv);
>
> @@ -1932,7 +1931,7 @@
>
> totalmebits += list[1].bits;
> }
> - prepMotionCompensation(cu, partIdx);
> + prepMotionCompensation(cu, partIdx, cuData);
> motionCompensation(cu, predYuv, REF_PIC_LIST_X, true, bChroma);
> }
>
> @@ -2092,7 +2091,7 @@
>
> /** encode residual and calculate rate-distortion for a CU block */
> void Search::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* outResiYuv,
> - ShortYuv* outBestResiYuv, TComYuv*
> outReconYuv)
> + ShortYuv* outBestResiYuv, TComYuv*
> outReconYuv, CU* cuData)
> {
> X265_CHECK(!cu->isIntra(0), "intra CU not expected\n");
>
> @@ -2135,7 +2134,7 @@
> uint64_t cost = 0;
> uint32_t zeroDistortion = 0;
> uint32_t bits = 0;
> - uint32_t distortion = xEstimateResidualQT(cu, 0, fencYuv,
> predYuv, outResiYuv, depth, cost, bits, &zeroDistortion, tuDepthRange);
> + uint32_t distortion = xEstimateResidualQT(cu, 0, fencYuv,
> predYuv, outResiYuv, depth, cost, bits, &zeroDistortion, tuDepthRange,
> cuData);
>
> m_entropyCoder->resetBits();
> m_entropyCoder->codeQtRootCbfZero();
> @@ -2205,7 +2204,7 @@
> m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> uint64_t cost = 0;
> uint32_t bits = 0;
> - xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth,
> cost, bits, NULL, tuDepthRange);
> + xEstimateResidualQT(cu, 0, fencYuv, predYuv, outResiYuv, depth,
> cost, bits, NULL, tuDepthRange, cuData);
> xSetResidualQTData(cu, 0, NULL, depth, false);
> m_entropyCoder->store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
> }
> @@ -2240,7 +2239,7 @@
> cu->clearCbf(0, depth);
> }
>
> -void Search::generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv)
> +void Search::generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData)
> {
> m_quant.setQPforQuant(cu);
>
> @@ -2249,7 +2248,7 @@
>
> if (cu->getPredictionMode(0) == MODE_INTER)
> {
> - residualTransformQuantInter(cu, 0, fencYuv, resiYuv,
> cu->getDepth(0), tuDepthRange);
> + residualTransformQuantInter(cu, 0, fencYuv, resiYuv,
> cu->getDepth(0), tuDepthRange, cuData);
> if (cu->getQtRootCbf(0))
> reconYuv->addClip(predYuv, resiYuv, cu->getLog2CUSize(0));
> else
> @@ -2262,14 +2261,14 @@
> else if (cu->getPredictionMode(0) == MODE_INTRA)
> {
> uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0
> : 1;
> - residualTransformQuantIntra(cu, initTrDepth, 0, fencYuv, predYuv,
> resiYuv, reconYuv, tuDepthRange);
> - getBestIntraModeChroma(cu, fencYuv, predYuv);
> - residualQTIntraChroma(cu, 0, 0, fencYuv, predYuv, resiYuv,
> reconYuv);
> + residualTransformQuantIntra(cu, initTrDepth, 0, fencYuv, predYuv,
> resiYuv, reconYuv, tuDepthRange, cuData);
> + getBestIntraModeChroma(cu, fencYuv, predYuv, cuData);
> + residualQTIntraChroma(cu, 0, 0, fencYuv, predYuv, resiYuv,
> reconYuv, cuData);
> }
> }
>
> void Search::residualTransformQuantInter(TComDataCU* cu, uint32_t
> absPartIdx, TComYuv* fencYuv, ShortYuv* resiYuv,
> - const uint32_t depth, uint32_t
> depthRange[2])
> + const uint32_t depth, uint32_t
> depthRange[2], CU* cuData)
> {
> X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "invalid
> depth\n");
> const uint32_t trMode = depth - cu->getDepth(0);
> @@ -2379,7 +2378,9 @@
> {
> const uint32_t qPartNumSubdiv = cu->m_pic->getNumPartInCU() >>
> ((depth + 1) << 1);
> for (uint32_t i = 0; i < 4; ++i)
> - residualTransformQuantInter(cu, absPartIdx + i *
> qPartNumSubdiv, fencYuv, resiYuv, depth + 1, depthRange);
> + {
> + residualTransformQuantInter(cu, absPartIdx + i *
> qPartNumSubdiv, fencYuv, resiYuv, depth + 1, depthRange, cuData);
> + }
>
> uint32_t ycbf = 0;
> uint32_t ucbf = 0;
> @@ -2401,7 +2402,7 @@
> }
>
> uint32_t Search::xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx,
> TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
> - uint32_t depth, uint64_t& rdCost,
> uint32_t& outBits, uint32_t* outZeroDist, uint32_t depthRange[2])
> + uint32_t depth, uint64_t& rdCost,
> uint32_t& outBits, uint32_t* outZeroDist, uint32_t depthRange[2], CU*
> cuData)
> {
> X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "depth not
> matching\n");
> const uint32_t trMode = depth - cu->getDepth(0);
> @@ -2568,7 +2569,7 @@
> if (m_rdCost.m_psyRd)
> {
> pixel* pred = predYuv->getLumaAddr(absPartIdx);
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride =
> cu->m_pic->getPicYuvRec()->getStride();
> uint32_t stride = fencYuv->getStride();
> @@ -2667,7 +2668,7 @@
> if (m_rdCost.m_psyRd)
> {
> pixel* pred = predYuv->getCbAddr(absPartIdxC);
> - uint32_t zorder = cu->getZorderIdxInCU() +
> absPartIdxC;
> + uint32_t zorder = cuData->encodeIdx * 4+
> absPartIdxC;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride =
> cu->m_pic->getPicYuvRec()->getCStride();
> uint32_t stride = fencYuv->getCStride();
> @@ -2749,7 +2750,7 @@
> if (m_rdCost.m_psyRd)
> {
> pixel* pred = predYuv->getCrAddr(absPartIdxC);
> - uint32_t zorder = cu->getZorderIdxInCU() +
> absPartIdxC;
> + uint32_t zorder = cuData->encodeIdx * 4 +
> absPartIdxC;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride =
> cu->m_pic->getPicYuvRec()->getCStride();
> uint32_t stride = fencYuv->getCStride();
> @@ -2855,7 +2856,7 @@
> if (m_rdCost.m_psyRd)
> {
> pixel* pred = predYuv->getLumaAddr(absPartIdx);
> - uint32_t zorder = cu->getZorderIdxInCU() + absPartIdx;
> + uint32_t zorder = cuData->encodeIdx * 4 + absPartIdx;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride =
> cu->m_pic->getPicYuvRec()->getStride();
> uint32_t stride = fencYuv->getStride();
> @@ -2943,7 +2944,7 @@
> if (m_rdCost.m_psyRd)
> {
> pixel* pred = predYuv->getCbAddr(absPartIdxC);
> - uint32_t zorder = cu->getZorderIdxInCU() +
> absPartIdxC;
> + uint32_t zorder = cuData->encodeIdx * 4 +
> absPartIdxC;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride =
> cu->m_pic->getPicYuvRec()->getCStride();
> uint32_t stride = fencYuv->getCStride();
> @@ -2984,7 +2985,7 @@
> if (m_rdCost.m_psyRd)
> {
> pixel* pred = predYuv->getCrAddr(absPartIdxC);
> - uint32_t zorder = cu->getZorderIdxInCU() +
> absPartIdxC;
> + uint32_t zorder = cuData->encodeIdx * 4 +
> absPartIdxC;
> pixel* reconIPred =
> cu->m_pic->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> uint32_t reconIPredStride =
> cu->m_pic->getPicYuvRec()->getCStride();
> uint32_t stride = fencYuv->getCStride();
> @@ -3127,7 +3128,7 @@
> for (uint32_t i = 0; i < 4; ++i)
> {
> cu->m_psyEnergy = 0;
> - subdivDist += xEstimateResidualQT(cu, absPartIdx + i *
> qPartNumSubdiv, fencYuv, predYuv, resiYuv, depth + 1, subDivCost,
> subdivBits, bCheckFull ? NULL : outZeroDist, depthRange);
> + subdivDist += xEstimateResidualQT(cu, absPartIdx + i *
> qPartNumSubdiv, fencYuv, predYuv, resiYuv, depth + 1, subDivCost,
> subdivBits, bCheckFull ? NULL : outZeroDist, depthRange, cuData);
> subDivPsyEnergy += cu->m_psyEnergy;
> }
>
> diff -r c8f53398f8ce -r f70fd79cb3e1 source/encoder/search.h
> --- a/source/encoder/search.h Sat Sep 20 15:41:08 2014 +0100
> +++ b/source/encoder/search.h Mon Sep 22 09:05:53 2014 +0530
> @@ -79,19 +79,19 @@
>
> bool initSearch(x265_param *param, ScalingList& scalingList);
>
> - void estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2]);
> - void sharedEstIntraPredQT(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t
> depthRange[2], uint8_t* sharedModes);
> - void estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);
> + void estIntraPredQT(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], CU*
> cuData);
> + void sharedEstIntraPredQT(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, uint32_t
> depthRange[2], uint8_t* sharedModes, CU* cuData);
> + void estIntraPredChromaQT(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData);
>
> // estimation inter prediction (non-skip)
> - bool predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool
> bMergeOnly, bool bChroma);
> + bool predInterSearch(TComDataCU* cu, TComYuv* predYuv, bool
> bMergeOnly, bool bChroma, CU* cuData);
>
> // encode residual and compute rd-cost for inter mode
> - void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, TComYuv*
> reconYuv);
> + void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, TComYuv*
> reconYuv, CU* cuData);
> void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, TComYuv* reconYuv);
>
> - void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv);
> - void residualTransformQuantInter(TComDataCU* cu, uint32_t
> absPartIdx, TComYuv* fencYuv, ShortYuv* resiYuv, uint32_t depth, uint32_t
> depthRange[2]);
> + void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, CU* cuData);
> + void residualTransformQuantInter(TComDataCU* cu, uint32_t
> absPartIdx, TComYuv* fencYuv, ShortYuv* resiYuv, uint32_t depth, uint32_t
> depthRange[2], CU* cuData);
>
> uint32_t getIntraModeBits(TComDataCU* cu, uint32_t mode, uint32_t
> partOffset, uint32_t depth);
> uint32_t getIntraRemModeBits(TComDataCU * cu, uint32_t partOffset,
> uint32_t depth, uint32_t preds[3], uint64_t& mpms);
> @@ -110,30 +110,30 @@
> uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2]);
> uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);
> uint32_t xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
> - int16_t* reconQt, uint32_t
> reconQtStride, coeff_t* coeff, uint32_t& cbf);
> + int16_t* reconQt, uint32_t
> reconQtStride, coeff_t* coeff, uint32_t& cbf, CU* cuData);
>
> uint32_t xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx,
> TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, uint32_t depth,
> - uint64_t &rdCost, uint32_t &outBits,
> uint32_t *zeroDist, uint32_t tuDepthRange[2]);
> + uint64_t &rdCost, uint32_t &outBits,
> uint32_t *zeroDist, uint32_t tuDepthRange[2], CU* cuData);
>
> uint32_t xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,
> - ShortYuv* resiYuv, bool bAllowRQTSplit,
> uint64_t& dRDCost, uint32_t& puBits, uint32_t depthRange[2]);
> + ShortYuv* resiYuv, bool bAllowRQTSplit,
> uint64_t& dRDCost, uint32_t& puBits, uint32_t depthRange[2], CU* cuData);
>
> - uint32_t xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv);
> + uint32_t xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
> CU* cuData);
>
> uint32_t xIntraCodingChromaBlk(TComDataCU* cu, uint32_t absPartIdx,
> TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
> - int16_t* reconQt, uint32_t
> reconQtStride, coeff_t* coeff, uint32_t& cbf, uint32_t chromaId, uint32_t
> log2TrSizeC);
> + int16_t* reconQt, uint32_t
> reconQtStride, coeff_t* coeff, uint32_t& cbf, uint32_t chromaId, uint32_t
> log2TrSizeC, CU* cuData);
>
> void residualTransformQuantIntra(TComDataCU* cu, uint32_t
> trDepth, uint32_t absPartIdx, TComYuv* fencYuv,
> - TComYuv* predYuv, ShortYuv*
> resiYuv, TComYuv* reconYuv, uint32_t depthRange[2]);
> + TComYuv* predYuv, ShortYuv*
> resiYuv, TComYuv* reconYuv, uint32_t depthRange[2], CU* cuData);
>
> void residualQTIntraChroma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv,
> - TComYuv* predYuv, ShortYuv* resiYuv,
> TComYuv* reconYuv);
> + TComYuv* predYuv, ShortYuv* resiYuv,
> TComYuv* reconYuv, CU* cuData);
>
> void xEncodeResidualQT(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t depth, bool bSubdivAndCbf, TextType ttype, uint32_t depthRange[2]);
> void xSetIntraResultChromaQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* reconYuv);
>
> - void xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t log2TrSize, int16_t* reconQt, uint32_t reconQtStride);
> - void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t
> absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, int16_t* reconQt,
> uint32_t reconQtStride);
> + void xLoadIntraResultQT(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t log2TrSize, int16_t* reconQt, uint32_t reconQtStride, CU* cuData);
> + void xLoadIntraResultChromaQT(TComDataCU* cu, uint32_t
> absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, int16_t* reconQt,
> uint32_t reconQtStride, CU* cuData);
>
> void offsetSubTUCBFs(TComDataCU* cu, TextType ttype, uint32_t
> trDepth, uint32_t absPartIdx);
>
> @@ -170,13 +170,13 @@
> void checkBestMVP(MV* amvpCand, MV cMv, MV& mvPred, int& mvpIdx,
> uint32_t& outBits, uint32_t& outCost);
> void getBlkBits(PartSize cuMode, bool bPSlice, int partIdx,
> uint32_t lastMode, uint32_t blockBit[3]);
> uint32_t getInterSymbolBits(TComDataCU* cu, uint32_t depthRange[2]);
> - uint32_t mergeEstimation(TComDataCU* cu, int partIdx, MergeData& m);
> + uint32_t mergeEstimation(TComDataCU* cu, int partIdx, MergeData& m,
> CU* cuData);
> void setSearchRange(TComDataCU* cu, MV mvp, int merange, MV&
> mvmin, MV& mvmax);
>
> /* intra helper functions */
> enum { MAX_RD_INTRA_MODES = 16 };
> void updateCandList(uint32_t mode, uint64_t cost, int
> maxCandCount, uint32_t* candModeList, uint64_t* candCostList);
> - void getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv);
> + void getBestIntraModeChroma(TComDataCU* cu, TComYuv* fencYuv,
> TComYuv* predYuv, CU* cuData);
> };
> }
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140922/f8f249b7/attachment-0001.html>
More information about the x265-devel mailing list