[x265] inline simple functions
Steve Borho
steve at borho.org
Wed Sep 17 12:23:23 CEST 2014
On 09/17, Satoshi Nakagawa wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1410947343 -32400
> # Wed Sep 17 18:49:03 2014 +0900
> # Node ID b00d1f46a7632572df3be47decee9be9881c511c
> # Parent 199e8f2e0d54abd16657ccd0952bdc25cadf8420
> inline simple functions
>
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComDataCU.cpp
> --- a/source/Lib/TLibCommon/TComDataCU.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -88,9 +88,6 @@
> m_DataCUMemPool.m_tqBypassYuvMemBlock = NULL;
> }
>
> -TComDataCU::~TComDataCU()
> -{}
> -
>
> bool TComDataCU::initialize(uint32_t numPartition, uint32_t sizeL, uint32_t sizeC, uint32_t numBlocks, bool isLossless)
> {
> @@ -1086,15 +1083,6 @@
> }
> }
>
> -/** Check whether the CU is coded in lossless coding mode
> - * \param absPartIdx
> - * \returns true if the CU is coded in lossless coding mode; false if otherwise
> - */
> -bool TComDataCU::isLosslessCoded(uint32_t absPartIdx)
> -{
> - return m_slice->m_pps->bTransquantBypassEnabled && getCUTransquantBypass(absPartIdx);
> -}
> -
> /** Get allowed chroma intra modes
> *\param absPartIdx
> *\param uiModeList pointer to chroma intra modes array
> @@ -1224,11 +1212,6 @@
> return ctx;
> }
>
> -uint32_t TComDataCU::getCtxInterDir(uint32_t absPartIdx)
> -{
> - return getDepth(absPartIdx);
> -}
> -
> void TComDataCU::clearCbf(uint32_t absPartIdx, uint32_t depth)
> {
> uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
> @@ -2111,11 +2094,6 @@
> return numMvc;
> }
>
> -bool TComDataCU::isBipredRestriction()
> -{
> - return getLog2CUSize(0) == 3 && getPartitionSize(0) != SIZE_2Nx2N;
> -}
> -
> void TComDataCU::clipMv(MV& outMV)
> {
> int mvshift = 2;
> @@ -2130,15 +2108,6 @@
> outMV.y = X265_MIN(ymax, X265_MAX(ymin, (int)outMV.y));
> }
>
> -/** Test whether the current block is skipped
> - * \param partIdx Block index
> - * \returns Flag indicating whether the block is skipped
> - */
> -bool TComDataCU::isSkipped(uint32_t partIdx)
> -{
> - return getSkipFlag(partIdx);
> -}
> -
> // ====================================================================================================================
> // Protected member functions
> // ====================================================================================================================
> @@ -2438,9 +2407,4 @@
> result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
> }
>
> -uint32_t TComDataCU::getSCUAddr()
> -{
> - return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInLCU;
> -}
> -
> //! \}
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.h Wed Sep 17 18:49:03 2014 +0900
> @@ -248,7 +248,7 @@
> public:
>
> TComDataCU();
> - virtual ~TComDataCU();
> + ~TComDataCU() {}
>
> uint32_t m_psyEnergy;
> uint64_t m_totalPsyCost;
> @@ -290,7 +290,8 @@
>
> uint32_t& getZorderIdxInCU() { return m_absIdxInLCU; }
>
> - uint32_t getSCUAddr();
> + uint32_t getSCUAddr() const { return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInLCU; }
> +
>
> uint32_t getCUPelX() { return m_cuPelX; }
>
> @@ -344,7 +345,7 @@
> char getLastCodedQP(uint32_t absPartIdx);
> void setQPSubCUs(int qp, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool &foundNonZeroCbf);
>
> - bool isLosslessCoded(uint32_t absPartIdx);
> + bool isLosslessCoded(uint32_t idx) const { return m_cuTransquantBypass[idx] && m_slice->m_pps->bTransquantBypassEnabled; }
>
> uint8_t* getTransformIdx() { return m_trIdx; }
>
> @@ -488,10 +489,9 @@
> // member functions for modes
> // -------------------------------------------------------------------------------------------------------------------
>
> - bool isIntra(uint32_t partIdx) { return m_predModes[partIdx] == MODE_INTRA; }
> -
> - bool isSkipped(uint32_t partIdx); ///< SKIP (no residual)
> - bool isBipredRestriction();
> + bool isIntra(uint32_t partIdx) const { return m_predModes[partIdx] == MODE_INTRA; }
> + bool isSkipped(uint32_t idx) const { return m_skipFlag[idx]; }
> + bool isBipredRestriction() const { return m_log2CUSize[0] == 3 && m_partSizes[0] != SIZE_2Nx2N; }
>
> // -------------------------------------------------------------------------------------------------------------------
> // member functions for symbol prediction (most probable / mode conversion)
> @@ -506,7 +506,7 @@
>
> uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth);
> uint32_t getCtxSkipFlag(uint32_t absPartIdx);
> - uint32_t getCtxInterDir(uint32_t absPartIdx);
> + uint32_t getCtxInterDir(uint32_t idx) const { return m_depth[idx]; }
>
> // -------------------------------------------------------------------------------------------------------------------
> // member functions for RD cost storage
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComPicYuv.cpp
> --- a/source/Lib/TLibCommon/TComPicYuv.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -60,10 +60,6 @@
> m_buOffsetC = NULL;
> }
>
> -TComPicYuv::~TComPicYuv()
> -{
> -}
> -
> bool TComPicYuv::create(int picWidth, int picHeight, int picCsp, uint32_t maxCUSize, uint32_t maxFullDepth)
> {
> m_picWidth = picWidth;
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/Lib/TLibCommon/TComPicYuv.h
> --- a/source/Lib/TLibCommon/TComPicYuv.h Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/Lib/TLibCommon/TComPicYuv.h Wed Sep 17 18:49:03 2014 +0900
> @@ -94,7 +94,7 @@
> int m_numCuInHeight;
>
> TComPicYuv();
> - virtual ~TComPicYuv();
> + ~TComPicYuv() {}
>
> // ------------------------------------------------------------------------------------------------
> // Memory management
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/common/deblock.cpp
> --- a/source/common/deblock.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/common/deblock.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -525,8 +525,8 @@
> if (cu->m_slice->m_pps->bTransquantBypassEnabled)
> {
> // check if each of PUs is lossless coded
> - partPNoFilter = cuP->isLosslessCoded(partP);
> - partQNoFilter = cuQ->isLosslessCoded(partQ);
> + partPNoFilter = cuP->getCUTransquantBypass(partP);
> + partQNoFilter = cuQ->getCUTransquantBypass(partQ);
> }
>
> if (d < beta)
> @@ -623,8 +623,8 @@
> if (cu->m_slice->m_pps->bTransquantBypassEnabled)
> {
> // check if each of PUs is lossless coded
> - partPNoFilter = cuP->isLosslessCoded(partP);
> - partQNoFilter = cuQ->isLosslessCoded(partQ);
> + partPNoFilter = cuP->getCUTransquantBypass(partP);
> + partQNoFilter = cuQ->getCUTransquantBypass(partQ);
> }
>
> for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/common/frame.cpp
> --- a/source/common/frame.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/common/frame.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -55,9 +55,6 @@
> m_interData = NULL;
> }
>
> -Frame::~Frame()
> -{}
> -
> bool Frame::create(x265_param *param, Window& display, Window& conformance)
> {
> m_conformanceWindow = conformance;
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/common/frame.h
> --- a/source/common/frame.h Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/common/frame.h Wed Sep 17 18:49:03 2014 +0900
> @@ -87,7 +87,7 @@
> x265_inter_data* m_interData; // inter analysis information
>
> Frame();
> - virtual ~Frame();
> + ~Frame() {}
>
> bool create(x265_param *param, Window& display, Window& conformance);
> bool allocPicSym(x265_param *param);
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/analysis.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -299,35 +299,38 @@
>
> void Analysis::compressCU(TComDataCU* cu)
> {
> + Frame* pic = cu->m_pic;
> + uint32_t cuAddr = cu->getAddr();
> +
> if (cu->m_slice->m_pps->bUseDQP)
> m_bEncodeDQP = true;
>
> // initialize CU data
> - m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
> - m_tempCU[0]->initCU(cu->m_pic, cu->getAddr());
> + m_bestCU[0]->initCU(pic, cuAddr);
> + m_tempCU[0]->initCU(pic, cuAddr);
>
> // analysis of CU
> uint32_t numPartition = cu->getTotalNumPart();
> if (m_bestCU[0]->m_slice->m_sliceType == I_SLICE)
> {
> - if (m_param->analysisMode == X265_ANALYSIS_LOAD && m_bestCU[0]->m_pic->m_intraData)
> + if (m_param->analysisMode == X265_ANALYSIS_LOAD && pic->m_intraData)
> {
> uint32_t zOrder = 0;
> compressSharedIntraCTU(m_bestCU[0], m_tempCU[0], false, cu, cu->m_CULocalData,
> - &m_bestCU[0]->m_pic->m_intraData->depth[cu->getAddr() * cu->m_numPartitions],
> - &m_bestCU[0]->m_pic->m_intraData->partSizes[cu->getAddr() * cu->m_numPartitions],
> - &m_bestCU[0]->m_pic->m_intraData->modes[cu->getAddr() * cu->m_numPartitions], zOrder);
> + &pic->m_intraData->depth[cuAddr * cu->m_numPartitions],
> + &pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions],
> + &pic->m_intraData->modes[cuAddr * cu->m_numPartitions], zOrder);
> }
> else
> {
> compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu, cu->m_CULocalData);
> - if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_bestCU[0]->m_pic->m_intraData)
> + if (m_param->analysisMode == X265_ANALYSIS_SAVE && pic->m_intraData)
> {
> - memcpy(&m_bestCU[0]->m_pic->m_intraData->depth[cu->getAddr() * cu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * cu->getTotalNumPart());
> - memcpy(&m_bestCU[0]->m_pic->m_intraData->modes[cu->getAddr() * cu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * cu->getTotalNumPart());
> - memcpy(&m_bestCU[0]->m_pic->m_intraData->partSizes[cu->getAddr() * cu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * cu->getTotalNumPart());
> - m_bestCU[0]->m_pic->m_intraData->cuAddr[cu->getAddr()] = cu->getAddr();
> - m_bestCU[0]->m_pic->m_intraData->poc[cu->getAddr()] = cu->m_pic->m_POC;
> + memcpy(&pic->m_intraData->depth[cuAddr * cu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * cu->getTotalNumPart());
> + memcpy(&pic->m_intraData->modes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * cu->getTotalNumPart());
> + memcpy(&pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * cu->getTotalNumPart());
> + pic->m_intraData->cuAddr[cuAddr] = cuAddr;
> + pic->m_intraData->poc[cuAddr] = cu->m_pic->m_POC;
> }
> }
> if (m_param->bLogCuStats || m_param->rc.bStatWrite)
> @@ -415,17 +418,20 @@
> }
> }
> }
> +
> void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, TComDataCU* cuPicsym, CU *cu)
> {
> //PPAScopeEvent(CompressIntraCU + depth);
> Frame* pic = outBestCU->m_pic;
> + uint32_t cuAddr = outBestCU->getAddr();
> + uint32_t absPartIdx = outBestCU->getZorderIdxInCU();
>
> if (depth == 0)
> // get original YUV data from picture
> - m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
> + m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), cuAddr, absPartIdx);
> else
> // copy partition YUV from depth 0 CTU cache
> - m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
> + m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
> Slice* slice = outTempCU->m_slice;
> // We need to split, so don't try these modes.
> int cu_split_flag = !(cu->flags & CU::LEAF);
> @@ -447,12 +453,12 @@
> outBestCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits, outBestCU->m_psyEnergy);
> else
> outBestCU->m_totalRDCost = m_rdCost.calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
> +
> + // copy original YUV samples in lossless mode
> + if (outBestCU->isLosslessCoded(0))
> + fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
> }
>
> - // copy original YUV samples in lossless mode
> - if (outBestCU->isLosslessCoded(0))
> - fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
> -
> // further split
> if (cu_split_flag)
> {
> @@ -475,7 +481,7 @@
>
> compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, cuPicsym, child_cu);
> outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
> - copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
> + m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
> }
> else
> {
> @@ -527,7 +533,7 @@
> outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
>
> // Copy Yuv data to picture Yuv
> - copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
> + m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
>
> #if CHECKED_BUILD || _DEBUG
> X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
> @@ -616,7 +622,7 @@
> if (!subBestPartCU->m_totalRDCost) // if cost is 0, CU is best CU
> outTempCU->m_totalRDCost = 0; // set outTempCU cost to 0, so later check will use this CU as best CU
>
> - copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
> + m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
> }
> else
> {
> @@ -661,7 +667,7 @@
> checkBestMode(outBestCU, outTempCU, depth);
> }
> outBestCU->copyToPic(depth);
> - copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
> + m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
>
> #if CHECKED_BUILD || _DEBUG
> X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
> @@ -727,11 +733,12 @@
> void Analysis::compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth, TComDataCU* cuPicsym, CU *cu_t, int bInsidePicture, uint32_t PartitionIndex, uint32_t minDepth)
> {
> Frame* pic = outTempCU->m_pic;
> + uint32_t cuAddr = outTempCU->getAddr();
> uint32_t absPartIdx = outTempCU->getZorderIdxInCU();
>
> if (depth == 0)
> // get original YUV data from picture
> - m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outTempCU->getAddr(), absPartIdx);
> + m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), cuAddr, absPartIdx);
> else
> // copy partition YUV from depth 0 CTU cache
> m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
> @@ -750,14 +757,14 @@
>
> if (depth == 0 && m_param->rdLevel == 0)
> {
> - m_origYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cu->getAddr(), 0);
> + m_origYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, 0);
> }
> // We need to split, so don't try these modes.
> #if TOPSKIP
> if (cu_unsplit_flag && !bInsidePictureParent)
> {
> - TComDataCU* colocated0 = slice->m_numRefIdx[0] > 0 ? slice->m_refPicList[0][0]->getCU(outTempCU->getAddr()) : NULL;
> - TComDataCU* colocated1 = slice->m_numRefIdx[1] > 0 ? slice->m_refPicList[1][0]->getCU(outTempCU->getAddr()) : NULL;
> + TComDataCU* colocated0 = slice->m_numRefIdx[0] > 0 ? slice->m_refPicList[0][0]->getCU(cuAddr) : NULL;
> + TComDataCU* colocated1 = slice->m_numRefIdx[1] > 0 ? slice->m_refPicList[1][0]->getCU(cuAddr) : NULL;
> char currentQP = outTempCU->getQP(0);
> char previousQP = colocated0->getQP(0);
> uint32_t delta = 0, minDepth0 = 4, minDepth1 = 4;
> @@ -793,12 +800,12 @@
> /* Initialise all Mode-CUs based on parentCU */
> if (depth == 0)
> {
> - m_interCU_2Nx2N[depth]->initCU(pic, cu->getAddr());
> - m_interCU_Nx2N[depth]->initCU(pic, cu->getAddr());
> - m_interCU_2NxN[depth]->initCU(pic, cu->getAddr());
> - m_intraInInterCU[depth]->initCU(pic, cu->getAddr());
> - m_mergeCU[depth]->initCU(pic, cu->getAddr());
> - m_bestMergeCU[depth]->initCU(pic, cu->getAddr());
> + m_interCU_2Nx2N[depth]->initCU(pic, cuAddr);
> + m_interCU_Nx2N[depth]->initCU(pic, cuAddr);
> + m_interCU_2NxN[depth]->initCU(pic, cuAddr);
> + m_intraInInterCU[depth]->initCU(pic, cuAddr);
> + m_mergeCU[depth]->initCU(pic, cuAddr);
> + m_bestMergeCU[depth]->initCU(pic, cuAddr);
> }
> else
> {
> @@ -1021,7 +1028,7 @@
> TComDataCU* aboveLeft = outTempCU->getCUAboveLeft();
> TComDataCU* aboveRight = outTempCU->getCUAboveRight();
> TComDataCU* left = outTempCU->getCULeft();
> - TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
> + TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
>
> totalCostCU += rootCU->m_avgCost[depth] * rootCU->m_count[depth];
> totalCountCU += rootCU->m_count[depth];
> @@ -1063,7 +1070,7 @@
>
> /* Copy Yuv data to picture Yuv */
> if (m_param->rdLevel != 0)
> - copyYuv2Pic(pic, outBestCU->getAddr(), absPartIdx, depth);
> + m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
> return;
> }
> }
> @@ -1094,7 +1101,7 @@
> tempavgCost = m_rdCost.m_psyRd ? subBestPartCU->m_totalPsyCost : subBestPartCU->m_totalRDCost;
> else
> tempavgCost = subBestPartCU->m_totalRDCost;
> - TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
> + TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
> uint64_t temp = rootCU->m_avgCost[nextDepth] * rootCU->m_count[nextDepth];
> rootCU->m_count[nextDepth] += 1;
> rootCU->m_avgCost[nextDepth] = (temp + tempavgCost) / rootCU->m_count[nextDepth];
> @@ -1167,7 +1174,7 @@
> if (depth == 0)
> {
> uint64_t tempavgCost = m_rdCost.m_psyRd ? outBestCU->m_totalPsyCost : outBestCU->m_totalRDCost;
> - TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
> + TComDataCU* rootCU = pic->getPicSym()->getCU(cuAddr);
> uint64_t temp = rootCU->m_avgCost[depth] * rootCU->m_count[depth];
> rootCU->m_count[depth] += 1;
> rootCU->m_avgCost[depth] = (temp + tempavgCost) / rootCU->m_count[depth];
> @@ -1199,7 +1206,7 @@
> {
> /* Copy Yuv data to picture Yuv */
> if (cu_unsplit_flag)
> - copyYuv2Pic(pic, outBestCU->getAddr(), absPartIdx, depth);
> + m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
> }
>
> #if CHECKED_BUILD || _DEBUG
> @@ -1235,13 +1242,15 @@
> //PPAScopeEvent(CompressCU + depth);
>
> Frame* pic = outBestCU->m_pic;
> + uint32_t cuAddr = outBestCU->getAddr();
> + uint32_t absPartIdx = outBestCU->getZorderIdxInCU();
>
> if (depth == 0)
> // get original YUV data from picture
> - m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), outBestCU->getAddr(), outBestCU->getZorderIdxInCU());
> + m_origYuv[depth]->copyFromPicYuv(pic->getPicYuvOrg(), cuAddr, absPartIdx);
> else
> // copy partition YUV from depth 0 CTU cache
> - m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
> + m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
>
> // variable for Cbf fast mode PU decision
> bool doNotBlockPu = true;
> @@ -1418,12 +1427,12 @@
> outBestCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits, outBestCU->m_psyEnergy);
> else
> outBestCU->m_totalRDCost = m_rdCost.calcRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits);
> +
> + // copy original YUV samples in lossless mode
> + if (outBestCU->isLosslessCoded(0))
> + fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
> }
>
> - // copy original YUV samples in lossless mode
> - if (outBestCU->isLosslessCoded(0))
> - fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
> -
> // further split
> if (cu_split_flag && !outBestCU->isSkipped(0))
> {
> @@ -1448,7 +1457,7 @@
>
> compressInterCU_rd5_6(subBestPartCU, subTempPartCU, nextDepth, cuPicsym, child_cu);
> outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
> - copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
> + m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
> }
> else
> {
> @@ -1499,7 +1508,7 @@
> outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
>
> // Copy Yuv data to picture Yuv
> - copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
> + m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
>
> #if CHECKED_BUILD || _DEBUG
> X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
> @@ -2051,8 +2060,7 @@
> TComDataCU* subTempPartCU = m_tempCU[nextDepth];
> uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
> uint32_t xmax = slice->m_sps->picWidthInLumaSamples - lcu->getCUPelX();
> - uint32_t ymax = slice->m_sps->picHeightInLumaSamples - lcu->getCUPelY();
> - for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
> + uint32_t ymax = slice->m_sps->picHeightInLumaSamples - lcu->getCUPelY(); for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
> {
> if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
> {
> @@ -2064,6 +2072,8 @@
> return;
> }
>
> + uint32_t cuAddr = cu->getAddr();
> +
> m_quant.setQPforQuant(cu);
>
> if (lcu->getPredictionMode(absPartIdx) == MODE_INTER)
> @@ -2092,7 +2102,6 @@
> src2 = m_bestPredYuv[0]->getCrAddr(absPartIdx);
> src1 = m_origYuv[0]->getCrAddr(absPartIdx);
> dst = m_tmpResiYuv[depth]->getCrAddr();
> - dststride = m_tmpResiYuv[depth]->m_cwidth;
> primitives.chroma[m_param->internalCsp].sub_ps[sizeIdx](dst, dststride, src1, src2, src1stride, src2stride);
>
> uint32_t tuDepthRange[2];
> @@ -2130,9 +2139,8 @@
> pred = m_bestPredYuv[0]->getCrAddr(absPartIdx);
> res = m_tmpResiYuv[depth]->getCrAddr();
> reco = m_bestRecoYuv[depth]->getCrAddr();
> - reco = m_bestRecoYuv[depth]->getCrAddr();
> primitives.chroma[m_param->internalCsp].add_ps[sizeIdx](reco, dststride, pred, res, src1stride, src2stride);
> - m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), lcu->getAddr(), absPartIdx);
> + m_bestRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
> return;
> }
> }
> @@ -2141,19 +2149,19 @@
> int part = partitionFromLog2Size(log2CUSize);
> TComPicYuv* rec = pic->getPicYuvRec();
> pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
> - pixel* dst = rec->getLumaAddr(cu->getAddr(), absPartIdx);
> + pixel* dst = rec->getLumaAddr(cuAddr, absPartIdx);
> uint32_t srcstride = m_bestPredYuv[0]->getStride();
> uint32_t dststride = rec->getStride();
> primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
>
> src = m_bestPredYuv[0]->getCbAddr(absPartIdx);
> - dst = rec->getCbAddr(cu->getAddr(), absPartIdx);
> + dst = rec->getCbAddr(cuAddr, absPartIdx);
> srcstride = m_bestPredYuv[0]->getCStride();
> dststride = rec->getCStride();
> primitives.chroma[m_param->internalCsp].copy_pp[part](dst, dststride, src, srcstride);
>
> src = m_bestPredYuv[0]->getCrAddr(absPartIdx);
> - dst = rec->getCrAddr(cu->getAddr(), absPartIdx);
> + dst = rec->getCrAddr(cuAddr, absPartIdx);
> primitives.chroma[m_param->internalCsp].copy_pp[part](dst, dststride, src, srcstride);
> }
> else
> @@ -2161,7 +2169,7 @@
> m_origYuv[0]->copyPartToYuv(m_origYuv[depth], absPartIdx);
> generateCoeffRecon(cu, m_origYuv[depth], m_modePredYuv[5][depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth]);
> checkDQP(cu);
> - m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), lcu->getAddr(), absPartIdx);
> + m_tmpRecoYuv[depth]->copyToPicYuv(pic->getPicYuvRec(), cuAddr, absPartIdx);
> cu->copyCodedToPic(depth);
> }
> }
> @@ -2240,16 +2248,6 @@
> }
> }
>
> -void Analysis::copyYuv2Pic(Frame* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth)
> -{
> - m_bestRecoYuv[depth]->copyToPicYuv(outPic->getPicYuvRec(), cuAddr, absPartIdx);
> -}
> -
> -void Analysis::copyYuv2Tmp(uint32_t partUnitIdx, uint32_t nextDepth)
> -{
> - m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[nextDepth - 1], partUnitIdx);
> -}
> -
> /* Function for filling original YUV samples of a CU in lossless mode */
> void Analysis::fillOrigYUVBuffer(TComDataCU* cu, TComYuv* fencYuv)
> {
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/analysis.h Wed Sep 17 18:49:03 2014 +0900
> @@ -129,8 +129,6 @@
> void encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv);
> void encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth);
> void checkDQP(TComDataCU* cu);
> - void copyYuv2Pic(Frame* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth);
> - void copyYuv2Tmp(uint32_t partUnitIdx, uint32_t depth);
> void deriveTestModeAMP(TComDataCU* bestCU, PartSize parentSize, bool &bTestAMP_Hor, bool &bTestAMP_Ver,
> bool &bTestMergeAMP_Hor, bool &bTestMergeAMP_Ver);
> void fillOrigYUVBuffer(TComDataCU* outCU, TComYuv* origYuv);
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/encoder.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -75,10 +75,6 @@
> m_param = NULL;
> }
>
> -Encoder::~Encoder()
> -{
> -}
> -
> void Encoder::create()
> {
> if (!primitives.sad[0])
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/encoder.h
> --- a/source/encoder/encoder.h Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/encoder.h Wed Sep 17 18:49:03 2014 +0900
> @@ -130,7 +130,7 @@
>
> Encoder();
>
> - virtual ~Encoder();
> + ~Encoder() {}
>
> void create();
> void destroy();
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/entropy.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -1143,11 +1143,6 @@
> }
>
> // SBAC RD
> -void Entropy::load(Entropy& src)
> -{
> - this->copyFrom(src);
> -}
> -
> void Entropy::loadIntraDirModeLuma(Entropy& src)
> {
> copyState(src);
> @@ -1155,11 +1150,6 @@
> ::memcpy(&m_contextState[OFF_ADI_CTX], &src.m_contextState[OFF_ADI_CTX], sizeof(uint8_t) * NUM_ADI_CTX);
> }
>
> -void Entropy::store(Entropy& dest)
> -{
> - dest.copyFrom(*this);
> -}
> -
> void Entropy::copyFrom(Entropy& src)
> {
> copyState(src);
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/entropy.h
> --- a/source/encoder/entropy.h Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/entropy.h Wed Sep 17 18:49:03 2014 +0900
> @@ -117,7 +117,6 @@
> Entropy();
>
> void setBitstream(Bitstream* p) { m_bitIf = p; }
> - bool isBitCounter() const { return !m_bitIf; }
>
> uint32_t getNumberOfWrittenBits()
> {
> @@ -130,9 +129,10 @@
> void resetEntropy(Slice *slice);
>
> // SBAC RD
> - void load(Entropy& src);
> + void load(Entropy& src) { copyFrom(src); }
> +
> void loadIntraDirModeLuma(Entropy& src);
> - void store(Entropy& dest);
> + void store(Entropy& dest) { dest.copyFrom(*this); }
> void loadContexts(Entropy& src) { copyContextsFrom(src); }
> void copyState(Entropy& other);
>
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/motion.h
> --- a/source/encoder/motion.h Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/motion.h Wed Sep 17 18:49:03 2014 +0900
> @@ -65,7 +65,7 @@
>
> MotionEstimate();
>
> - virtual ~MotionEstimate();
> + ~MotionEstimate();
>
> void setSearchMethod(int i) { searchMethod = i; }
>
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/predict.cpp
> --- a/source/encoder/predict.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/predict.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -363,7 +363,6 @@
> {
> int refStride = refPic->getCStride();
> int dstStride = dstPic->getCStride();
> -
> int hChromaShift = CHROMA_H_SHIFT(m_csp);
> int vChromaShift = CHROMA_V_SHIFT(m_csp);
>
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/predict.h
> --- a/source/encoder/predict.h Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/predict.h Wed Sep 17 18:49:03 2014 +0900
> @@ -83,7 +83,7 @@
> pixel* m_refLeftFlt;
>
> Predict();
> - virtual ~Predict();
> + ~Predict();
>
> void initTempBuff(int csp);
>
> diff -r 199e8f2e0d54 -r b00d1f46a763 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Tue Sep 16 17:50:06 2014 +0530
> +++ b/source/encoder/search.cpp Wed Sep 17 18:49:03 2014 +0900
> @@ -717,7 +717,7 @@
>
> if (numSig)
> {
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTransformSkip, numSig);
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTransformSkip, numSig);
Can you split this change of getCUTransquantBypass() usage into a
separate patch? I think I can push the rest of it now, but would like to
pass these changes through the regression tests.
> // Generate Recon
> primitives.luma_add_ps[sizeIdx](recon, stride, pred, residual, stride, stride);
> @@ -1168,7 +1168,7 @@
> if (numSig)
> {
> // inverse transform
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), residual, stride, coeff, log2TrSizeC, ttype, true, useTransformSkipC, numSig);
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), residual, stride, coeff, log2TrSizeC, ttype, true, useTransformSkipC, numSig);
>
> // reconstruction
> primitives.chroma[X265_CSP_I444].add_ps[sizeIdxC](recon, stride, pred, residual, stride, stride);
> @@ -2166,7 +2166,7 @@
> else
> zeroCost = m_rdCost.calcRdCost(zeroDistortion, zeroResiBits);
>
> - if (cu->isLosslessCoded(0))
> + if (bIsLosslessMode)
> zeroCost = cost + 1;
>
> if (zeroCost < cost)
> @@ -2340,7 +2340,7 @@
> cu->setCbfSubParts(numSigY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
>
> if (numSigY)
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY);
> else
> primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);
>
> @@ -2368,12 +2368,12 @@
> cu->setCbfPartRange(numSigV ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
>
> if (numSigU)
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiU, strideResiC, coeffCurU + subTUOffset, log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU);
> else
> primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0);
>
> if (numSigV)
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiV, strideResiC, coeffCurV + subTUOffset, log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV);
> else
> primitives.blockfill_s[sizeIdxC](curResiV, strideResiC, 0);
> }
> @@ -2575,7 +2575,7 @@
>
> if (numSigY)
> {
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY); //this is for inter mode only
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiY, strideResiY, coeffCurY, log2TrSize, TEXT_LUMA, false, false, numSigY); //this is for inter mode only
>
> const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, curResiY, strideResiY);
> uint32_t nonZeroPsyEnergyY = 0;
> @@ -2592,7 +2592,7 @@
> nonZeroPsyEnergyY = m_rdCost.psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
> cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getStride());
> }
> - if (cu->isLosslessCoded(0))
> + if (cu->getCUTransquantBypass(0))
> {
> distY = nonZeroDistY;
> psyEnergyY = nonZeroPsyEnergyY;
> @@ -2671,7 +2671,7 @@
>
> if (numSigU[tuIterator.section])
> {
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiU, strideResiC, coeffCurU + subTUOffset,
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiU, strideResiC, coeffCurU + subTUOffset,
> log2TrSizeC, TEXT_CHROMA_U, false, false, numSigU[tuIterator.section]);
> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
> curResiU, strideResiC);
> @@ -2692,7 +2692,7 @@
> cu->m_pic->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder),
> cu->m_pic->getPicYuvRec()->getCStride());
> }
> - if (cu->isLosslessCoded(0))
> + if (cu->getCUTransquantBypass(0))
> {
> distU = nonZeroDistU;
> psyEnergyU = nonZeroPsyEnergyU;
> @@ -2753,7 +2753,7 @@
>
> if (numSigV[tuIterator.section])
> {
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), curResiV, strideResiC, coeffCurV + subTUOffset,
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), curResiV, strideResiC, coeffCurV + subTUOffset,
> log2TrSizeC, TEXT_CHROMA_V, false, false, numSigV[tuIterator.section]);
> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
> curResiV, strideResiC);
> @@ -2774,7 +2774,7 @@
> cu->m_pic->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder),
> cu->m_pic->getPicYuvRec()->getCStride());
> }
> - if (cu->isLosslessCoded(0))
> + if (cu->getCUTransquantBypass(0))
> {
> distV = nonZeroDistV;
> psyEnergyV = nonZeroPsyEnergyV;
> @@ -2862,7 +2862,7 @@
> m_entropyCoder->codeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
> const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
>
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
>
> nonZeroDistY = primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, tsResiY, trSize);
>
> @@ -2949,7 +2949,7 @@
> m_entropyCoder->codeCoeffNxN(cu, tsCoeffU, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
> singleBitsComp[TEXT_CHROMA_U][tuIterator.section] = m_entropyCoder->getNumberOfWrittenBits();
>
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), tsResiU, trSizeC, tsCoeffU,
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), tsResiU, trSizeC, tsCoeffU,
> log2TrSizeC, TEXT_CHROMA_U, false, true, numSigTSkipU);
> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
> tsResiU, trSizeC);
> @@ -2990,7 +2990,7 @@
> m_entropyCoder->codeCoeffNxN(cu, tsCoeffV, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
> singleBitsComp[TEXT_CHROMA_V][tuIterator.section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.section];
>
> - m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), tsResiV, trSizeC, tsCoeffV,
> + m_quant.invtransformNxN(cu->getCUTransquantBypass(0), tsResiV, trSizeC, tsCoeffV,
> log2TrSizeC, TEXT_CHROMA_V, false, true, numSigTSkipV);
> uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
> tsResiV, trSizeC);
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
More information about the x265-devel
mailing list