[x265] [PATCH] TComDataCU: replace getTotalNumPart() with CU structure details

Fri Sep 26 20:46:08 CEST 2014

On 09/26, santhoshini at multicorewareinc.com wrote:
> # HG changeset patch
> # User Santhoshini Sekar <santhoshini at multicorewareinc.com>
> # Date 1411724066 -19800
> #      Fri Sep 26 15:04:26 2014 +0530
> # Node ID cfa8a52f704e3e56e7327dc90316ad19b405b499
> # Parent  76afa140c7ed50065c52d52101c19526aad0acd1
> TComDataCU: replace getTotalNumPart() with CU structure details
> 
> diff -r 76afa140c7ed -r cfa8a52f704e source/Lib/TLibCommon/TComDataCU.cpp
> --- a/source/Lib/TLibCommon/TComDataCU.cpp	Thu Sep 25 15:24:16 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.cpp	Fri Sep 26 15:04:26 2014 +0530
> @@ -409,7 +409,7 @@
>      m_mvBits           = 0;
>      m_coeffBits        = 0;
>  
> -    m_numPartitions    = cu->getTotalNumPart() >> 2;
> +    m_numPartitions    = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2));

Can g_log2Size[g_maxCUSize] be calculated as an offset from
g_maxCUDepth? This would be much more efficient than a sparse table
lookup.

At first glance, this patch doesn't look like an efficiency improvement.
Perhaps numPartitions should be pre-calculated as a member of cuData?

>      for (int i = 0; i < 4; i++)
>      {
> @@ -456,7 +456,7 @@
>  {
>      X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");
>  
> -    uint32_t partOffset = (cu->getTotalNumPart() >> 2) * partUnitIdx;
> +    uint32_t partOffset = ((NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2))) * partUnitIdx;
>  
>      m_pic              = cu->m_pic;
>      m_slice            = cu->m_slice;
> @@ -474,7 +474,7 @@
>      m_totalBits        = 0;
>      m_mvBits           = 0;
>      m_coeffBits        = 0;
> -    m_numPartitions    = cu->getTotalNumPart() >> 2;
> +    m_numPartitions    = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2));
>  
>      TComDataCU* otherCU = m_pic->getCU(m_cuAddr);
>      int sizeInChar  = sizeof(char) * m_numPartitions;
> @@ -496,7 +496,7 @@
>  
>  // Copy small CU to bigger CU.
>  // One of quarter parts overwritten by predicted sub part.
> -void TComDataCU::copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, bool isRDObasedAnalysis)
> +void TComDataCU::copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, CU* cuData, bool isRDObasedAnalysis)
>  {
>      X265_CHECK(partUnitIdx < 4, "part unit should be less than 4\n");
>      if (isRDObasedAnalysis)
> @@ -511,8 +511,8 @@
>      m_mvBits           += cu->m_mvBits;
>      m_coeffBits        += cu->m_coeffBits;
>  
> -    uint32_t offset       = cu->getTotalNumPart() * partUnitIdx;
> -    uint32_t numPartition = cu->getTotalNumPart();
> +    uint32_t offset       = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2)) * partUnitIdx;
> +    uint32_t numPartition = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2));

Try to be work-efficient: compute numPartition once and derive offset from it.

  uint32_t numPartition = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2));
  uint32_t offset       = numPartition * partUnitIdx;

>      int sizeInBool  = sizeof(bool) * numPartition;
>      int sizeInChar  = sizeof(char) * numPartition;
>      memcpy(m_skipFlag  + offset, cu->getSkipFlag(),       sizeof(*m_skipFlag)   * numPartition);
> @@ -544,8 +544,8 @@
>      m_cuAbove          = cu->getCUAbove();
>      m_cuLeft           = cu->getCULeft();
>  
> -    m_cuMvField[0].copyFrom(cu->getCUMvField(REF_PIC_LIST_0), cu->getTotalNumPart(), offset);
> -    m_cuMvField[1].copyFrom(cu->getCUMvField(REF_PIC_LIST_1), cu->getTotalNumPart(), offset);
> +    m_cuMvField[0].copyFrom(cu->getCUMvField(REF_PIC_LIST_0), (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2)), offset);
> +    m_cuMvField[1].copyFrom(cu->getCUMvField(REF_PIC_LIST_1), (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2)), offset);
>  
>      uint32_t tmp  = 1 << ((g_maxLog2CUSize - depth) * 2);
>      uint32_t tmp2 = partUnitIdx * tmp;
> diff -r 76afa140c7ed -r cfa8a52f704e source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h	Thu Sep 25 15:24:16 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.h	Fri Sep 26 15:04:26 2014 +0530
> @@ -276,7 +276,7 @@
>      void          initSubCU(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, int qp, CU* cuData);
>  
>      void          copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx, uint32_t depth, CU* cuData);
> -    void          copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, bool isRDObasedAnalysis = true);
> +    void          copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, CU* cuData, bool isRDObasedAnalysis = true);
>  
>      void          copyToPic(uint32_t depth);
>      void          copyToPic(uint32_t depth, uint32_t partIdx, uint32_t partDepth);
> @@ -510,8 +510,6 @@
>      // member functions for RD cost storage
>      // -------------------------------------------------------------------------------------------------------------------
>  
> -    uint32_t&     getTotalNumPart()     { return m_numPartitions; }
> -
>      ScanType      getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra);
>      void          getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma);
>  
> diff -r 76afa140c7ed -r cfa8a52f704e source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Thu Sep 25 15:24:16 2014 +0530
> +++ b/source/encoder/analysis.cpp	Fri Sep 26 15:04:26 2014 +0530
> @@ -310,7 +310,7 @@
>      m_tempCU[0]->initCU(pic, cuAddr);
>  
>      // analysis of CU
> -    uint32_t numPartition = cu->getTotalNumPart();
> +    uint32_t numPartition = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cu->m_CULocalData->log2CUSize) * 2));
>      if (m_bestCU[0]->m_slice->m_sliceType == I_SLICE)
>      {
>          if (m_param->analysisMode == X265_ANALYSIS_LOAD && pic->m_intraData)
> @@ -326,9 +326,9 @@
>              compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu->m_CULocalData);
>              if (m_param->analysisMode == X265_ANALYSIS_SAVE && pic->m_intraData)
>              {
> -                memcpy(&pic->m_intraData->depth[cuAddr * cu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * cu->getTotalNumPart());
> -                memcpy(&pic->m_intraData->modes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * cu->getTotalNumPart());
> -                memcpy(&pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * cu->getTotalNumPart());
> +                memcpy(&pic->m_intraData->depth[cuAddr * cu->m_numPartitions], m_bestCU[0]->getDepth(), sizeof(uint8_t) * numPartition);
> +                memcpy(&pic->m_intraData->modes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getLumaIntraDir(), sizeof(uint8_t) * numPartition);
> +                memcpy(&pic->m_intraData->partSizes[cuAddr * cu->m_numPartitions], m_bestCU[0]->getPartitionSize(), sizeof(char) * numPartition);
>                  pic->m_intraData->cuAddr[cuAddr] = cuAddr;
>                  pic->m_intraData->poc[cuAddr]    = cu->m_pic->m_POC;
>              }
> @@ -484,13 +484,13 @@
>                      m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
>  
>                  compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, child_cu);
> -                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
> -                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
> +                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, child_cu); // Keep best part data to current temporary data.
> +                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - child_cu->log2CUSize) * 2)) * partUnitIdx);
>              }
>              else
>              {
>                  subBestPartCU->copyToPic(nextDepth);
> -                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
> +                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, child_cu);
>              }
>          }
>          if (cu_unsplit_flag)
> @@ -508,7 +508,8 @@
>          if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
>          {
>              bool hasResidual = false;
> -            for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
> +            uint32_t numPartitions = uint32_t(NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cu->log2CUSize) * 2));

We generally don't use C++-style casts; i.e., instead of instantiating
uint32_t(foo), we use a C cast, (uint32_t)foo. In any case, isn't
NUM_CU_PARTITIONS already an unsigned value?

> +            for (uint32_t blkIdx = 0; blkIdx < numPartitions; blkIdx++)
>              {
>                  if (outTempCU->getCbf(blkIdx, TEXT_LUMA) || outTempCU->getCbf(blkIdx, TEXT_CHROMA_U) ||
>                      outTempCU->getCbf(blkIdx, TEXT_CHROMA_V))
> @@ -623,17 +624,17 @@
>                  subTempPartCU->m_totalRDCost = 1;
>  
>                  compressSharedIntraCTU(subBestPartCU, subTempPartCU, nextDepth, child_cu, sharedDepth, sharedPartSizes, sharedModes, zOrder);
> -                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
> +                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, child_cu); // Keep best part data to current temporary data.
>  
>                  if (!subBestPartCU->m_totalRDCost) // if cost is 0, CU is best CU
>                      outTempCU->m_totalRDCost = 0;  // set outTempCU cost to 0, so later check will use this CU as best CU
>  
> -                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
> +                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - child_cu->log2CUSize) * 2)) * partUnitIdx);
>              }
>              else
>              {
>                  subBestPartCU->copyToPic(nextDepth);
> -                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
> +                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, child_cu);
>  
>                  // increment zOrder offset to point to next best depth in sharedDepth buffer
>                  zOrder += g_depthInc[ctuToDepthIndex][nextDepth];
> @@ -649,7 +650,7 @@
>          if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
>          {
>              bool hasResidual = false;
> -            for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
> +            for (uint32_t blkIdx = 0; blkIdx < uint32_t(NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cu->log2CUSize) * 2)); blkIdx++)
>              {
>                  if (outTempCU->getCbf(blkIdx, TEXT_LUMA) || outTempCU->getCbf(blkIdx, TEXT_CHROMA_U) ||
>                      outTempCU->getCbf(blkIdx, TEXT_CHROMA_V))
> @@ -778,7 +779,7 @@
>          char previousQP = colocated0->getQP(0);
>          uint32_t delta = 0, minDepth0 = 4, minDepth1 = 4;
>          uint32_t sum0 = 0, sum1 = 0;
> -        uint32_t numPartitions = outTempCU->getTotalNumPart();
> +        uint32_t numPartitions = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cu->log2CUSize) * 2));
>          for (uint32_t i = 0; i < numPartitions; i = i + 4)
>          {
>              uint32_t j = absPartIdx + i;
> @@ -1117,16 +1118,16 @@
>                  }
>  #endif // if EARLY_EXIT
>                  /* Adding costs from best SUbCUs */
> -                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, true); // Keep best part data to current temporary data.
> +                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, child_cu, true); // Keep best part data to current temporary data.
>                  if (m_param->rdLevel != 0)
> -                    m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
> +                    m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - child_cu->log2CUSize) * 2)) * partUnitIdx);
>                  else
> -                    m_bestPredYuv[nextDepth]->copyToPartYuv(m_tmpPredYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
> +                    m_bestPredYuv[nextDepth]->copyToPartYuv(m_tmpPredYuv[depth], (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - child_cu->log2CUSize) * 2)) * partUnitIdx);
>              }
>              else
>              {
>                  subTempPartCU->copyToPic(nextDepth);
> -                outTempCU->copyPartFrom(subTempPartCU, partUnitIdx, nextDepth, false);
> +                outTempCU->copyPartFrom(subTempPartCU, partUnitIdx, nextDepth, child_cu, false);
>              }
>          }
>  
> @@ -1152,7 +1153,7 @@
>          if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
>          {
>              bool hasResidual = false;
> -            for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
> +            for (uint32_t blkIdx = 0; blkIdx < uint32_t(NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cu->log2CUSize) * 2)); blkIdx++)
>              {
>                  if (outTempCU->getCbf(blkIdx, TEXT_LUMA) || outTempCU->getCbf(blkIdx, TEXT_CHROMA_U) ||
>                      outTempCU->getCbf(blkIdx, TEXT_CHROMA_V))
> @@ -1465,13 +1466,13 @@
>                      m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
>  
>                  compressInterCU_rd5_6(subBestPartCU, subTempPartCU, nextDepth, child_cu);
> -                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
> -                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
> +                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, child_cu); // Keep best part data to current temporary data.
> +                m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - child_cu->log2CUSize) * 2)) * partUnitIdx);
>              }
>              else
>              {
>                  subBestPartCU->copyToPic(nextDepth);
> -                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
> +                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, child_cu);
>              }
>          }
>  
> @@ -1490,7 +1491,7 @@
>          if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
>          {
>              bool hasResidual = false;
> -            for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
> +            for (uint32_t blkIdx = 0; blkIdx < uint32_t(NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cu->log2CUSize) * 2)); blkIdx++)
>              {
>                  if (outTempCU->getCbf(blkIdx, TEXT_LUMA) || outTempCU->getCbf(blkIdx, TEXT_CHROMA_U) ||
>                      outTempCU->getCbf(blkIdx, TEXT_CHROMA_V))
> @@ -2072,7 +2073,8 @@
>          TComDataCU* subTempPartCU = m_tempCU[nextDepth];
>          uint32_t qNumParts = (NUM_CU_PARTITIONS >> (depth << 1)) >> 2;
>          uint32_t xmax = slice->m_sps->picWidthInLumaSamples  - lcu->getCUPelX();
> -        uint32_t ymax = slice->m_sps->picHeightInLumaSamples - lcu->getCUPelY();        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
> +        uint32_t ymax = slice->m_sps->picHeightInLumaSamples - lcu->getCUPelY();
> +        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
>          {
>              CU *child_cu = cu->m_CULocalData + cuData->childIdx + partUnitIdx;
>              if (g_zscanToPelX[absPartIdx] < xmax && g_zscanToPelY[absPartIdx] < ymax)
> diff -r 76afa140c7ed -r cfa8a52f704e source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Thu Sep 25 15:24:16 2014 +0530
> +++ b/source/encoder/search.cpp	Fri Sep 26 15:04:26 2014 +0530
> @@ -198,7 +198,7 @@
>      }
>  }
>  
> -void Search::xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
> +void Search::xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, CU* cuData)
>  {
>      // CU header
>      if (!absPartIdx)
> @@ -221,7 +221,7 @@
>      }
>      else
>      {
> -        uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
> +        uint32_t qtNumParts = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2)) >> 2;
>          if (!trDepth)
>          {
>              X265_CHECK(absPartIdx == 0, "unexpected absPartIdx %d\n", absPartIdx);
> @@ -233,7 +233,7 @@
>      }
>  }
>  
> -void Search::xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx)
> +void Search::xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx, CU* cuData)

The argument order feels wrong. It seems like we should be passing
cuData directly after the cu, and not as the last argument. (The same
goes for all the function changes in this patch.)

>  {
>      // chroma prediction mode
>      if (cu->getPartitionSize(0) == SIZE_2Nx2N || cu->getChromaFormat() != X265_CSP_I444)
> @@ -243,27 +243,27 @@
>      }
>      else
>      {
> -        uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
> +        uint32_t qtNumParts = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2)) >> 2;
>          if (!(absPartIdx & (qtNumParts - 1)))
>              m_entropyCoder->codeIntraDirChroma(cu, absPartIdx);
>      }
>  }
>  
> -uint32_t Search::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep)
> +uint32_t Search::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, CU* cuData)
>  {
>      int cuSize = 1 << cu->getLog2CUSize(absPartIdx);
>      m_entropyCoder->resetBits();
> -    xEncIntraHeaderChroma(cu, absPartIdx);
> +    xEncIntraHeaderChroma(cu, absPartIdx, cuData);
>      xEncSubdivCbfQTChroma(cu, trDepth, absPartIdx, absPartIdxStep, cuSize, cuSize);
>      xEncCoeffQTChroma(cu, trDepth, absPartIdx, TEXT_CHROMA_U);
>      xEncCoeffQTChroma(cu, trDepth, absPartIdx, TEXT_CHROMA_V);
>      return m_entropyCoder->getNumberOfWrittenBits();
>  }
>  
> -uint32_t Search::xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2])
> +uint32_t Search::xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2], CU* cuData)
>  {
>      m_entropyCoder->resetBits();
> -    xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
> +    xEncIntraHeaderLuma(cu, trDepth, absPartIdx, cuData);
>  
>      // Transform subdiv flag
>      if (log2TrSize != *depthRange)
> @@ -511,7 +511,7 @@
>                      break;
>                  else
>                  {
> -                    singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeff, depthRange);
> +                    singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeff, depthRange, cuData);
>                      if (m_rdCost.m_psyRd)
>                          singleCostTmp = m_rdCost.calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp);
>                      else
> @@ -565,7 +565,7 @@
>              }
>              cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
>  
> -            singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY, depthRange);
> +            singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY, depthRange, cuData);
>              if (m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice)
>                  singleBits *= 4;
>  
> @@ -1215,7 +1215,7 @@
>      uint32_t numPU        = 1 << (2 * initTrDepth);
>      uint32_t log2TrSize   = cu->getLog2CUSize(0) - initTrDepth;
>      uint32_t tuSize       = 1 << log2TrSize;
> -    uint32_t qNumParts    = cu->getTotalNumPart() >> 2;
> +    uint32_t qNumParts    = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2)) >> 2;
>      uint32_t sizeIdx      = log2TrSize - 2;
>      uint32_t partOffset   = 0;
>      uint32_t srcstride   = reconYuv->getStride();
> @@ -1392,7 +1392,7 @@
>      uint32_t initTrDepth = cu->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;
>      uint32_t numPU       = 1 << (2 * initTrDepth);
>      uint32_t log2TrSize  = cu->getLog2CUSize(0) - initTrDepth;
> -    uint32_t qNumParts   = cu->getTotalNumPart() >> 2;
> +    uint32_t qNumParts   = (NUM_CU_PARTITIONS >> ((g_log2Size[g_maxCUSize] - cuData->log2CUSize) * 2)) >> 2;
>  
>      // loop over partitions
>      uint32_t partOffset  = 0;
> @@ -1536,7 +1536,7 @@
>              if (cu->m_slice->m_pps->bTransformSkipEnabled)
>                  m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
>  
> -            uint32_t bits = xGetIntraBitsQTChroma(cu, initTrDepth, absPartIdxC, tuIterator.absPartIdxStep);
> +            uint32_t bits = xGetIntraBitsQTChroma(cu, initTrDepth, absPartIdxC, tuIterator.absPartIdxStep, cuData);
>              uint64_t cost = 0; 
>              if (m_rdCost.m_psyRd)
>                  cost = m_rdCost.calcPsyRdCost(dist, bits, psyEnergy);
> diff -r 76afa140c7ed -r cfa8a52f704e source/encoder/search.h
> --- a/source/encoder/search.h	Thu Sep 25 15:24:16 2014 +0530
> +++ b/source/encoder/search.h	Fri Sep 26 15:04:26 2014 +0530
> @@ -103,11 +103,11 @@
>  
>      void     xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx,  uint32_t absPartIdxStep, uint32_t width, uint32_t height);
>      void     xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);
> -    void     xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> -    void     xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx);
> +    void     xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, CU* cuData);
> +    void     xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx, CU* cuData);
>  
> -    uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep);
> -    uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2]);
> +    uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, CU* cuData);
> +    uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff, uint32_t depthRange[2], CU* cuData);
>      uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);
>      uint32_t xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
>                                   int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff, uint32_t& cbf, CU* cuData);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho