[x265] cleanup bReusePred, unify absTUPartIdx to absPartIdx

Deepthi Nandakumar deepthi at multicorewareinc.com
Tue May 20 11:24:11 CEST 2014


Thanks for this patch - testing it now.


On Mon, May 19, 2014 at 2:53 PM, Satoshi Nakagawa <nakagawa424 at oki.com> wrote:

> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1400490890 -32400
> #      Mon May 19 18:14:50 2014 +0900
> # Node ID 862e454b039b1642f18e692f8def392bb636df0b
> # Parent  ba2a9f61ea06f0ac799d8c0247eec770065465bb
> cleanup bReusePred, unify absTUPartIdx to absPartIdx
>
> diff -r ba2a9f61ea06 -r 862e454b039b source/Lib/TLibCommon/TComRom.h
> --- a/source/Lib/TLibCommon/TComRom.h   Fri May 16 19:20:46 2014 +0900
> +++ b/source/Lib/TLibCommon/TComRom.h   Mon May 19 18:14:50 2014 +0900
> @@ -92,8 +92,8 @@
>  extern uint32_t g_maxCUDepth;
>  extern uint32_t g_addCUDepth;
>
> -#define MAX_TS_WIDTH  4
> -#define MAX_TS_HEIGHT 4
> +#define LOG2_MAX_TS_SIZE 2 // TODO: RExt
> +#define MAX_TS_SIZE (1 << LOG2_MAX_TS_SIZE)
>
>  extern const uint32_t g_puOffset[8];
>
> diff -r ba2a9f61ea06 -r 862e454b039b source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp     Fri May 16 19:20:46 2014 +0900
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Mon May 19 18:14:50 2014 +0900
> @@ -132,9 +132,9 @@
>      m_qtTempTransformSkipFlag[1] = m_qtTempTransformSkipFlag[0] +
> numPartitions;
>      m_qtTempTransformSkipFlag[2] = m_qtTempTransformSkipFlag[0] +
> numPartitions * 2;
>
> -    CHECKED_MALLOC(m_qtTempTUCoeffY, coeff_t, MAX_TS_WIDTH *
> MAX_TS_HEIGHT * 3);
> -    m_qtTempTUCoeffCb = m_qtTempTUCoeffY + MAX_TS_WIDTH * MAX_TS_HEIGHT;
> -    m_qtTempTUCoeffCr = m_qtTempTUCoeffY + MAX_TS_WIDTH * MAX_TS_HEIGHT *
> 2;
> +    CHECKED_MALLOC(m_qtTempTUCoeffY, coeff_t, MAX_TS_SIZE * MAX_TS_SIZE *
> 3);
> +    m_qtTempTUCoeffCb = m_qtTempTUCoeffY + MAX_TS_SIZE * MAX_TS_SIZE;
> +    m_qtTempTUCoeffCr = m_qtTempTUCoeffY + MAX_TS_SIZE * MAX_TS_SIZE * 2;
>
>      return m_qtTempTransformSkipYuv.create(g_maxCUSize, g_maxCUSize,
> cfg->param->internalCsp);
>
> @@ -409,8 +409,7 @@
>                                       TComYuv*    fencYuv,
>                                       TComYuv*    predYuv,
>                                       ShortYuv*   resiYuv,
> -                                     uint32_t&   outDist,
> -                                     bool        bReusePred)
> +                                     uint32_t&   outDist)
>  {
>      uint32_t fullDepth    = cu->getDepth(0)  + trDepth;
>      uint32_t tuSize       = cu->getCUSize(0) >> trDepth;
> @@ -434,15 +433,6 @@
>      uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getStride();
>      bool     useTransformSkip = cu->getTransformSkip(absPartIdx,
> TEXT_LUMA);
>
> -    if (!bReusePred)
> -    {
> -        //===== init availability pattern =====
> -        uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
> -        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf,
> m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
> -        //===== get prediction signal =====
> -        predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
> -    }
> -
>      //===== get residual signal =====
>      X265_CHECK(!((uint32_t)(size_t)fenc & (tuSize - 1)), "fenc alignment
> check fail\n");
>      X265_CHECK(!((uint32_t)(size_t)pred & (tuSize - 1)), "pred alignment
> check fail\n");
> @@ -500,8 +490,7 @@
>                                         TComYuv*    predYuv,
>                                         ShortYuv*   resiYuv,
>                                         uint32_t&   outDist,
> -                                       uint32_t    chromaId,
> -                                       bool        bReusePred)
> +                                       uint32_t    chromaId)
>  {
>      uint32_t fullDepth   = cu->getDepth(0) + trDepth;
>      uint32_t trSizeLog2  =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
> @@ -540,24 +529,6 @@
>      bool     useTransformSkipChroma = cu->getTransformSkip(absPartIdx,
> ttype);
>      int      part = partitionFromSizes(tuSize, tuSize);
>
> -    if (!bReusePred)
> -    {
> -        //===== init availability pattern =====
> -        TComPattern::initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, chromaId);
> -        pixel* chromaPred = TComPattern::getAdiChromaBuf(chromaId,
> tuSize, m_predBuf);
> -
> -        uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdx);
> -        //===== update chroma mode =====
> -        if (chromaPredMode == DM_CHROMA_IDX)
> -        {
> -            uint32_t lumaLCUIdx  = (chFmt == CHROMA_444) ? absPartIdx :
> absPartIdx & (~((1 << (2 * g_addCUDepth)) - 1));
> -            chromaPredMode = cu->getLumaIntraDir(lumaLCUIdx);
> -        }
> -        chromaPredMode = (chFmt == CHROMA_422) ?
> g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
> -        //===== get prediction signal =====
> -        predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> tuSize, chFmt);
> -    }
> -
>      //===== get residual signal =====
>      X265_CHECK(!((uint32_t)(size_t)fenc & (tuSize - 1)), "fenc alignment
> check fail\n");
>      X265_CHECK(!((uint32_t)(size_t)pred & (tuSize - 1)), "pred alignment
> check fail\n");
> @@ -669,21 +640,33 @@
>      uint64_t singleCost  = MAX_INT64;
>      uint32_t singleDistY = 0;
>      uint32_t singleCbfY  = 0;
> -    bool   checkTransformSkip  =
> cu->getSlice()->getPPS()->getUseTransformSkip();
> -    uint32_t widthTransformSkip  = cu->getCUSize(0) >> trDepth;
> -    uint32_t heightTransformSkip = cu->getCUSize(0) >> trDepth;
>      int    bestModeId    = 0;
>
> -    checkTransformSkip &= (widthTransformSkip == 4 && heightTransformSkip
> == 4);
> -    checkTransformSkip &= (!cu->getCUTransquantBypass(0));
> -    checkTransformSkip &= (!((cu->getQP(0) == 0) &&
> (cu->getSlice()->getSPS()->getUseLossless())));
> -    if (m_cfg->param->bEnableTSkipFast)
> -    {
> -        checkTransformSkip &= (cu->getPartitionSize(absPartIdx) ==
> SIZE_NxN);
> -    }
> -
>      if (bCheckFull)
>      {
> +        uint32_t tuSize = 1 << trSizeLog2;
> +
> +        bool checkTransformSkip =
> (cu->getSlice()->getPPS()->getUseTransformSkip() &&
> +                                   trSizeLog2 <= LOG2_MAX_TS_SIZE &&
> +                                   !cu->getCUTransquantBypass(0));
> +        if (checkTransformSkip)
> +        {
> +            checkTransformSkip &= (!((cu->getQP(0) == 0) &&
> (cu->getSlice()->getSPS()->getUseLossless())));
> +            if (m_cfg->param->bEnableTSkipFast)
> +            {
> +                checkTransformSkip &= (cu->getPartitionSize(absPartIdx)
> == SIZE_NxN);
> +            }
> +        }
> +
> +        uint32_t stride       = fencYuv->getStride();
> +        pixel*   pred         = predYuv->getLumaAddr(absPartIdx);
> +
> +        //===== init availability pattern =====
> +        uint32_t lumaPredMode = cu->getLumaIntraDir(absPartIdx);
> +        TComPattern::initAdiPattern(cu, absPartIdx, trDepth, m_predBuf,
> m_refAbove, m_refLeft, m_refAboveFlt, m_refLeftFlt, lumaPredMode);
> +        //===== get prediction signal =====
> +        predIntraLumaAng(lumaPredMode, pred, stride, tuSize);
> +
>          if (checkTransformSkip == true)
>          {
>              //----- store original entropy coding status -----
> @@ -699,8 +682,7 @@
>                  singleDistYTmp = 0;
>                  cu->setTransformSkipSubParts(modeId, TEXT_LUMA,
> absPartIdx, fullDepth);
>                  //----- code luma block with given intra prediction mode
> and store Cbf-----
> -                bool bReusePred = modeId != firstCheckId;
> -                xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv,
> predYuv, resiYuv, singleDistYTmp, bReusePred);
> +                xIntraCodingLumaBlk(cu, trDepth, absPartIdx, fencYuv,
> predYuv, resiYuv, singleDistYTmp);
>                  singleCbfYTmp = cu->getCbf(absPartIdx, TEXT_LUMA,
> trDepth);
>                  //----- determine rate and r-d cost -----
>                  if (modeId == 1 && singleCbfYTmp == 0)
> @@ -1199,13 +1181,14 @@
>      if (trMode == trDepth)
>      {
>          int chFmt = cu->getChromaFormat();
> -        bool checkTransformSkip =
> cu->getSlice()->getPPS()->getUseTransformSkip();
>          uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUSize() >> fullDepth] + 2;
> +        uint32_t trSizeCLog2 = trSizeLog2 - m_hChromaShift;
>          uint32_t actualTrDepth = trDepth;
>          if ((trSizeLog2 == 2) && !(chFmt == CHROMA_444))
>          {
>              X265_CHECK(trDepth > 0, "invalid trDepth\n");
>              actualTrDepth--;
> +            trSizeCLog2++;
>              uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + actualTrDepth) << 1);
>              bool bFirstQ = ((absPartIdx % qpdiv) == 0);
>              if (!bFirstQ)
> @@ -1215,9 +1198,12 @@
>          }
>
>          uint32_t tuSize = cu->getCUSize(0) >> (actualTrDepth +
> m_hChromaShift);
> +        uint32_t stride = fencYuv->getCStride();
>          const bool splitIntoSubTUs = (chFmt == CHROMA_422);
>
> -        checkTransformSkip &= (tuSize <= 4);
> +        bool checkTransformSkip =
> (cu->getSlice()->getPPS()->getUseTransformSkip() &&
> +                                   trSizeCLog2 <= LOG2_MAX_TS_SIZE &&
> +                                   !cu->getCUTransquantBypass(0));
>
>          if (m_cfg->param->bEnableTSkipFast)
>          {
> @@ -1242,6 +1228,24 @@
>
>              do
>              {
> +                uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> +                pixel*   pred        = (chromaId == 1) ?
> predYuv->getCbAddr(absPartIdxC) : predYuv->getCrAddr(absPartIdxC);
> +
> +                //===== init availability pattern =====
> +                TComPattern::initAdiPatternChroma(cu, absPartIdxC,
> actualTrDepth, m_predBuf, chromaId);
> +                pixel* chromaPred =
> TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
> +
> +                uint32_t chromaPredMode =
> cu->getChromaIntraDir(absPartIdxC);
> +                //===== update chroma mode =====
> +                if (chromaPredMode == DM_CHROMA_IDX)
> +                {
> +                    uint32_t lumaLCUIdx  = (chFmt == CHROMA_444) ?
> absPartIdxC : absPartIdxC & (~((1 << (2 * g_addCUDepth)) - 1));
> +                    chromaPredMode = cu->getLumaIntraDir(lumaLCUIdx);
> +                }
> +                chromaPredMode = (chFmt == CHROMA_422) ?
> g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
> +                //===== get prediction signal =====
> +                predIntraChromaAng(chromaPred, chromaPredMode, pred,
> stride, tuSize, chFmt);
> +
>                  if (checkTransformSkip)
>                  {
>                      // use RDO to decide whether Cr/Cb takes TS
> @@ -1259,14 +1263,12 @@
>
>                      for (int chromaModeId = firstCheckId; chromaModeId <
> 2; chromaModeId++)
>                      {
> -                        cu->setTransformSkipPartRange(chromaModeId,
> (TextType)chromaId, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> +                        cu->setTransformSkipPartRange(chromaModeId,
> (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                          singleDistCTmp = 0;
> -                        bool bReusePred = chromaModeId != firstCheckId;
> -
> -                        xIntraCodingChromaBlk(cu, trDepth,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, fencYuv,
> predYuv, resiYuv, singleDistCTmp, (TextType)chromaId, bReusePred);
> -
> -                        singleCbfCTmp =
> cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trDepth);
> +                        xIntraCodingChromaBlk(cu, trDepth, absPartIdxC,
> tuIterator.m_absPartIdxStep, fencYuv, predYuv, resiYuv, singleDistCTmp,
> chromaId);
> +
> +                        singleCbfCTmp = cu->getCbf(absPartIdxC,
> (TextType)chromaId, trDepth);
>
>                          if (chromaModeId == 1 && singleCbfCTmp == 0)
>                          {
> @@ -1275,7 +1277,7 @@
>                          }
>                          else
>                          {
> -                            uint32_t bitsTmp = xGetIntraBitsQTChroma(cu,
> trDepth, tuIterator.m_absPartIdxTURelCU, chromaId, splitIntoSubTUs);
> +                            uint32_t bitsTmp = xGetIntraBitsQTChroma(cu,
> trDepth, absPartIdxC, chromaId, splitIntoSubTUs);
>                              singleCostTmp =
> m_rdCost->calcRdCost(singleDistCTmp, bitsTmp);
>                          }
>
> @@ -1288,7 +1290,7 @@
>
>                              if (bestModeId == firstCheckId)
>                              {
> -                                xStoreIntraResultChromaQT(cu, trDepth,
> tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, splitIntoSubTUs);
> +                                xStoreIntraResultChromaQT(cu, trDepth,
> absPartIdxC, (TextType)chromaId, splitIntoSubTUs);
>
>  m_rdGoOnSbacCoder->store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
>                              }
>                          }
> @@ -1300,13 +1302,13 @@
>
>                      if (bestModeId == firstCheckId)
>                      {
> -                        xLoadIntraResultChromaQT(cu, trDepth,
> tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, splitIntoSubTUs);
> -                        cu->setCbfPartRange(singleCbfC << trDepth,
> (TextType)chromaId, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> +                        xLoadIntraResultChromaQT(cu, trDepth,
> absPartIdxC, (TextType)chromaId, splitIntoSubTUs);
> +                        cu->setCbfPartRange(singleCbfC << trDepth,
> (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
>
>
>  m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]);
>                      }
>
> -                    cu->setTransformSkipPartRange(bestModeId,
> (TextType)chromaId, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> +                    cu->setTransformSkipPartRange(bestModeId,
> (TextType)chromaId, absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                      outDist += singleDistC;
>
> @@ -1317,8 +1319,8 @@
>                  }
>                  else
>                  {
> -                    cu->setTransformSkipPartRange(0, (TextType)chromaId,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> -                    xIntraCodingChromaBlk(cu, trDepth,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, fencYuv,
> predYuv, resiYuv, outDist, (TextType)chromaId);
> +                    cu->setTransformSkipPartRange(0, (TextType)chromaId,
> absPartIdxC, tuIterator.m_absPartIdxStep);
> +                    xIntraCodingChromaBlk(cu, trDepth, absPartIdxC,
> tuIterator.m_absPartIdxStep, fencYuv, predYuv, resiYuv, outDist, chromaId);
>                  }
>              }
>              while (isNextSection(&tuIterator));
> @@ -1442,32 +1444,32 @@
>
>              do
>              {
> -                uint32_t absTUPartIdxC = tuIterator.m_absPartIdxTURelCU;
> -                cu->setTransformSkipPartRange(0, (TextType)chromaId,
> absTUPartIdxC, tuIterator.m_absPartIdxStep);
> +                uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> +                cu->setTransformSkipPartRange(0, (TextType)chromaId,
> absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                  TextType ttype          = (chromaId == 1) ? TEXT_CHROMA_U
> : TEXT_CHROMA_V;
> -                pixel*   fenc           = (chromaId == 1) ?
> fencYuv->getCbAddr(absTUPartIdxC) : fencYuv->getCrAddr(absTUPartIdxC);
> -                pixel*   pred           = (chromaId == 1) ?
> predYuv->getCbAddr(absTUPartIdxC) : predYuv->getCrAddr(absTUPartIdxC);
> -                int16_t* residual       = (chromaId == 1) ?
> resiYuv->getCbAddr(absTUPartIdxC) : resiYuv->getCrAddr(absTUPartIdxC);
> -                pixel*   recon          = (chromaId == 1) ?
> reconYuv->getCbAddr(absTUPartIdxC) : reconYuv->getCrAddr(absTUPartIdxC);
> +                pixel*   fenc           = (chromaId == 1) ?
> fencYuv->getCbAddr(absPartIdxC) : fencYuv->getCrAddr(absPartIdxC);
> +                pixel*   pred           = (chromaId == 1) ?
> predYuv->getCbAddr(absPartIdxC) : predYuv->getCrAddr(absPartIdxC);
> +                int16_t* residual       = (chromaId == 1) ?
> resiYuv->getCbAddr(absPartIdxC) : resiYuv->getCrAddr(absPartIdxC);
> +                pixel*   recon          = (chromaId == 1) ?
> reconYuv->getCbAddr(absPartIdxC) : reconYuv->getCrAddr(absPartIdxC);
>                  uint32_t numCoeffPerInc =
> (cu->getSlice()->getSPS()->getMaxCUSize() *
> cu->getSlice()->getSPS()->getMaxCUSize() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> (m_hChromaShift +
> m_vChromaShift);
> -                coeff_t*  coeff         = (chromaId == 1 ?
> cu->getCoeffCb() : cu->getCoeffCr()) + numCoeffPerInc * absTUPartIdxC;
> -                uint32_t zorder           = cu->getZorderIdxInCU() +
> absTUPartIdxC;
> +                coeff_t*  coeff         = (chromaId == 1 ?
> cu->getCoeffCb() : cu->getCoeffCr()) + numCoeffPerInc * absPartIdxC;
> +                uint32_t zorder           = cu->getZorderIdxInCU() +
> absPartIdxC;
>                  pixel*   reconIPred       = (chromaId == 1) ?
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder) :
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
>                  uint32_t reconIPredStride =
> cu->getPic()->getPicYuvRec()->getCStride();
> -                //bool     useTransformSkipChroma =
> cu->getTransformSkip(absTUPartIdxC, ttype);
> +                //bool     useTransformSkipChroma =
> cu->getTransformSkip(absPartIdxC, ttype);
>                  const bool useTransformSkipChroma = false;
>
> -                uint32_t chromaPredMode =
> cu->getChromaIntraDir(absTUPartIdxC);
> +                uint32_t chromaPredMode =
> cu->getChromaIntraDir(absPartIdxC);
>                  //===== update chroma mode =====
>                  if (chromaPredMode == DM_CHROMA_IDX)
>                  {
> -                    uint32_t lumaLCUIdx  = (chFmt == CHROMA_444) ?
> absTUPartIdxC : absTUPartIdxC & (~((1 << (2 * g_addCUDepth)) - 1));
> +                    uint32_t lumaLCUIdx  = (chFmt == CHROMA_444) ?
> absPartIdxC : absPartIdxC & (~((1 << (2 * g_addCUDepth)) - 1));
>                      chromaPredMode = cu->getLumaIntraDir(lumaLCUIdx);
>                  }
>                  chromaPredMode = (chFmt == CHROMA_422) ?
> g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
>                  //===== init availability pattern =====
> -                TComPattern::initAdiPatternChroma(cu, absTUPartIdxC,
> actualTrDepth, m_predBuf, chromaId);
> +                TComPattern::initAdiPatternChroma(cu, absPartIdxC,
> actualTrDepth, m_predBuf, chromaId);
>                  pixel* chromaPred =
> TComPattern::getAdiChromaBuf(chromaId, tuSize, m_predBuf);
>
>                  //===== get prediction signal =====
> @@ -1497,17 +1499,17 @@
>
>                  m_trQuant->selectLambda(TEXT_CHROMA);
>
> -                absSum = m_trQuant->transformNxN(cu, residual, stride,
> coeff, tuSize, ttype, absTUPartIdxC, &lastPos, useTransformSkipChroma);
> +                absSum = m_trQuant->transformNxN(cu, residual, stride,
> coeff, tuSize, ttype, absPartIdxC, &lastPos, useTransformSkipChroma);
>
>                  //--- set coded block flag ---
> -                cu->setCbfPartRange((((absSum > 0) ? 1 : 0) <<
> origTrDepth), ttype, absTUPartIdxC, tuIterator.m_absPartIdxStep);
> +                cu->setCbfPartRange((((absSum > 0) ? 1 : 0) <<
> origTrDepth), ttype, absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                  //--- inverse transform ---
>                  if (absSum)
>                  {
>                      int scalingListType = 0 + ttype;
>                      X265_CHECK(scalingListType < 6, "scalingListType too
> large %d\n", scalingListType);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC),
> REG_DCT, residual, stride, coeff, tuSize, scalingListType,
> useTransformSkipChroma, lastPos);
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC),
> REG_DCT, residual, stride, coeff, tuSize, scalingListType,
> useTransformSkipChroma, lastPos);
>                  }
>                  else
>                  {
> @@ -2729,7 +2731,7 @@
>
>  m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);
>
>      uint32_t zeroDistortion = 0;
> -    xEstimateResidualQT(cu, 0, 0, outResiYuv, cu->getDepth(0), cost,
> bits, distortion, &zeroDistortion, curUseRDOQ);
> +    xEstimateResidualQT(cu, 0, outResiYuv, cu->getDepth(0), cost, bits,
> distortion, &zeroDistortion, curUseRDOQ);
>
>      m_entropyCoder->resetBits();
>      m_entropyCoder->encodeQtRootCbfZero(cu);
> @@ -2756,7 +2758,7 @@
>      }
>      else
>      {
> -        xSetResidualQTData(cu, 0, 0, NULL, cu->getDepth(0), false);
> +        xSetResidualQTData(cu, 0, NULL, cu->getDepth(0), false);
>      }
>
>
>  m_rdGoOnSbacCoder->load(m_rdSbacCoders[cu->getDepth(0)][CI_CURR_BEST]);
> @@ -2769,7 +2771,7 @@
>      {
>          if (cu->getQtRootCbf(0))
>          {
> -            xSetResidualQTData(cu, 0, 0, outBestResiYuv, cu->getDepth(0),
> true);
> +            xSetResidualQTData(cu, 0, outBestResiYuv, cu->getDepth(0),
> true);
>          }
>
>          bestBits = bits;
> @@ -2825,7 +2827,7 @@
>      }
>      if (cu->getPredictionMode(0) == MODE_INTER)
>      {
> -        residualTransformQuantInter(cu, 0, 0, resiYuv, cu->getDepth(0),
> true);
> +        residualTransformQuantInter(cu, 0, resiYuv, cu->getDepth(0),
> true);
>          uint32_t width  = cu->getCUSize(0);
>          if (cu->getQtRootCbf(0))
>          {
> @@ -2853,7 +2855,7 @@
>  #pragma warning(disable: 4701) // potentially uninitialized local variable
>  #endif
>
> -void TEncSearch::residualTransformQuantInter(TComDataCU* cu, uint32_t
> absPartIdx, uint32_t absTUPartIdx, ShortYuv* resiYuv, const uint32_t depth,
> bool curuseRDOQ)
> +void TEncSearch::residualTransformQuantInter(TComDataCU* cu, uint32_t
> absPartIdx, ShortYuv* resiYuv, const uint32_t depth, bool curuseRDOQ)
>  {
>      X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "invalid
> depth\n");
>      const uint32_t trMode = depth - cu->getDepth(0);
> @@ -2906,14 +2908,14 @@
>          m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET,
> 0, chFmt);
>          m_trQuant->selectLambda(TEXT_LUMA);
>
> -        absSumY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> +        absSumY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
>                                            trWidth, TEXT_LUMA, absPartIdx,
> &lastPosY, false, curuseRDOQ);
>
>          cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx,
> depth);
>
>          if (absSumY)
>          {
> -            int16_t *curResiY = resiYuv->getLumaAddr(absTUPartIdx);
> +            int16_t *curResiY = resiYuv->getLumaAddr(absPartIdx);
>
>              m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> QP_BD_OFFSET, 0, chFmt);
>
> @@ -2923,7 +2925,7 @@
>          }
>          else
>          {
> -            int16_t *ptr =  resiYuv->getLumaAddr(absTUPartIdx);
> +            int16_t *ptr =  resiYuv->getLumaAddr(absPartIdx);
>              X265_CHECK(trWidth == trHeight, "square transform
> expected\n");
>              primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr,
> resiYuv->m_width, 0);
>          }
> @@ -2939,63 +2941,63 @@
>
>              do
>              {
> -                uint32_t absTUPartIdxC = tuIterator.m_absPartIdxTURelCU;
> -                uint32_t subTUBufferOffset    = widthC * heightC *
> tuIterator.m_section;
> -
> -                cu->setTransformSkipPartRange(0, TEXT_CHROMA_U,
> absTUPartIdxC, tuIterator.m_absPartIdxStep);
> -                cu->setTransformSkipPartRange(0, TEXT_CHROMA_V,
> absTUPartIdxC, tuIterator.m_absPartIdxStep);
> +                uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> +                uint32_t subTUBufferOffset = widthC * heightC *
> tuIterator.m_section;
> +
> +                cu->setTransformSkipPartRange(0, TEXT_CHROMA_U,
> absPartIdxC, tuIterator.m_absPartIdxStep);
> +                cu->setTransformSkipPartRange(0, TEXT_CHROMA_V,
> absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                  int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>
>                  m_trQuant->selectLambda(TEXT_CHROMA);
>
> -                absSumU = m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU +
> subTUBufferOffset,
> -                                                  trWidthC,
> TEXT_CHROMA_U, absTUPartIdxC, &lastPosU, false, curuseRDOQ);
> +                absSumU = m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU +
> subTUBufferOffset,
> +                                                  trWidthC,
> TEXT_CHROMA_U, absPartIdxC, &lastPosU, false, curuseRDOQ);
>
>                  curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
> -                absSumV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV +
> subTUBufferOffset,
> -                                                  trWidthC,
> TEXT_CHROMA_V, absTUPartIdxC, &lastPosV, false, curuseRDOQ);
> -
> -                cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U,
> absTUPartIdxC, tuIterator.m_absPartIdxStep);
> -                cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V,
> absTUPartIdxC, tuIterator.m_absPartIdxStep);
> +                absSumV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV +
> subTUBufferOffset,
> +                                                  trWidthC,
> TEXT_CHROMA_V, absPartIdxC, &lastPosV, false, curuseRDOQ);
> +
> +                cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U,
> absPartIdxC, tuIterator.m_absPartIdxStep);
> +                cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V,
> absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                  if (absSumU)
>                  {
> -                    int16_t *pcResiCurrU =
> resiYuv->getCbAddr(absTUPartIdxC);
> +                    int16_t *pcResiCurrU =
> resiYuv->getCbAddr(absPartIdxC);
>
>                      curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
>                      m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>
>                      int scalingListType = 3 + TEXT_CHROMA_U;
>                      X265_CHECK(scalingListType < 6, "scalingListType too
> large %d\n", scalingListType);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC),
> REG_DCT, pcResiCurrU, resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
> trWidthC, scalingListType, false, lastPosU);
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC),
> REG_DCT, pcResiCurrU, resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
> trWidthC, scalingListType, false, lastPosU);
>                  }
>                  else
>                  {
> -                    int16_t *ptr = resiYuv->getCbAddr(absTUPartIdxC);
> +                    int16_t *ptr = resiYuv->getCbAddr(absPartIdxC);
>                      X265_CHECK(trWidthC == trHeightC, "square chroma
> transform expected\n");
>
>  primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr,
> resiYuv->m_cwidth, 0);
>                  }
>                  if (absSumV)
>                  {
> -                    int16_t *curResiV = resiYuv->getCrAddr(absTUPartIdxC);
> +                    int16_t *curResiV = resiYuv->getCrAddr(absPartIdxC);
>                      curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
>                      m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>
>                      int scalingListType = 3 + TEXT_CHROMA_V;
>                      X265_CHECK(scalingListType < 6, "scalingListType too
> large %d\n", scalingListType);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absTUPartIdxC),
> REG_DCT, curResiV, resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
> trWidthC, scalingListType, false, lastPosV);
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC),
> REG_DCT, curResiV, resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
> trWidthC, scalingListType, false, lastPosV);
>                  }
>                  else
>                  {
> -                    int16_t *ptr =  resiYuv->getCrAddr(absTUPartIdxC);
> +                    int16_t *ptr =  resiYuv->getCrAddr(absPartIdxC);
>                      X265_CHECK(trWidthC == trHeightC, "square chroma
> transform expected\n");
>
>  primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr,
> resiYuv->m_cwidth, 0);
>                  }
> -                cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U,
> absTUPartIdxC, tuIterator.m_absPartIdxStep);
> -                cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V,
> absTUPartIdxC, tuIterator.m_absPartIdxStep);
> +                cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U,
> absPartIdxC, tuIterator.m_absPartIdxStep);
> +                cu->setCbfPartRange(absSumV ? setCbf : 0, TEXT_CHROMA_V,
> absPartIdxC, tuIterator.m_absPartIdxStep);
>              }
>              while (isNextSection(&tuIterator));
>
> @@ -3014,8 +3016,7 @@
>          const uint32_t qPartNumSubdiv = cu->getPic()->getNumPartInCU() >>
> ((depth + 1) << 1);
>          for (uint32_t i = 0; i < 4; ++i)
>          {
> -            uint32_t nsAddr = absPartIdx + i * qPartNumSubdiv;
> -            residualTransformQuantInter(cu, absPartIdx + i *
> qPartNumSubdiv, nsAddr, resiYuv, depth + 1, curuseRDOQ);
> +            residualTransformQuantInter(cu, absPartIdx + i *
> qPartNumSubdiv, resiYuv, depth + 1, curuseRDOQ);
>          }
>
>          uint32_t ycbf = 0;
> @@ -3041,7 +3042,6 @@
>
>  void TEncSearch::xEstimateResidualQT(TComDataCU*    cu,
>                                       uint32_t       absPartIdx,
> -                                     uint32_t       absTUPartIdx,
>                                       ShortYuv*      resiYuv,
>                                       const uint32_t depth,
>                                       uint64_t &     rdCost,
> @@ -3110,8 +3110,9 @@
>          coeff_t *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
>
>          cu->setTrIdxSubParts(depth - cu->getDepth(0), absPartIdx, depth);
> -        bool checkTransformSkipY  =
> cu->getSlice()->getPPS()->getUseTransformSkip() && (trWidth == 4) &&
> (trHeight == 4) &&  (!cu->isLosslessCoded(0));
> -        bool checkTransformSkipUV =
> cu->getSlice()->getPPS()->getUseTransformSkip() && (trWidthC <= 4) &&
> (!cu->isLosslessCoded(0));
> +        bool checkTransformSkip   =
> cu->getSlice()->getPPS()->getUseTransformSkip() &&
> !cu->getCUTransquantBypass(0);
> +        bool checkTransformSkipY  = checkTransformSkip && trSizeLog2  <=
> LOG2_MAX_TS_SIZE;
> +        bool checkTransformSkipUV = checkTransformSkip && trSizeCLog2 <=
> LOG2_MAX_TS_SIZE;
>
>          cu->setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, depth);
>
> @@ -3123,7 +3124,7 @@
>          m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET,
> 0, chFmt);
>          m_trQuant->selectLambda(TEXT_LUMA);
>
> -        absSum[TEXT_LUMA][0] = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> +        absSum[TEXT_LUMA][0] = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
>                                                         trWidth,
> TEXT_LUMA, absPartIdx, &lastPos[TEXT_LUMA][0], false, curuseRDOQ);
>
>          cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA,
> absPartIdx, depth);
> @@ -3146,10 +3147,11 @@
>
>              do
>              {
> -                uint32_t subTUBufferOffset    = widthC * heightC *
> tuIterator.m_section;
> -
> -                cu->setTransformSkipPartRange(0, TEXT_CHROMA_U,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> -                cu->setTransformSkipPartRange(0, TEXT_CHROMA_V,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> +                uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> +                uint32_t subTUBufferOffset = widthC * heightC *
> tuIterator.m_section;
> +
> +                cu->setTransformSkipPartRange(0, TEXT_CHROMA_U,
> absPartIdxC, tuIterator.m_absPartIdxStep);
> +                cu->setTransformSkipPartRange(0, TEXT_CHROMA_V,
> absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                  if (m_cfg->bEnableRDOQ && curuseRDOQ)
>                  {
> @@ -3161,25 +3163,25 @@
>
>                  m_trQuant->selectLambda(TEXT_CHROMA);
>
> -                absSum[TEXT_CHROMA_U][tuIterator.m_section] =
> m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU), resiYuv->m_cwidth,
> coeffCurU + subTUBufferOffset,
> -
>              widthC, TEXT_CHROMA_U, tuIterator.m_absPartIdxTURelCU,
> &lastPos[TEXT_CHROMA_U][tuIterator.m_section], false, curuseRDOQ);
> +                absSum[TEXT_CHROMA_U][tuIterator.m_section] =
> m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
> +
>              widthC, TEXT_CHROMA_U, absPartIdxC,
> &lastPos[TEXT_CHROMA_U][tuIterator.m_section], false, curuseRDOQ);
>                  //Cr transform
>                  curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
> -                absSum[TEXT_CHROMA_V][tuIterator.m_section] =
> m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU), resiYuv->m_cwidth,
> coeffCurV + subTUBufferOffset,
> -
>              widthC, TEXT_CHROMA_V, tuIterator.m_absPartIdxTURelCU,
> &lastPos[TEXT_CHROMA_V][tuIterator.m_section], false, curuseRDOQ);
> -
> -
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_U, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> -
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_V, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> -
> -                m_entropyCoder->encodeQtCbf(cu,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, widthC,
> heightC, TEXT_CHROMA_U, trMode, true);
> +                absSum[TEXT_CHROMA_V][tuIterator.m_section] =
> m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
> +
>              widthC, TEXT_CHROMA_V, absPartIdxC,
> &lastPos[TEXT_CHROMA_V][tuIterator.m_section], false, curuseRDOQ);
> +
> +
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> +
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
> +
> +                m_entropyCoder->encodeQtCbf(cu, absPartIdxC,
> tuIterator.m_absPartIdxStep, widthC, heightC, TEXT_CHROMA_U, trMode, true);
>                  if (absSum[TEXT_CHROMA_U][tuIterator.m_section])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU +
> subTUBufferOffset, tuIterator.m_absPartIdxTURelCU, widthC, TEXT_CHROMA_U);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU +
> subTUBufferOffset, absPartIdxC, widthC, TEXT_CHROMA_U);
>                  singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] =
> m_entropyCoder->getNumberOfWrittenBits() - singleBitsPrev;
>
> -                m_entropyCoder->encodeQtCbf(cu,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, widthC,
> heightC, TEXT_CHROMA_V, trMode, true);
> +                m_entropyCoder->encodeQtCbf(cu, absPartIdxC,
> tuIterator.m_absPartIdxStep, widthC, heightC, TEXT_CHROMA_V, trMode, true);
>                  if (absSum[TEXT_CHROMA_V][tuIterator.m_section])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV +
> subTUBufferOffset, tuIterator.m_absPartIdxTURelCU, widthC, TEXT_CHROMA_V);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV +
> subTUBufferOffset, absPartIdxC, widthC, TEXT_CHROMA_V);
>                  uint32_t newBits =
> m_entropyCoder->getNumberOfWrittenBits();
>                  singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] =
> newBits - (singleBitsPrev +
> singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
>
> @@ -3198,7 +3200,7 @@
>          }
>
>          int partSize = partitionFromSizes(trWidth, trHeight);
> -        uint32_t distY =
> primitives.sse_sp[partSize](resiYuv->getLumaAddr(absTUPartIdx),
> resiYuv->m_width, (pixel*)TComRdCost::zeroPel, trWidth);
> +        uint32_t distY =
> primitives.sse_sp[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_width, (pixel*)TComRdCost::zeroPel, trWidth);
>
>          if (outZeroDist)
>          {
> @@ -3206,7 +3208,7 @@
>          }
>          if (absSum[TEXT_LUMA][0])
>          {
> -            int16_t *curResiY =
> m_qtTempShortYuv[qtlayer].getLumaAddr(absTUPartIdx);
> +            int16_t *curResiY =
> m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
>
>              m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> QP_BD_OFFSET, 0, chFmt);
>
> @@ -3215,7 +3217,7 @@
>              X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE,
> "width not full CU\n");
>
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiY, MAX_CU_SIZE,  coeffCurY, trWidth, scalingListType, false,
> lastPos[TEXT_LUMA][0]); //this is for inter mode only
>
> -            const uint32_t nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absTUPartIdx),
> resiYuv->m_width, m_qtTempShortYuv[qtlayer].getLumaAddr(absTUPartIdx),
> MAX_CU_SIZE);
> +            const uint32_t nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_width, m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx),
> MAX_CU_SIZE);
>              if (cu->isLosslessCoded(0))
>              {
>                  distY = nonZeroDistY;
> @@ -3258,7 +3260,7 @@
>
>          if (!absSum[TEXT_LUMA][0])
>          {
> -            int16_t *ptr =
>  m_qtTempShortYuv[qtlayer].getLumaAddr(absTUPartIdx);
> +            int16_t *ptr =
>  m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
>              X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE,
> "width not full CU\n");
>              X265_CHECK(trWidth == trHeight, "not square block\n");
>              primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr,
> MAX_CU_SIZE, 0);
> @@ -3280,9 +3282,10 @@
>
>              do
>              {
> +                uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
>                  uint32_t subTUBufferOffset = widthC * heightC *
> tuIterator.m_section;
>
> -                distU =
> m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU),
> resiYuv->m_cwidth, (pixel*)TComRdCost::zeroPel, widthC));
> +                distU =
> m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_cwidth, (pixel*)TComRdCost::zeroPel, widthC));
>
>                  if (outZeroDist)
>                  {
> @@ -3290,17 +3293,17 @@
>                  }
>                  if (absSum[TEXT_CHROMA_U][tuIterator.m_section])
>                  {
> -                    int16_t *pcResiCurrU =
> m_qtTempShortYuv[qtlayer].getCbAddr(tuIterator.m_absPartIdxTURelCU);
> +                    int16_t *pcResiCurrU =
> m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
>
>                      int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
>                      m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>
>                      int scalingListType = 3 + TEXT_CHROMA_U;
>                      X265_CHECK(scalingListType < 6, "scalingListType too
> large %d\n", scalingListType);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(tuIterator.m_absPartIdxTURelCU),
> REG_DCT, pcResiCurrU, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurU +
> subTUBufferOffset,
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC),
> REG_DCT, pcResiCurrU, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurU +
> subTUBufferOffset,
>                                                 widthC, scalingListType,
> false, lastPos[TEXT_CHROMA_U][tuIterator.m_section]);
> -                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU),
> resiYuv->m_cwidth,
> -
> m_qtTempShortYuv[qtlayer].getCbAddr(tuIterator.m_absPartIdxTURelCU),
> +                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_cwidth,
> +
> m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC),
>
> m_qtTempShortYuv[qtlayer].m_cwidth);
>                      const uint32_t nonZeroDistU =
> m_rdCost->scaleChromaDistCb(dist);
>
> @@ -3346,29 +3349,29 @@
>
>                  if (!absSum[TEXT_CHROMA_U][tuIterator.m_section])
>                  {
> -                    int16_t *ptr =
> m_qtTempShortYuv[qtlayer].getCbAddr(tuIterator.m_absPartIdxTURelCU);
> +                    int16_t *ptr =
> m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
>                      const uint32_t stride =
> m_qtTempShortYuv[qtlayer].m_cwidth;
>                      X265_CHECK(trWidthC == trHeightC, "square chroma
> transform expected\n");
>
>  primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0);
>                  }
>
> -                distV =
> m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU),
> resiYuv->m_cwidth, (pixel*)TComRdCost::zeroPel, widthC));
> +                distV =
> m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_cwidth, (pixel*)TComRdCost::zeroPel, widthC));
>                  if (outZeroDist)
>                  {
>                      *outZeroDist += distV;
>                  }
>                  if (absSum[TEXT_CHROMA_V][tuIterator.m_section])
>                  {
> -                    int16_t *curResiV =
> m_qtTempShortYuv[qtlayer].getCrAddr(tuIterator.m_absPartIdxTURelCU);
> +                    int16_t *curResiV =
> m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
>                      int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
>                      m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>
>                      int scalingListType = 3 + TEXT_CHROMA_V;
>                      X265_CHECK(scalingListType < 6, "scalingListType too
> large %d\n", scalingListType);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(tuIterator.m_absPartIdxTURelCU),
> REG_DCT, curResiV, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurV +
> subTUBufferOffset,
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC),
> REG_DCT, curResiV, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurV +
> subTUBufferOffset,
>                                                 widthC, scalingListType,
> false, lastPos[TEXT_CHROMA_V][tuIterator.m_section]);
> -                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU),
> resiYuv->m_cwidth,
> -
> m_qtTempShortYuv[qtlayer].getCrAddr(tuIterator.m_absPartIdxTURelCU),
> +                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_cwidth,
> +
> m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC),
>
> m_qtTempShortYuv[qtlayer].m_cwidth);
>                      const uint32_t nonZeroDistV =
> m_rdCost->scaleChromaDistCr(dist);
>
> @@ -3414,14 +3417,14 @@
>
>                  if (!absSum[TEXT_CHROMA_V][tuIterator.m_section])
>                  {
> -                    int16_t *ptr =
>  m_qtTempShortYuv[qtlayer].getCrAddr(tuIterator.m_absPartIdxTURelCU);
> +                    int16_t *ptr =
>  m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
>                      const uint32_t stride =
> m_qtTempShortYuv[qtlayer].m_cwidth;
>                      X265_CHECK(trWidthC == trHeightC, "square chroma
> transform expected\n");
>
>  primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0);
>                  }
>
> -
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_U, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> -
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_V, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> +
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> +
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
>              }
>              while (isNextSection(&tuIterator));
>          }
> @@ -3432,7 +3435,7 @@
>              uint32_t nonZeroDistY = 0, absSumTransformSkipY;
>              uint64_t singleCostY = MAX_INT64;
>
> -            int16_t *curResiY =
> m_qtTempShortYuv[qtlayer].getLumaAddr(absTUPartIdx);
> +            int16_t *curResiY =
> m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx);
>              X265_CHECK(m_qtTempShortYuv[qtlayer].m_width == MAX_CU_SIZE,
> "width not full CU\n");
>
>              coeff_t bestCoeffY[32 * 32];
> @@ -3456,7 +3459,7 @@
>              m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> QP_BD_OFFSET, 0, chFmt);
>
>              m_trQuant->selectLambda(TEXT_LUMA);
> -            absSumTransformSkipY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> +            absSumTransformSkipY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absPartIdx), resiYuv->m_width, coeffCurY,
>                                                             trWidth,
> TEXT_LUMA, absPartIdx, &lastPosTransformSkip[TEXT_LUMA][0], true,
> curuseRDOQ);
>              cu->setCbfSubParts(absSumTransformSkipY ? setCbf : 0,
> TEXT_LUMA, absPartIdx, depth);
>
> @@ -3475,8 +3478,8 @@
>
>
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiY, MAX_CU_SIZE,  coeffCurY, trWidth, scalingListType, true,
> lastPosTransformSkip[TEXT_LUMA][0]);
>
> -                nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absTUPartIdx),
> resiYuv->m_width,
> -
> m_qtTempShortYuv[qtlayer].getLumaAddr(absTUPartIdx),
> +                nonZeroDistY =
> primitives.sse_ss[partSize](resiYuv->getLumaAddr(absPartIdx),
> resiYuv->m_width,
> +
> m_qtTempShortYuv[qtlayer].getLumaAddr(absPartIdx),
>                                                             MAX_CU_SIZE);
>
>                  singleCostY = m_rdCost->calcRdCost(nonZeroDistY,
> skipSingleBitsY);
> @@ -3520,10 +3523,11 @@
>
>              do
>              {
> +                uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
>                  uint32_t subTUBufferOffset = widthC * heightC *
> tuIterator.m_section;
>
> -                int16_t *curResiU =
> m_qtTempShortYuv[qtlayer].getCbAddr(tuIterator.m_absPartIdxTURelCU);
> -                int16_t *curResiV =
> m_qtTempShortYuv[qtlayer].getCrAddr(tuIterator.m_absPartIdxTURelCU);
> +                int16_t *curResiU =
> m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC);
> +                int16_t *curResiV =
> m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC);
>                  uint32_t stride = m_qtTempShortYuv[qtlayer].m_cwidth;
>
>                  coeff_t bestCoeffU[32 * 32], bestCoeffV[32 * 32];
> @@ -3537,8 +3541,8 @@
>                      memcpy(&bestResiV[i * widthC], curResiV + i * stride,
> sizeof(int16_t) * widthC);
>                  }
>
> -                cu->setTransformSkipPartRange(1, TEXT_CHROMA_U,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> -                cu->setTransformSkipPartRange(1, TEXT_CHROMA_V,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> +                cu->setTransformSkipPartRange(1, TEXT_CHROMA_U,
> absPartIdxC, tuIterator.m_absPartIdxStep);
> +                cu->setTransformSkipPartRange(1, TEXT_CHROMA_V,
> absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                  if (m_cfg->bEnableRDOQTS)
>                  {
> @@ -3549,23 +3553,23 @@
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>                  m_trQuant->selectLambda(TEXT_CHROMA);
>
> -                absSumTransformSkipU = m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU), resiYuv->m_cwidth,
> coeffCurU + subTUBufferOffset,
> -                                                               widthC,
> TEXT_CHROMA_U, tuIterator.m_absPartIdxTURelCU,
> &lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section], true,
> curuseRDOQ);
> +                absSumTransformSkipU = m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU +
> subTUBufferOffset,
> +                                                               widthC,
> TEXT_CHROMA_U, absPartIdxC,
> &lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section], true,
> curuseRDOQ);
>                  curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
> -                absSumTransformSkipV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU), resiYuv->m_cwidth,
> coeffCurV + subTUBufferOffset,
> -                                                               widthC,
> TEXT_CHROMA_V, tuIterator.m_absPartIdxTURelCU,
> &lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section], true,
> curuseRDOQ);
> -
> -                cu->setCbfPartRange(absSumTransformSkipU ? setCbf : 0,
> TEXT_CHROMA_U, tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> -                cu->setCbfPartRange(absSumTransformSkipV ? setCbf : 0,
> TEXT_CHROMA_V, tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> +                absSumTransformSkipV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV +
> subTUBufferOffset,
> +                                                               widthC,
> TEXT_CHROMA_V, absPartIdxC,
> &lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section], true,
> curuseRDOQ);
> +
> +                cu->setCbfPartRange(absSumTransformSkipU ? setCbf : 0,
> TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> +                cu->setCbfPartRange(absSumTransformSkipV ? setCbf : 0,
> TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                  m_entropyCoder->resetBits();
>                  singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = 0;
>
>                  if (absSumTransformSkipU)
>                  {
> -                    m_entropyCoder->encodeQtCbf(cu,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, widthC,
> heightC, TEXT_CHROMA_U, trMode, true);
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU +
> subTUBufferOffset, tuIterator.m_absPartIdxTURelCU, widthC, TEXT_CHROMA_U);
> +                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC,
> tuIterator.m_absPartIdxStep, widthC, heightC, TEXT_CHROMA_U, trMode, true);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU +
> subTUBufferOffset, absPartIdxC, widthC, TEXT_CHROMA_U);
>                      singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] =
> m_entropyCoder->getNumberOfWrittenBits();
>
>                      curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> @@ -3573,10 +3577,10 @@
>
>                      int scalingListType = 3 + TEXT_CHROMA_U;
>                      X265_CHECK(scalingListType < 6, "scalingListType too
> large %d\n", scalingListType);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(tuIterator.m_absPartIdxTURelCU),
> REG_DCT, curResiU, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurU +
> subTUBufferOffset,
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC),
> REG_DCT, curResiU, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurU +
> subTUBufferOffset,
>                                                 widthC, scalingListType,
> true, lastPosTransformSkip[TEXT_CHROMA_U][tuIterator.m_section]);
> -                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU),
> resiYuv->m_cwidth,
> -
> m_qtTempShortYuv[qtlayer].getCbAddr(tuIterator.m_absPartIdxTURelCU),
> +                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC),
> resiYuv->m_cwidth,
> +
> m_qtTempShortYuv[qtlayer].getCbAddr(absPartIdxC),
>
> m_qtTempShortYuv[qtlayer].m_cwidth);
>                      nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
>                      singleCostU = m_rdCost->calcRdCost(nonZeroDistU,
> singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
> @@ -3584,7 +3588,7 @@
>
>                  if (!absSumTransformSkipU ||
> minCost[TEXT_CHROMA_U][tuIterator.m_section] < singleCostU)
>                  {
> -                    cu->setTransformSkipPartRange(0, TEXT_CHROMA_U,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> +                    cu->setTransformSkipPartRange(0, TEXT_CHROMA_U,
> absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                      memcpy(coeffCurU + subTUBufferOffset, bestCoeffU,
> sizeof(coeff_t) * numSamplesChroma);
>                      for (int i = 0; i < heightC; ++i)
> @@ -3601,8 +3605,8 @@
>
>                  if (absSumTransformSkipV)
>                  {
> -                    m_entropyCoder->encodeQtCbf(cu,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep, widthC,
> heightC, TEXT_CHROMA_V, trMode, true);
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV +
> subTUBufferOffset, tuIterator.m_absPartIdxTURelCU, widthC, TEXT_CHROMA_V);
> +                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC,
> tuIterator.m_absPartIdxStep, widthC, heightC, TEXT_CHROMA_V, trMode, true);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV +
> subTUBufferOffset, absPartIdxC, widthC, TEXT_CHROMA_V);
>                      singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] =
> m_entropyCoder->getNumberOfWrittenBits() -
> singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section];
>
>                      curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> @@ -3610,10 +3614,10 @@
>
>                      int scalingListType = 3 + TEXT_CHROMA_V;
>                      X265_CHECK(scalingListType < 6, "scalingListType too
> large %d\n", scalingListType);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(tuIterator.m_absPartIdxTURelCU),
> REG_DCT, curResiV, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurV +
> subTUBufferOffset,
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC),
> REG_DCT, curResiV, m_qtTempShortYuv[qtlayer].m_cwidth, coeffCurV +
> subTUBufferOffset,
>                                                 widthC, scalingListType,
> true, lastPosTransformSkip[TEXT_CHROMA_V][tuIterator.m_section]);
> -                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU),
> resiYuv->m_cwidth,
> -
> m_qtTempShortYuv[qtlayer].getCrAddr(tuIterator.m_absPartIdxTURelCU),
> +                    uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC),
> resiYuv->m_cwidth,
> +
> m_qtTempShortYuv[qtlayer].getCrAddr(absPartIdxC),
>
> m_qtTempShortYuv[qtlayer].m_cwidth);
>                      nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
>                      singleCostV = m_rdCost->calcRdCost(nonZeroDistV,
> singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section]);
> @@ -3621,7 +3625,7 @@
>
>                  if (!absSumTransformSkipV ||
> minCost[TEXT_CHROMA_V][tuIterator.m_section] < singleCostV)
>                  {
> -                    cu->setTransformSkipPartRange(0, TEXT_CHROMA_V,
> tuIterator.m_absPartIdxTURelCU, tuIterator.m_absPartIdxStep);
> +                    cu->setTransformSkipPartRange(0, TEXT_CHROMA_V,
> absPartIdxC, tuIterator.m_absPartIdxStep);
>
>                      memcpy(coeffCurV + subTUBufferOffset, bestCoeffV,
> sizeof(coeff_t) * numSamplesChroma);
>                      for (int i = 0; i < heightC; ++i)
> @@ -3636,8 +3640,8 @@
>
>  bestTransformMode[TEXT_CHROMA_V][tuIterator.m_section] = 1;
>                  }
>
> -
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_U, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> -
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_V, tuIterator.m_absPartIdxTURelCU,
> tuIterator.m_absPartIdxStep);
> +
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> +
>  cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf :
> 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
>              }
>              while (isNextSection(&tuIterator));
>          }
> @@ -3752,8 +3756,7 @@
>          const uint32_t qPartNumSubdiv = cu->getPic()->getNumPartInCU() >>
> ((depth + 1) << 1);
>          for (uint32_t i = 0; i < 4; ++i)
>          {
> -            uint32_t nsAddr = absPartIdx + i * qPartNumSubdiv;
> -            xEstimateResidualQT(cu, absPartIdx + i * qPartNumSubdiv,
> nsAddr, resiYuv, depth + 1, subDivCost, subdivBits, subdivDist, bCheckFull
> ? NULL : outZeroDist);
> +            xEstimateResidualQT(cu, absPartIdx + i * qPartNumSubdiv,
> resiYuv, depth + 1, subDivCost, subdivBits, subdivDist, bCheckFull ? NULL :
> outZeroDist);
>          }
>
>          uint32_t ycbf = 0;
> @@ -3977,7 +3980,7 @@
>      }
>  }
>
> -void TEncSearch::xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t absTUPartIdx, ShortYuv* resiYuv, uint32_t depth, bool bSpatial)
> +void TEncSearch::xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx,
> ShortYuv* resiYuv, uint32_t depth, bool bSpatial)
>  {
>      X265_CHECK(cu->getDepth(0) == cu->getDepth(absPartIdx), "depth not
> matching\n");
>      const uint32_t curTrMode = depth - cu->getDepth(0);
> @@ -4006,7 +4009,7 @@
>          {
>              uint32_t trWidth  = 1 << trSizeLog2;
>              uint32_t trHeight = 1 << trSizeLog2;
> -            m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv,
> absTUPartIdx, trWidth, trHeight);
> +            m_qtTempShortYuv[qtlayer].copyPartToPartLuma(resiYuv,
> absPartIdx, trWidth, trHeight);
>
>              if (bCodeChroma)
>              {
> @@ -4040,8 +4043,7 @@
>          const uint32_t qPartNumSubdiv = cu->getPic()->getNumPartInCU() >>
> ((depth + 1) << 1);
>          for (uint32_t i = 0; i < 4; ++i)
>          {
> -            uint32_t nsAddr = absPartIdx + i * qPartNumSubdiv;
> -            xSetResidualQTData(cu, absPartIdx + i * qPartNumSubdiv,
> nsAddr, resiYuv, depth + 1, bSpatial);
> +            xSetResidualQTData(cu, absPartIdx + i * qPartNumSubdiv,
> resiYuv, depth + 1, bSpatial);
>          }
>      }
>  }
> diff -r ba2a9f61ea06 -r 862e454b039b source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h       Fri May 16 19:20:46 2014
> +0900
> +++ b/source/Lib/TLibEncoder/TEncSearch.h       Mon May 19 18:14:50 2014
> +0900
> @@ -177,11 +177,11 @@
>
>      void generateCoeffRecon(TComDataCU* cu, TComYuv* fencYuv, TComYuv*
> predYuv, ShortYuv* resiYuv, TComYuv* reconYuv, bool skipRes);
>
> -    void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t absTUPartIdx, ShortYuv* resiYuv, uint32_t depth,
> +    void xEstimateResidualQT(TComDataCU* cu, uint32_t absPartIdx,
> ShortYuv* resiYuv, uint32_t depth,
>                               uint64_t &rdCost, uint32_t &outBits,
> uint32_t &outDist, uint32_t *puiZeroDist, bool curUseRDOQ = true);
> -    void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, uint32_t
> absTUPartIdx, ShortYuv* resiYuv, uint32_t depth, bool bSpatial);
> +    void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx,
> ShortYuv* resiYuv, uint32_t depth, bool bSpatial);
>
> -    void residualTransformQuantInter(TComDataCU* cu, uint32_t absPartIdx,
> uint32_t absTUPartIdx, ShortYuv* resiYuv, uint32_t depth, bool curUseRDOQ =
> true);
> +    void residualTransformQuantInter(TComDataCU* cu, uint32_t absPartIdx,
> ShortYuv* resiYuv, uint32_t depth, bool curUseRDOQ = true);
>
>      //
> -------------------------------------------------------------------------------------------------------------------
>      // compute symbol bits
> @@ -206,10 +206,10 @@
>      uint32_t xGetIntraBitsQT(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, uint32_t absPartIdxStep, bool bLuma, bool bChroma);
>      uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, uint32_t chromaId, const bool splitIntoSubTUs);
>      void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, TComYuv* fencYuv, TComYuv* predYuv,
> -                             ShortYuv* resiYuv, uint32_t& outDist, bool
> bReusePred = false);
> +                             ShortYuv* resiYuv, uint32_t& outDist);
>
>      void xIntraCodingChromaBlk(TComDataCU* cu, uint32_t trDepth, uint32_t
> absPartIdx, uint32_t absPartIdxStep, TComYuv* fencYuv, TComYuv* predYuv,
> -                               ShortYuv* resiYuv, uint32_t& outDist,
> uint32_t uiChromaId, bool bReusePred = false);
> +                               ShortYuv* resiYuv, uint32_t& outDist,
> uint32_t chromaId);
>
>      void xRecurIntraChromaCodingQT(TComDataCU* cu, uint32_t trDepth,
> uint32_t absPartIdx, TComYuv* fencYuv,
>                                     TComYuv* predYuv, ShortYuv* resiYuv,
> uint32_t& outDist);
> diff -r ba2a9f61ea06 -r 862e454b039b source/encoder/compress.cpp
> --- a/source/encoder/compress.cpp       Fri May 16 19:20:46 2014 +0900
> +++ b/source/encoder/compress.cpp       Mon May 19 18:14:50 2014 +0900
> @@ -892,7 +892,7 @@
>              primitives.chroma[m_param->internalCsp].sub_ps[part](dst,
> dststride, src1, src2, src1stride, src2stride);
>
>              //Residual encoding
> -            m_search->residualTransformQuantInter(cu, 0, 0,
> m_tmpResiYuv[depth], cu->getDepth(0), true);
> +            m_search->residualTransformQuantInter(cu, 0,
> m_tmpResiYuv[depth], cu->getDepth(0), true);
>              xCheckDQP(cu);
>
>              if (lcu->getMergeFlag(absPartIdx) && cu->getPartitionSize(0)
> == SIZE_2Nx2N && !cu->getQtRootCbf(0))
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140520/d951631a/attachment-0001.html>


More information about the x265-devel mailing list