[x265] [PATCH] Modify TEncSearch structure to support multiple color space formats

Steve Borho steve at borho.org
Wed Jan 8 00:37:26 CET 2014


2014/1/3 <ashok at multicorewareinc.com>

> # HG changeset patch
> # User ashok at multicorewareinc.com
> # Date 1388753602 -19800
> #      Fri Jan 03 18:23:22 2014 +0530
> # Node ID 6a602378d31b983db4f4293621c35e73f91f9922
> # Parent  98e238489f843ff233c512570c1fab75b2ff801a
> Modify TEncSearch structure to support multiple color space formats
>
> diff -r 98e238489f84 -r 6a602378d31b source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp     Fri Jan 03 18:21:41 2014
> +0530
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Fri Jan 03 18:23:22 2014
> +0530
> @@ -229,13 +229,10 @@
>
>      if (bChroma)
>      {
> -        if (trSizeLog2 > 2)
> -        {
> -            if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U,
> trDepth - 1))
> -                m_entropyCoder->encodeQtCbf(cu, absPartIdx,
> TEXT_CHROMA_U, trDepth);
> -            if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_V,
> trDepth - 1))
> -                m_entropyCoder->encodeQtCbf(cu, absPartIdx,
> TEXT_CHROMA_V, trDepth);
> -        }
> +        if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth
> - 1))
> +            m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_CHROMA_U,
> trDepth);
> +        if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_V, trDepth
> - 1))
> +            m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_CHROMA_V,
> trDepth);
>      }
>
>      if (subdiv)
> @@ -275,7 +272,7 @@
>          return;
>      }
>
> -    if (ttype != TEXT_LUMA && trSizeLog2 == 2)
> +    if ( (ttype != TEXT_LUMA) && (trSizeLog2 == 2) &&
> !(cu->getChromaFormat() == CHROMA_444))
>

White-space nit: there should be no space after the opening parenthesis in `if ( (ttype != TEXT_LUMA) ...`.


>      {
>          assert(trDepth > 0);
>          trDepth--;
> @@ -288,9 +285,11 @@
>      }
>
>      //===== coefficients =====
> -    uint32_t width = cu->getWidth(0) >> (trDepth + chroma);
> -    uint32_t height = cu->getHeight(0) >> (trDepth + chroma);
> -    uint32_t coeffOffset = (cu->getPic()->getMinCUWidth() *
> cu->getPic()->getMinCUHeight() * absPartIdx) >> (chroma << 1);
> +    int cspx = chroma ? m_hChromaShift : 0;
> +    int cspy = chroma ? m_vChromaShift : 0;
> +    uint32_t width = cu->getWidth(0) >> (trDepth + cspx);
> +    uint32_t height = cu->getHeight(0) >> (trDepth + cspy);
> +    uint32_t coeffOffset = (cu->getPic()->getMinCUWidth() >> cspx) *
> (cu->getPic()->getMinCUHeight() >> cspy) * absPartIdx;
>      uint32_t qtLayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>      TCoeff* coeff = 0;
>      switch (ttype)
> @@ -363,12 +362,23 @@
>              }
>          }
>      }
> +
>      if (bChroma)
>      {
>          // chroma prediction mode
> -        if (absPartIdx == 0)
> +        if ((cu->getPartitionSize(0) == SIZE_2Nx2N) ||
> !(cu->getChromaFormat() == CHROMA_444))
>          {
> -            m_entropyCoder->encodeIntraDirModeChroma(cu, 0, true);
> +            if (absPartIdx == 0)
> +            {
> +                m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx,
> true);
> +            }
> +        }
> +        else
> +        {
> +            uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
> +            assert(trDepth > 0);
> +            if ((absPartIdx%qtNumParts) == 0)
>

White-space nit: put spaces around the `%` operator (`absPartIdx % qtNumParts`).


> +                m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx,
> true);
>          }
>      }
>  }
> @@ -475,7 +485,7 @@
>      int lastPos = -1;
>      cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
>
> -    m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> +    m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>      m_trQuant->selectLambda(TEXT_LUMA);
>
>      absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width,
> height, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
> @@ -520,7 +530,7 @@
>      uint32_t fullDepth   = cu->getDepth(0) + trDepth;
>      uint32_t trSizeLog2  =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> fullDepth] + 2;
>
> -    if (trSizeLog2 == 2)
> +    if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
>      {
>          assert(trDepth > 0);
>          trDepth--;
> @@ -534,7 +544,7 @@
>
>      TextType ttype          = (chromaId > 0 ? TEXT_CHROMA_V :
> TEXT_CHROMA_U);
>      uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdx);
> -    uint32_t width          = cu->getWidth(0) >> (trDepth +
> m_hChromaShift);
> +    uint32_t width          = cu->getWidth(0)  >> (trDepth +
> m_hChromaShift);
>      uint32_t height         = cu->getHeight(0) >> (trDepth +
> m_vChromaShift);
>      uint32_t stride         = fencYuv->getCStride();
>      Pel*     fenc           = (chromaId > 0 ?
> fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));
> @@ -543,10 +553,10 @@
>      Pel*     recon          = (chromaId > 0 ?
> predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));
>
>      uint32_t qtlayer        =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
> -    uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth()
> * cu->getSlice()->getSPS()->getMaxCUHeight() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> 2;
> +    uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth()
> * cu->getSlice()->getSPS()->getMaxCUHeight() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> (m_hChromaShift +
> m_vChromaShift);
>      TCoeff*  coeff          = (chromaId > 0 ? m_qtTempCoeffCr[qtlayer] :
> m_qtTempCoeffCb[qtlayer]) + numCoeffPerInc * absPartIdx;
>      int16_t* reconQt        = (chromaId > 0 ?
> m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx) :
> m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx));
> -    assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
> +    uint32_t reconQtStride  = m_qtTempTComYuv[qtlayer].m_cwidth;
>
>      uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;
>      Pel*     reconIPred       = (chromaId > 0 ?
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder) :
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder));
> @@ -557,7 +567,7 @@
>      //===== update chroma mode =====
>      if (chromaPredMode == DM_CHROMA_IDX)
>      {
> -        chromaPredMode = cu->getLumaIntraDir(0);
> +        chromaPredMode = cu->getLumaIntraDir(absPartIdx);
>      }
>
>      //===== init availability pattern =====
> @@ -565,11 +575,11 @@
>      {
>          cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
>
> -        cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight);
> +        cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
>          Pel* chromaPred = (chromaId > 0 ?
> cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) :
> cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
>
>          //===== get prediction signal =====
> -        predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width);
> +        predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width, height, cu->getChromaFormat());
>

We should cache the chroma format in TComPrediction instead of passing cu->getChromaFormat() to every predIntraChromaAng() call.


>
>          // save prediction
>          if (default0Save1Load2 == 1)
> @@ -612,7 +622,7 @@
>          {
>              curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
>          }
> -        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>          m_trQuant->selectLambda(TEXT_CHROMA);
>
> @@ -639,7 +649,7 @@
>      //===== reconstruction =====
>      assert(((uint32_t)(size_t)residual & (width - 1)) == 0);
>      assert(width <= 32);
> -    primitives.calcrecon[size](pred, residual, recon, reconQt,
> reconIPred, stride, MAX_CU_SIZE / 2, reconIPredStride);
> +    primitives.calcrecon[size](pred, residual, recon, reconQt,
> reconIPred, stride, reconQtStride, reconIPredStride);
>
>      //===== update distortion =====
>      uint32_t dist = primitives.sse_pp[part](fenc, stride, recon, stride);
> @@ -702,11 +712,11 @@
>      uint32_t singleCbfY  = 0;
>      uint32_t singleCbfU  = 0;
>      uint32_t singleCbfV  = 0;
> -    bool   checkTransformSkip  =
> cu->getSlice()->getPPS()->getUseTransformSkip();
> +    bool     checkTransformSkip  =
> cu->getSlice()->getPPS()->getUseTransformSkip();
>      uint32_t widthTransformSkip  = cu->getWidth(0) >> trDepth;
>      uint32_t heightTransformSkip = cu->getHeight(0) >> trDepth;
> -    int    bestModeId    = 0;
> -    int    bestModeIdUV[2] = { 0, 0 };
> +    int      bestModeId          = 0;
> +    int      bestModeIdUV[2]     = { 0, 0 };
>
>      checkTransformSkip &= (widthTransformSkip == 4 && heightTransformSkip
> == 4);
>      checkTransformSkip &= (!cu->getCUTransquantBypass(0));
> @@ -729,8 +739,8 @@
>              uint32_t singleCbfUTmp      = 0;
>              uint32_t singleCbfVTmp      = 0;
>              uint64_t singleCostTmp      = 0;
> -            int    default0Save1Load2 = 0;
> -            int    firstCheckId       = 0;
> +            int      default0Save1Load2 = 0;
> +            int      firstCheckId       = 0;
>
>              uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + (trDepth - 1)) << 1);
>              bool   bFirstQ = ((absPartIdx % qpdiv) == 0);
> @@ -964,17 +974,17 @@
>
>          if (!bLumaOnly)
>          {
> -            width >>= 1;
> -            height >>= 1;
> +            width  >>= m_hChromaShift;
> +            height >>= m_vChromaShift;
>              src       = m_qtTempTComYuv[qtLayer].getCbAddr(absPartIdx);
> -            assert(m_qtTempTComYuv[qtLayer].m_cwidth == MAX_CU_SIZE / 2);
> +            uint32_t srcstride = m_qtTempTComYuv[qtLayer].m_cwidth;
>              dst       =
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
>              dststride = cu->getPic()->getPicYuvRec()->getCStride();
> -            primitives.blockcpy_ps(width, height, dst, dststride, src,
> MAX_CU_SIZE / 2);
> +            primitives.blockcpy_ps(width, height, dst, dststride, src,
> srcstride);
>
>              src = m_qtTempTComYuv[qtLayer].getCrAddr(absPartIdx);
>              dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(),
> zorder);
> -            primitives.blockcpy_ps(width, height, dst, dststride, src,
> MAX_CU_SIZE / 2);
> +            primitives.blockcpy_ps(width, height, dst, dststride, src,
> srcstride);
>          }
>      }
>
> @@ -1049,7 +1059,7 @@
>          int lastPos = -1;
>          cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
>
> -        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> +        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>          m_trQuant->selectLambda(TEXT_LUMA);
>          absSum = m_trQuant->transformNxN(cu, residual, stride, coeff,
> width, height, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);
>
> @@ -1081,7 +1091,6 @@
>      if (bCheckSplit && !bCheckFull)
>      {
>          //----- code splitted block -----
> -
>          uint32_t qPartsDiv     = cu->getPic()->getNumPartInCU() >>
> ((fullDepth + 1) << 1);
>          uint32_t absPartIdxSub = absPartIdx;
>          uint32_t splitCbfY = 0;
> @@ -1267,12 +1276,12 @@
>          reconIPred =
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zOrder);
>          reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();
>          reconQt = m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);
> -        assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
> -        primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, MAX_CU_SIZE / 2);
> +        uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;
> +        primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, reconQtStride);
>
>          reconIPred =
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zOrder);
>          reconQt    = m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);
> -        primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, MAX_CU_SIZE / 2);
> +        primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, reconQtStride);
>      }
>  }
>
> @@ -1376,20 +1385,20 @@
>          uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;
>          uint32_t width            = cu->getWidth(0) >> (trDepth + 1);
>          uint32_t height           = cu->getHeight(0) >> (trDepth + 1);
> -        assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
> +        uint32_t reconQtStride    = m_qtTempTComYuv[qtlayer].m_cwidth;
>          uint32_t reconIPredStride =
> cu->getPic()->getPicYuvRec()->getCStride();
>
>          if (stateU0V1Both2 == 0 || stateU0V1Both2 == 2)
>          {
>              Pel* reconIPred =
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
>              int16_t* reconQt  =
> m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);
> -            primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, MAX_CU_SIZE / 2);
> +            primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, reconQtStride);
>          }
>          if (stateU0V1Both2 == 1 || stateU0V1Both2 == 2)
>          {
>              Pel* reconIPred =
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
>              int16_t* reconQt  =
> m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);
> -            primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, MAX_CU_SIZE / 2);
> +            primitives.blockcpy_ps(width, height, reconIPred,
> reconIPredStride, reconQt, reconQtStride);
>          }
>      }
>  }
> @@ -1411,7 +1420,7 @@
>          uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> fullDepth] + 2;
>
>          uint32_t actualTrDepth = trDepth;
> -        if (trSizeLog2 == 2)
> +        if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
>          {
>              assert(trDepth > 0);
>              actualTrDepth--;
> @@ -1557,7 +1566,7 @@
>          uint32_t qtlayer    =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>
>          bool bChromaSame  = false;
> -        if (trSizeLog2 == 2)
> +        if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
>          {
>              assert(trDepth > 0);
>              uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + trDepth - 1) << 1);
> @@ -1572,9 +1581,11 @@
>          uint32_t numCoeffC = (cu->getSlice()->getSPS()->getMaxCUWidth() *
> cu->getSlice()->getSPS()->getMaxCUHeight()) >> (fullDepth << 1);
>          if (!bChromaSame)
>          {
> -            numCoeffC >>= 2;
> +            numCoeffC = ((cu->getSlice()->getSPS()->getMaxCUWidth() >>
> m_hChromaShift) * (cu->getSlice()->getSPS()->getMaxCUHeight() >>
> m_vChromaShift)) >> (fullDepth << 1);
>          }
> -        uint32_t numCoeffIncC =
> (cu->getSlice()->getSPS()->getMaxCUWidth() *
> cu->getSlice()->getSPS()->getMaxCUHeight()) >>
> ((cu->getSlice()->getSPS()->getMaxCUDepth() << 1) + 2);
> +
> +        uint32_t numCoeffIncC =
> ((cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) *
> (cu->getSlice()->getSPS()->getMaxCUHeight() >> m_vChromaShift)) >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
> +
>          TCoeff* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC *
> absPartIdx);
>          TCoeff* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC *
> absPartIdx);
>          TCoeff* coeffDstU = cu->getCoeffCb()         + (numCoeffIncC *
> absPartIdx);
> @@ -1583,7 +1594,7 @@
>          ::memcpy(coeffDstV, coeffSrcV, sizeof(TCoeff) * numCoeffC);
>
>          //===== copy reconstruction =====
> -        uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 -
> 1);
> +        uint32_t trSizeCLog2 = (bChromaSame || (cu->getChromaFormat() ==
> CHROMA_444))  ? trSizeLog2 : trSizeLog2 - 1;
>          m_qtTempTComYuv[qtlayer].copyPartToPartChroma(reconYuv,
> absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);
>      }
>      else
> @@ -1650,11 +1661,11 @@
>              }
>              //===== init availability pattern =====
>              cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
> -            cu->getPattern()->initAdiPatternChroma(cu, absPartIdx,
> trDepth, m_predBuf, m_predBufStride, m_predBufHeight);
> +            cu->getPattern()->initAdiPatternChroma(cu, absPartIdx,
> trDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);
>              Pel* chromaPred = (chromaId > 0 ?
> cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) :
> cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
>
>              //===== get prediction signal =====
> -            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width);
> +            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width, height, cu->getChromaFormat());
>
>              //===== get residual signal =====
>              assert(!((uint32_t)(size_t)fenc & (width - 1)));
> @@ -1676,7 +1687,7 @@
>              {
>                  curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
>              }
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>              m_trQuant->selectLambda(TEXT_CHROMA);
>
> @@ -1741,7 +1752,8 @@
>      //===== init pattern =====
>      assert(width == height);
>      cu->getPattern()->initPattern(cu, 0, 0);
> -    cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf,
> m_predBufStride, m_predBufHeight);
> +    cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf,
> m_predBufStride, m_predBufHeight, 0/*chromaId*/);
> +    cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf,
> m_predBufStride, m_predBufHeight, 1/*chromaId*/);
>      Pel* patChromaU = cu->getPattern()->getAdiCbBuf(width, height,
> m_predBuf);
>      Pel* patChromaV = cu->getPattern()->getAdiCrBuf(width, height,
> m_predBuf);
>
> @@ -1754,8 +1766,8 @@
>      for (uint32_t mode = minMode; mode < maxMode; mode++)
>      {
>          //--- get prediction ---
> -        predIntraChromaAng(patChromaU, mode, predU, stride, width);
> -        predIntraChromaAng(patChromaV, mode, predV, stride, width);
> +        predIntraChromaAng(patChromaU, mode, predU, stride, width,
> height, cu->getChromaFormat());
> +        predIntraChromaAng(patChromaV, mode, predV, stride, width,
> height, cu->getChromaFormat());
>
>          //--- get SAD ---
>          uint32_t sad = sa8d(fencU, stride, predU, stride) + sa8d(fencV,
> stride, predV, stride);
> @@ -2131,13 +2143,14 @@
>
>      if (width > 32)
>      {
> -        scaleWidth = 32;
> -        scaleStride = 32;
> +        scaleWidth     = 32;
> +        scaleStride    = 32;
>          costMultiplier = 4;
>      }
>
>      cu->getPattern()->initPattern(cu, trDepth, absPartIdx);
> -    cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight);
> +    cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight, 0);
> +    cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth,
> m_predBuf, m_predBufStride, m_predBufHeight, 1);
>
>      cu->getAllowedChromaDir(0, modeList);
>      //----- check chroma modes -----
> @@ -2156,7 +2169,7 @@
>              Pel* chromaPred = (chromaId > 0 ?
> cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) :
> cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));
>
>              //===== get prediction signal =====
> -            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width);
> +            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride,
> width, height, cu->getChromaFormat());
>              int log2SizeMinus2 = g_convertToBit[scaleWidth];
>              pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];
>              sad = costMultiplier * sa8d(fenc, scaleStride, pred,
> scaleStride);
> @@ -2174,6 +2187,27 @@
>      cu->setChromIntraDirSubParts(bestMode, 0, depth);
>  }
>
>
Do these two helper methods belong here? Perhaps they should live in TEncCu instead?


> +bool TEncSearch::isNextSection()
> +{
> +    if (m_splitMode == DONT_SPLIT)
> +    {
> +        m_section++;
> +        return false;
> +    }
> +    else
> +    {
> +        m_absPartIdxTURelCU += m_absPartIdxStep;
> +
> +        m_section++;
> +        return m_section< (1 << m_splitMode);
> +    }
> +}
> +
> +bool TEncSearch::isLastSection()
> +{
> +    return (m_section+1) >= (1<<m_splitMode);
>

White-space nit: put spaces around the binary operators (`m_section + 1`, `1 << m_splitMode`).


> +}
> +
>  void TEncSearch::estIntraPredChromaQT(TComDataCU* cu,
>                                        TComYuv*    fencYuv,
>                                        TComYuv*    predYuv,
> @@ -2181,60 +2215,109 @@
>                                        TComYuv*    reconYuv,
>                                        uint32_t    preCalcDistC)
>  {
> -    uint32_t depth     = cu->getDepth(0);
> -    uint32_t bestMode  = 0;
> -    uint32_t bestDist  = 0;
> -    uint64_t bestCost  = MAX_INT64;
> -
> -    //----- init mode list -----
> -    uint32_t minMode = 0;
> -    uint32_t maxMode = NUM_CHROMA_MODE;
> -    uint32_t modeList[NUM_CHROMA_MODE];
> -
> -    cu->getAllowedChromaDir(0, modeList);
> -
> -    //----- check chroma modes -----
> -    for (uint32_t mode = minMode; mode < maxMode; mode++)
> +    uint32_t depth              = cu->getDepth(0);
> +    uint32_t initTrDepth        = (cu->getPartitionSize(0) != SIZE_2Nx2N)
> && (cu->getChromaFormat() == CHROMA_444 ? 1 : 0);
> +    m_splitMode                 = (initTrDepth == 0) ? DONT_SPLIT :
> QUAD_SPLIT;
> +    m_absPartIdxStep            = (cu->getPic()->getNumPartInCU() >>
> (depth << 1)) >> partIdxStepShift[m_splitMode];
> +    m_partOffset                = 0;
> +    m_section                   = 0;
> +    m_absPartIdxTURelCU         = 0;
> +
> +    do
>      {
> -        //----- restore context models -----
> -        m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
> -
> -        //----- chroma coding -----
> -        uint32_t dist = 0;
> -        cu->setChromIntraDirSubParts(modeList[mode], 0, depth);
> -        xRecurIntraChromaCodingQT(cu, 0, 0, fencYuv, predYuv, resiYuv,
> dist);
> -        if (cu->getSlice()->getPPS()->getUseTransformSkip())
> +        uint32_t bestMode           = 0;
> +        uint32_t bestDist           = 0;
> +        uint64_t bestCost           = MAX_INT64;
> +
> +        //----- init mode list -----
> +        uint32_t minMode = 0;
> +        uint32_t maxMode = NUM_CHROMA_MODE;
> +        uint32_t modeList[NUM_CHROMA_MODE];
> +
> +        m_partOffset = m_absPartIdxTURelCU;
> +
> +        cu->getAllowedChromaDir(m_partOffset, modeList);
> +
> +        //----- check chroma modes -----
> +        for (uint32_t mode = minMode; mode < maxMode; mode++)
>          {
> +            //----- restore context models -----
>              m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
> +
> +            //----- chroma coding -----
> +            uint32_t dist = 0;
> +
> +            cu->setChromIntraDirSubParts(modeList[mode], m_partOffset,
> depth + initTrDepth);
> +
> +            xRecurIntraChromaCodingQT(cu, initTrDepth,
> m_absPartIdxTURelCU, fencYuv, predYuv, resiYuv, dist);
> +
> +            if (cu->getSlice()->getPPS()->getUseTransformSkip())
> +            {
> +
>  m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
> +            }
> +
> +            uint32_t bits = xGetIntraBitsQT(cu, initTrDepth,
> m_absPartIdxTURelCU, false, true);
> +            uint64_t cost = m_rdCost->calcRdCost(dist, bits);
> +
> +            //----- compare -----
> +            if (cost < bestCost)
> +            {
> +                bestCost = cost;
> +                bestDist = dist;
> +                bestMode = modeList[mode];
> +                xSetIntraResultChromaQT(cu, initTrDepth,
> m_absPartIdxTURelCU, reconYuv);
> +                ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U) +
> m_partOffset, m_absPartIdxStep * sizeof(UChar));
> +                ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V) +
> m_partOffset, m_absPartIdxStep * sizeof(UChar));
> +                ::memcpy(m_qtTempTransformSkipFlag[1],
> cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset, m_absPartIdxStep *
> sizeof(UChar));
> +                ::memcpy(m_qtTempTransformSkipFlag[2],
> cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset, m_absPartIdxStep *
> sizeof(UChar));
> +            }
>          }
>
> -        uint32_t bits = xGetIntraBitsQT(cu, 0, 0, false, true);
> -        uint64_t cost = m_rdCost->calcRdCost(dist, bits);
> -
> -        //----- compare -----
> -        if (cost < bestCost)
> +        if (!isLastSection())
>          {
> -            bestCost = cost;
> -            bestDist = dist;
> -            bestMode = modeList[mode];
> -            uint32_t qpn = cu->getPic()->getNumPartInCU() >> (depth << 1);
> -            xSetIntraResultChromaQT(cu, 0, 0, reconYuv);
> -            ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U), qpn *
> sizeof(UChar));
> -            ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V), qpn *
> sizeof(UChar));
> -            ::memcpy(m_qtTempTransformSkipFlag[1],
> cu->getTransformSkip(TEXT_CHROMA_U), qpn * sizeof(UChar));
> -            ::memcpy(m_qtTempTransformSkipFlag[2],
> cu->getTransformSkip(TEXT_CHROMA_V), qpn * sizeof(UChar));
> +            uint32_t compWidth   = (cu->getWidth(0) >> m_hChromaShift)
>  >> initTrDepth;
> +            uint32_t compHeight  = (cu->getHeight(0) >> m_vChromaShift)
> >> initTrDepth;
> +            uint32_t zorder      = cu->getZorderIdxInCU() + m_partOffset;
> +            Pel*     dst         =
> cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);
> +            uint32_t dststride   =
> cu->getPic()->getPicYuvRec()->getCStride();
> +            Pel*     src         = reconYuv->getCbAddr(m_partOffset);
> +            uint32_t srcstride   = reconYuv->getCStride();
> +
> +            primitives.blockcpy_pp(compWidth, compHeight, dst, dststride,
> src, srcstride);
> +
> +            dst                 =
> cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);
> +            src                 = reconYuv->getCrAddr(m_partOffset);
> +            primitives.blockcpy_pp(compWidth, compHeight, dst, dststride,
> src, srcstride);
> +        }
> +
> +        //----- set data -----
> +        ::memcpy(cu->getCbf(TEXT_CHROMA_U) + m_partOffset,
> m_qtTempCbf[1], m_absPartIdxStep * sizeof(UChar));
> +        ::memcpy(cu->getCbf(TEXT_CHROMA_V) + m_partOffset,
> m_qtTempCbf[2], m_absPartIdxStep * sizeof(UChar));
> +        ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset,
> m_qtTempTransformSkipFlag[1], m_absPartIdxStep * sizeof(UChar));
> +        ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset,
> m_qtTempTransformSkipFlag[2], m_absPartIdxStep * sizeof(UChar));
> +        cu->setChromIntraDirSubParts(bestMode, m_partOffset, depth +
> initTrDepth);
> +        cu->m_totalDistortion += bestDist - preCalcDistC;
> +
> +    } while(isNextSection());
>

White-space nit: add a space after `while` (`while (isNextSection());`).


> +
> +    //----- restore context models -----
> +    if (initTrDepth != 0)
> +    {   // set Cbf for all blocks
> +        uint32_t uiCombCbfU = 0;
> +        uint32_t uiCombCbfV = 0;
> +        uint32_t uiPartIdx  = 0;
> +        for (uint32_t uiPart = 0; uiPart < 4; uiPart++, uiPartIdx +=
> m_absPartIdxStep)
> +        {
> +            uiCombCbfU |= cu->getCbf(uiPartIdx, TEXT_CHROMA_U, 1);
> +            uiCombCbfV |= cu->getCbf(uiPartIdx, TEXT_CHROMA_V, 1);
> +        }
> +        for (uint32_t uiOffs = 0; uiOffs < 4 * m_absPartIdxStep; uiOffs++)
> +        {
> +            cu->getCbf( TEXT_CHROMA_U )[ uiOffs ] |= uiCombCbfU;
> +            cu->getCbf( TEXT_CHROMA_V )[ uiOffs ] |= uiCombCbfV;
>          }
>      }
>
> -    //----- set data -----
> -    uint32_t qpn = cu->getPic()->getNumPartInCU() >> (depth << 1);
> -    ::memcpy(cu->getCbf(TEXT_CHROMA_U), m_qtTempCbf[1], qpn *
> sizeof(UChar));
> -    ::memcpy(cu->getCbf(TEXT_CHROMA_V), m_qtTempCbf[2], qpn *
> sizeof(UChar));
> -    ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U),
> m_qtTempTransformSkipFlag[1], qpn * sizeof(UChar));
> -    ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V),
> m_qtTempTransformSkipFlag[2], qpn * sizeof(UChar));
> -    cu->setChromIntraDirSubParts(bestMode, 0, depth);
> -    cu->m_totalDistortion += bestDist - preCalcDistC;
> -
>      //----- restore context models -----
>      m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
>  }
> @@ -3085,10 +3168,11 @@
>          outResiYuv->clear();
>
>          predYuv->copyToPartYuv(outReconYuv, 0);
> -
> +        //Luma
>          int part = partitionFromSizes(width, height);
>          distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(),
> fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
> -        part = partitionFromSizes(width >> 1, height >> 1);
> +        //Chroma
> +        part = partitionFromSizes(width >> m_hChromaShift, height >>
> m_vChromaShift);
>          distortion +=
> m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(),
> fencYuv->getCStride(), outReconYuv->getCbAddr(),
> outReconYuv->getCStride()));
>          distortion +=
> m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(),
> fencYuv->getCStride(), outReconYuv->getCrAddr(),
> outReconYuv->getCStride()));
>
> @@ -3208,7 +3292,7 @@
>      // update with clipped distortion and cost (qp estimation loop uses
> unclipped values)
>      int part = partitionFromSizes(width, height);
>      bdist = primitives.sse_pp[part](fencYuv->getLumaAddr(),
> fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
> -    part = partitionFromSizes(width >> 1, height >> 1);
> +    part = partitionFromSizes(width >> cu->getHorzChromaShift(), height
> >> cu->getVertChromaShift());
>      bdist +=
> m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(),
> fencYuv->getCStride(), outReconYuv->getCbAddr(),
> outReconYuv->getCStride()));
>      bdist +=
> m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(),
> fencYuv->getCStride(), outReconYuv->getCrAddr(),
> outReconYuv->getCStride()));
>      bcost = m_rdCost->calcRdCost(bdist, bestBits);
> @@ -3311,7 +3395,7 @@
>              cu->setTransformSkipSubParts(0, TEXT_CHROMA_V, absPartIdx,
> cu->getDepth(0) + trModeC);
>          }
>
> -        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> +        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>          m_trQuant->selectLambda(TEXT_LUMA);
>
>          absSumY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> @@ -3322,7 +3406,7 @@
>          if (bCodeChroma)
>          {
>              int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>              m_trQuant->selectLambda(TEXT_CHROMA);
>
> @@ -3330,7 +3414,7 @@
>                                                trWidthC, trHeightC,
> TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);
>
>              curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>              absSumV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
>                                                trWidthC, trHeightC,
> TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);
>
> @@ -3342,7 +3426,7 @@
>          {
>              int16_t *curResiY = resiYuv->getLumaAddr(absTUPartIdx);
>
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>
>              int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
>              assert(scalingListType < 6);
> @@ -3362,7 +3446,7 @@
>                  int16_t *pcResiCurrU = resiYuv->getCbAddr(absTUPartIdxC);
>
>                  int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> -                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>                  int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
>                  assert(scalingListType < 6);
> @@ -3378,7 +3462,7 @@
>              {
>                  int16_t *curResiV = resiYuv->getCrAddr(absTUPartIdxC);
>                  int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> -                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>                  int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
>                  assert(scalingListType < 6);
> @@ -3453,6 +3537,7 @@
>      assert(cu->getDepth(0) == cu->getDepth(absPartIdx));
>      const uint32_t trMode = depth - cu->getDepth(0);
>      const uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
> +    uint32_t  trSizeCLog2 =
> g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >>
> m_hChromaShift) >> depth] + 2;;
>
>      bool bSplitFlag =
> ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) &&
> cu->getPredictionMode(absPartIdx) == MODE_INTER &&
> (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));
>      bool bCheckFull;
> @@ -3465,12 +3550,11 @@
>
>      bool  bCodeChroma = true;
>      uint32_t  trModeC     = trMode;
> -    uint32_t  trSizeCLog2 = trSizeLog2 - 1;
> -    if (trSizeLog2 == 2)
> +    if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
>      {
>          trSizeCLog2++;
>          trModeC--;
> -        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + trModeC) << 1);
> +        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1)
> << 1);
>          bCodeChroma = ((absPartIdx % qpdiv) == 0);
>      }
>
> @@ -3490,8 +3574,8 @@
>          const uint32_t numCoeffPerAbsPartIdxIncrement =
> cu->getSlice()->getSPS()->getMaxCUWidth() *
> cu->getSlice()->getSPS()->getMaxCUHeight() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
>          const uint32_t qtlayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>          TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx);
> -        TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> -        TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> +        TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> +        TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
>
>          int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0;
>          uint32_t absTUPartIdxC = absPartIdx;
> @@ -3520,7 +3604,7 @@
>              m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac,
> trWidth, trHeight, TEXT_LUMA);
>          }
>
> -        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> +        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>          m_trQuant->selectLambda(TEXT_LUMA);
>
>          absSumY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> @@ -3534,17 +3618,17 @@
>              {
>                  m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac,
> trWidthC, trHeightC, TEXT_CHROMA);
>              }
> -
> +            //Cb transform
>              int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>              m_trQuant->selectLambda(TEXT_CHROMA);
>
>              absSumU = m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
>                                                trWidthC, trHeightC,
> TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);
> -
> +            //Cr transform
>              curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>              absSumV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
>                                                trWidthC, trHeightC,
> TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);
>
> @@ -3586,7 +3670,7 @@
>          {
>              int16_t *curResiY =
> m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx);
>
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>
>              int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
>              assert(scalingListType < 6);
> @@ -3658,16 +3742,15 @@
>                  int16_t *pcResiCurrU =
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
>
>                  int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> -                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>                  int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
>                  assert(scalingListType < 6);
> -                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> pcResiCurrU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC,
> scalingListType, false, lastPosU);
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> pcResiCurrU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC,
> trHeightC, scalingListType, false, lastPosU);
>
>                  uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC),
> resiYuv->m_cwidth,
>
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),
> -                                                             MAX_CU_SIZE
> / 2);
> +
> m_qtTempTComYuv[qtlayer].m_cwidth);
>                  const uint32_t nonZeroDistU =
> m_rdCost->scaleChromaDistCb(dist);
>
>                  if (cu->isLosslessCoded(0))
> @@ -3710,10 +3793,10 @@
>              if (!absSumU)
>              {
>                  int16_t *ptr =
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
> -                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> +                const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
>
>                  assert(trWidthC == trHeightC);
> -
>  primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE /
> 2, 0);
> +
>  primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);
>              }
>
>              distV =
> m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absTUPartIdxC),
> resiYuv->m_cwidth, m_tempPel, trWidthC));
> @@ -3725,16 +3808,15 @@
>              {
>                  int16_t *curResiV =
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
>                  int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> -                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>                  int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
>                  assert(scalingListType < 6);
> -                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType,
> false, lastPosV);
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC,
> trHeightC, scalingListType, false, lastPosV);
>
>                  uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC),
> resiYuv->m_cwidth,
>
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),
> -                                                             MAX_CU_SIZE
> / 2);
> +
> m_qtTempTComYuv[qtlayer].m_cwidth);
>                  const uint32_t nonZeroDistV =
> m_rdCost->scaleChromaDistCr(dist);
>
>                  if (cu->isLosslessCoded(0))
> @@ -3777,10 +3859,10 @@
>              if (!absSumV)
>              {
>                  int16_t *ptr =
>  m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
> -                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> +                const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
>
>                  assert(trWidthC == trHeightC);
> -
>  primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE /
> 2, 0);
> +
>  primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);
>              }
>          }
>          cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx,
> depth);
> @@ -3817,7 +3899,7 @@
>                  m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac,
> trWidth, trHeight, TEXT_LUMA);
>              }
>
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>
>              m_trQuant->selectLambda(TEXT_LUMA);
>              absSumTransformSkipY = m_trQuant->transformNxN(cu,
> resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,
> @@ -3831,7 +3913,7 @@
>                  m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx,
> trWidth, trHeight, depth, TEXT_LUMA);
>                  const uint32_t skipSingleBitsY =
> m_entropyCoder->getNumberOfWrittenBits();
>
> -                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);
> +                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA,
> cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());
>
>                  int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];
>                  assert(scalingListType < 6);
> @@ -3874,7 +3956,7 @@
>
>              int16_t *curResiU =
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);
>              int16_t *curResiV =
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);
> -            assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);
> +            uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;
>
>              TCoeff bestCoeffU[32 * 32], bestCoeffV[32 * 32];
>              memcpy(bestCoeffU, coeffCurU, sizeof(TCoeff) *
> numSamplesChroma);
> @@ -3883,8 +3965,8 @@
>              int16_t bestResiU[32 * 32], bestResiV[32 * 32];
>              for (int i = 0; i < trHeightC; ++i)
>              {
> -                memcpy(&bestResiU[i * trWidthC], curResiU + i *
> (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);
> -                memcpy(&bestResiV[i * trWidthC], curResiV + i *
> (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);
> +                memcpy(&bestResiU[i * trWidthC], curResiU + i * stride,
> sizeof(int16_t) * trWidthC);
> +                memcpy(&bestResiV[i * trWidthC], curResiV + i * stride,
> sizeof(int16_t) * trWidthC);
>              }
>
>
>  m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);
> @@ -3898,13 +3980,13 @@
>              }
>
>              int curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>              m_trQuant->selectLambda(TEXT_CHROMA);
>
>              absSumTransformSkipU = m_trQuant->transformNxN(cu,
> resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,
>                                                             trWidthC,
> trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosTransformSkipU, true,
> curuseRDOQ);
>              curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> -            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>              absSumTransformSkipV = m_trQuant->transformNxN(cu,
> resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,
>                                                             trWidthC,
> trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosTransformSkipV, true,
> curuseRDOQ);
>
> @@ -3922,17 +4004,15 @@
>                  singleBitsU = m_entropyCoder->getNumberOfWrittenBits();
>
>                  curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCbQpOffset() +
> cu->getSlice()->getSliceQpDeltaCb();
> -                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>                  int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];
>                  assert(scalingListType < 6);
> -                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> -
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType,
> true, lastPosTransformSkipU);
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC,
> trHeightC, scalingListType, true, lastPosTransformSkipU);
>
>                  uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC),
> resiYuv->m_cwidth,
>
> m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),
> -                                                             MAX_CU_SIZE
> / 2);
> +
> m_qtTempTComYuv[qtlayer].m_cwidth);
>                  nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);
>                  singleCostU = m_rdCost->calcRdCost(nonZeroDistU,
> singleBitsU);
>              }
> @@ -3944,7 +4024,7 @@
>                  memcpy(coeffCurU, bestCoeffU, sizeof(TCoeff) *
> numSamplesChroma);
>                  for (int i = 0; i < trHeightC; ++i)
>                  {
> -                    memcpy(curResiU + i * (MAX_CU_SIZE / 2), &bestResiU[i
> * trWidthC], sizeof(int16_t) * trWidthC);
> +                    memcpy(curResiU + i * stride, &bestResiU[i *
> trWidthC], sizeof(int16_t) * trWidthC);
>                  }
>              }
>              else
> @@ -3961,17 +4041,15 @@
>                  singleBitsV = m_entropyCoder->getNumberOfWrittenBits() -
> singleBitsU;
>
>                  curChromaQpOffset =
> cu->getSlice()->getPPS()->getChromaCrQpOffset() +
> cu->getSlice()->getSliceQpDeltaCr();
> -                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);
> +                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA,
> cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset,
> cu->getChromaFormat());
>
>                  int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];
>                  assert(scalingListType < 6);
> -                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE /
> 2);
> -
> -
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType,
> true, lastPosTransformSkipV);
> +
>  m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT,
> curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC,
> trHeightC, scalingListType, true, lastPosTransformSkipV);
>
>                  uint32_t dist =
> primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC),
> resiYuv->m_cwidth,
>
> m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),
> -                                                             MAX_CU_SIZE
> / 2);
> +
> m_qtTempTComYuv[qtlayer].m_cwidth);
>                  nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);
>                  singleCostV = m_rdCost->calcRdCost(nonZeroDistV,
> singleBitsV);
>              }
> @@ -3983,7 +4061,7 @@
>                  memcpy(coeffCurV, bestCoeffV, sizeof(TCoeff) *
> numSamplesChroma);
>                  for (int i = 0; i < trHeightC; ++i)
>                  {
> -                    memcpy(curResiV + i * (MAX_CU_SIZE / 2), &bestResiV[i
> * trWidthC], sizeof(int16_t) * trWidthC);
> +                    memcpy(curResiV + i * stride, &bestResiV[i *
> trWidthC], sizeof(int16_t) * trWidthC);
>                  }
>              }
>              else
> @@ -4115,6 +4193,7 @@
>      const uint32_t trMode = cu->getTransformIdx(absPartIdx);
>      const bool bSubdiv = curTrMode != trMode;
>      const uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
> +    uint32_t  trSizeCLog2 =
> g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >>
> m_hChromaShift) >> depth] + 2;
>
>      if (bSubdivAndCbf && trSizeLog2 <=
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && trSizeLog2 >
> cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))
>      {
> @@ -4122,10 +4201,23 @@
>      }
>
>      assert(cu->getPredictionMode(absPartIdx) != MODE_INTRA);
> +
> +    bool mCodeAll = true;
> +    if ((ttype == TEXT_CHROMA_U) || (ttype == TEXT_CHROMA_V))
> +    {
> +        int width  = 1 << trSizeLog2;
> +        int height = 1 << trSizeLog2;
> +        const uint32_t numPels = (width >> cu->getHorzChromaShift()) *
> (height >> cu->getHorzChromaShift());
> +        if(numPels < (MIN_TU_SIZE * MIN_TU_SIZE))
> +        {
> +            mCodeAll = false;
> +        }
> +    }
> +
>      if (bSubdivAndCbf)
>      {
>          const bool bFirstCbfOfCU = curTrMode == 0;
> -        if (bFirstCbfOfCU || trSizeLog2 > 2)
> +        if (bFirstCbfOfCU || mCodeAll)
>          {
>              if (bFirstCbfOfCU || cu->getCbf(absPartIdx, TEXT_CHROMA_U,
> curTrMode - 1))
>              {
> @@ -4136,7 +4228,7 @@
>                  m_entropyCoder->encodeQtCbf(cu, absPartIdx,
> TEXT_CHROMA_V, curTrMode);
>              }
>          }
> -        else if (trSizeLog2 == 2)
> +        else
>          {
>              assert(cu->getCbf(absPartIdx, TEXT_CHROMA_U, curTrMode) ==
> cu->getCbf(absPartIdx, TEXT_CHROMA_U, curTrMode - 1));
>              assert(cu->getCbf(absPartIdx, TEXT_CHROMA_V, curTrMode) ==
> cu->getCbf(absPartIdx, TEXT_CHROMA_V, curTrMode - 1));
> @@ -4145,21 +4237,20 @@
>
>      if (!bSubdiv)
>      {
> +        //Luma
>          const uint32_t numCoeffPerAbsPartIdxIncrement =
> cu->getSlice()->getSPS()->getMaxCUWidth() *
> cu->getSlice()->getSPS()->getMaxCUHeight() >>
> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);
> -        //assert( 16 == uiNumCoeffPerAbsPartIdxIncrement ); // check
>          const uint32_t qtlayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>          TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] +
>  numCoeffPerAbsPartIdxIncrement * absPartIdx;
> -        TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> -        TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> +
> +        //Chroma
> +        TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> +        TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] +
> (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
>
>          bool  bCodeChroma = true;
> -        uint32_t  trModeC     = trMode;
> -        uint32_t  trSizeCLog2 = trSizeLog2 - 1;
> -        if (trSizeLog2 == 2)
> +        if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
>          {
>              trSizeCLog2++;
> -            trModeC--;
> -            uint32_t qpdiv = cu->getPic()->getNumPartInCU() >>
> ((cu->getDepth(0) + trModeC) << 1);
> +            uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth -
> 1) << 1);
>              bCodeChroma = ((absPartIdx % qpdiv) == 0);
>          }
>
> @@ -4171,21 +4262,18 @@
>          {
>              if (ttype == TEXT_LUMA && cu->getCbf(absPartIdx, TEXT_LUMA,
> trMode))
>              {
> -                int trWidth  = 1 << trSizeLog2;
> -                int trHeight = 1 << trSizeLog2;
> -                m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx,
> trWidth, trHeight, depth, TEXT_LUMA);
> +                m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx,
> 1 << trSizeLog2, 1 << trSizeLog2, depth, TEXT_LUMA);
>              }
> +
>              if (bCodeChroma)
>              {
> -                int trWidth  = 1 << trSizeCLog2;
> -                int trHeight = 1 << trSizeCLog2;
>                  if (ttype == TEXT_CHROMA_U && cu->getCbf(absPartIdx,
> TEXT_CHROMA_U, trMode))
>                  {
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU,
> absPartIdx, trWidth, trHeight, depth, TEXT_CHROMA_U);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU,
> absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, depth, TEXT_CHROMA_U);
>                  }
>                  if (ttype == TEXT_CHROMA_V && cu->getCbf(absPartIdx,
> TEXT_CHROMA_V, trMode))
>                  {
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV,
> absPartIdx, trWidth, trHeight, depth, TEXT_CHROMA_V);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV,
> absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, depth, TEXT_CHROMA_V);
>                  }
>              }
>          }
> @@ -4211,13 +4299,13 @@
>
>      if (curTrMode == trMode)
>      {
> -        const uint32_t trSizeLog2 =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
> +        const uint32_t trSizeLog2   =
> g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;
> +        uint32_t  trSizeCLog2 =
> g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >>
> cu->getHorzChromaShift()) >> depth] + 2;;
>          const uint32_t qtlayer =
> cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;
>
>          bool  bCodeChroma   = true;
>          uint32_t  trModeC     = trMode;
> -        uint32_t  trSizeCLog2 = trSizeLog2 - 1;
> -        if (trSizeLog2 == 2)
> +        if((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))
>          {
>              trSizeCLog2++;
>              trModeC--;
> @@ -4246,10 +4334,10 @@
>              if (bCodeChroma)
>              {
>                  uint32_t    uiNumCoeffC = (1 << (trSizeCLog2 << 1));
> -                TCoeff* pcCoeffSrcU = m_qtTempCoeffCb[qtlayer] +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> -                TCoeff* pcCoeffSrcV = m_qtTempCoeffCr[qtlayer] +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> -                TCoeff* pcCoeffDstU = cu->getCoeffCb() +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> -                TCoeff* pcCoeffDstV = cu->getCoeffCr() +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);
> +                TCoeff* pcCoeffSrcU = m_qtTempCoeffCb[qtlayer] +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> +                TCoeff* pcCoeffSrcV = m_qtTempCoeffCr[qtlayer] +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> +                TCoeff* pcCoeffDstU = cu->getCoeffCb() +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
> +                TCoeff* pcCoeffDstV = cu->getCoeffCr() +
> (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift +
> m_vChromaShift));
>                  ::memcpy(pcCoeffDstU, pcCoeffSrcU, sizeof(TCoeff) *
> uiNumCoeffC);
>                  ::memcpy(pcCoeffDstV, pcCoeffSrcV, sizeof(TCoeff) *
> uiNumCoeffC);
>              }
> diff -r 98e238489f84 -r 6a602378d31b source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h       Fri Jan 03 18:21:41 2014
> +0530
> +++ b/source/Lib/TLibEncoder/TEncSearch.h       Fri Jan 03 18:23:22 2014
> +0530
> @@ -59,6 +59,13 @@
>  namespace x265 {
>  // private namespace
>
> +#define DONT_SPLIT            0
> +#define VERTICAL_SPLIT        1
> +#define QUAD_SPLIT            2
> +#define NUMBER_OF_SPLIT_MODES 3
> +
> +static const uint32_t partIdxStepShift[NUMBER_OF_SPLIT_MODES] = { 0, 1, 2
> };
> +
>  class TEncCu;
>
>  //
> ====================================================================================================================
> @@ -113,6 +120,12 @@
>      int             m_hChromaShift;
>      int             m_vChromaShift;
>
> +    uint32_t        m_section;
> +    uint32_t        m_splitMode;
> +    uint32_t        m_absPartIdxTURelCU;
> +    uint32_t        m_absPartIdxStep;
> +    uint32_t        m_partOffset;
> +
>  public:
>
>      TEncSbac***     m_rdSbacCoders;
> @@ -184,6 +197,9 @@
>
>      uint32_t xSymbolBitsInter(TComDataCU* cu);
>
> +    bool isNextSection();
> +    bool isLastSection();
> +
>  protected:
>
>      //
> --------------------------------------------------------------------------------------------
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20140107/16964875/attachment-0001.html>


More information about the x265-devel mailing list