[x265] split rate calculation functions to luma and chroma to simplify luma path

Steve Borho steve at borho.org
Mon Jun 30 19:57:02 CEST 2014


On Sun, Jun 29, 2014 at 9:52 PM, Satoshi Nakagawa <nakagawa424 at oki.com> wrote:
> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa424 at oki.com>
> # Date 1404096538 -32400
> #      Mon Jun 30 11:48:58 2014 +0900
> # Node ID 4c30d66afc78ed99385c04645d7b1303d80dea2c
> # Parent  32aa6cc3cf4d108ac92f5d29258b2c38ca888d29
> split rate calculation functions to luma and chroma to simplify luma path

I'm afraid this patch doesn't apply on top of the current tip with your
previous "fix emms" patch applied.

Can you rebase this?

> diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncEntropy.cpp
> --- a/source/Lib/TLibEncoder/TEncEntropy.cpp    Thu Jun 26 17:19:08 2014 -0700
> +++ b/source/Lib/TLibEncoder/TEncEntropy.cpp    Mon Jun 30 11:48:58 2014 +0900
> @@ -325,7 +325,7 @@
>          }
>          else
>          {
> -            m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu->getTransformIdx(absPartIdx), absPartIdxStep, tuSize, tuSize, (subdiv == 0));
> +            m_entropyCoder->codeQtCbf(cu, absPartIdx, TEXT_LUMA, cu->getTransformIdx(absPartIdx));
>          }
>
>          if (cbfY || cbfU || cbfV)
> @@ -342,7 +342,7 @@
>          }
>          if (cbfY)
>          {
> -            m_entropyCoder->codeCoeffNxN(cu, (cu->getCoeffY() + offsetLuma), absPartIdx, tuSize, TEXT_LUMA);
> +            m_entropyCoder->codeCoeffNxN(cu, (cu->getCoeffY() + offsetLuma), absPartIdx, log2TrSize, TEXT_LUMA);
>          }
>
>          int chFmt = cu->getChromaFormat();
> @@ -351,7 +351,7 @@
>              uint32_t partNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
>              if ((absPartIdx & (partNum - 1)) == (partNum - 1))
>              {
> -                uint32_t trSizeC           = 1 << log2TrSize;
> +                const uint32_t log2TrSizeC = 2;
>                  const bool splitIntoSubTUs = (chFmt == CHROMA_422);
>
>                  uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);
> @@ -364,10 +364,10 @@
>                      do
>                      {
>                          uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
> -                        uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC;
>                          if (cbf)
>                          {
> -                            m_entropyCoder->codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId);
> +                            uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
> +                            m_entropyCoder->codeCoeffNxN(cu, (coeffChroma + m_bakChromaOffset + subTUOffset), tuIterator.m_absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
>                          }
>                      }
>                      while (isNextTUSection(&tuIterator));
> @@ -376,7 +376,7 @@
>          }
>          else
>          {
> -            uint32_t trSizeC  = tuSize >> hChromaShift;
> +            uint32_t log2TrSizeC = log2TrSize - hChromaShift;
>              const bool splitIntoSubTUs = (chFmt == CHROMA_422);
>              uint32_t curPartNum = cu->getPic()->getNumPartInCU() >> (depth << 1);
>              for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
> @@ -387,10 +387,10 @@
>                  do
>                  {
>                      uint32_t cbf = cu->getCbf(tuIterator.m_absPartIdxTURelCU, (TextType)chromaId, trIdx + splitIntoSubTUs);
> -                    uint32_t subTUIndex = tuIterator.m_section * trSizeC * trSizeC;
>                      if (cbf)
>                      {
> -                        m_entropyCoder->codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUIndex), tuIterator.m_absPartIdxTURelCU, trSizeC, (TextType)chromaId);
> +                        uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
> +                        m_entropyCoder->codeCoeffNxN(cu, (coeffChroma + offsetChroma + subTUOffset), tuIterator.m_absPartIdxTURelCU, log2TrSizeC, (TextType)chromaId);
>                      }
>                  }
>                  while (isNextTUSection(&tuIterator));
> @@ -540,11 +540,6 @@
>      m_entropyCoder->codeQtRootCbf(cu, absPartIdx);
>  }
>
> -void TEncEntropy::encodeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth)
> -{
> -    m_entropyCoder->codeQtCbfZero(cu, ttype, trDepth);
> -}
> -
>  void TEncEntropy::encodeQtRootCbfZero(TComDataCU* cu)
>  {
>      m_entropyCoder->codeQtRootCbfZero(cu);
> @@ -593,11 +588,6 @@
>      xEncodeTransform(cu, lumaOffset, chromaOffset, absPartIdx, absPartIdxStep, depth, cuSize, 0, bCodeDQP);
>  }
>
> -void TEncEntropy::encodeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
> -{
> -    m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, trSize, ttype);
> -}
> -
>  void TEncEntropy::estimateBit(estBitsSbacStruct* estBitsSBac, int trSize, TextType ttype)
>  {
>      ttype = ttype == TEXT_LUMA ? TEXT_LUMA : TEXT_CHROMA;
> diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncEntropy.h
> --- a/source/Lib/TLibEncoder/TEncEntropy.h      Thu Jun 26 17:19:08 2014 -0700
> +++ b/source/Lib/TLibEncoder/TEncEntropy.h      Mon Jun 30 11:48:58 2014 +0900
> @@ -114,14 +114,24 @@
>
>      void encodeTransformSubdivFlag(uint32_t symbol, uint32_t ctx);
>      void encodeQtCbf(TComDataCU* cu, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, TextType ttype, uint32_t trDepth, bool lowestLevel);
> -    void encodeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth);
> +    void encodeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth)
> +    {
> +        m_entropyCoder->codeQtCbf(cu, absPartIdx, ttype, trDepth);
> +    }
> +    void encodeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth)
> +    {
> +        m_entropyCoder->codeQtCbfZero(cu, ttype, trDepth);
> +    }
> +
>      void encodeQtRootCbfZero(TComDataCU* cu);
>      void encodeQtRootCbf(TComDataCU* cu, uint32_t absPartIdx);
>      void encodeQP(TComDataCU* cu, uint32_t absPartIdx);
>      void encodeScalingList(TComScalingList* scalingList);
>      void encodeCoeff(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, uint32_t cuSize, bool& bCodeDQP);
> -    void encodeCoeffNxN(TComDataCU* cu, coeff_t* pcCoeff, uint32_t absPartIdx, uint32_t trSize, TextType ttype);
> -
> +    void encodeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
> +    {
> +        m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype);
> +    }
>
>      void estimateBit(estBitsSbacStruct* estBitsSbac, int trSize, TextType ttype);
>      void encodeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx);
> diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncSbac.cpp
> --- a/source/Lib/TLibEncoder/TEncSbac.cpp       Thu Jun 26 17:19:08 2014 -0700
> +++ b/source/Lib/TLibEncoder/TEncSbac.cpp       Mon Jun 30 11:48:58 2014 +0900
> @@ -1873,6 +1873,25 @@
>      }
>  }
>
> +void TEncSbac::codeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth)
> +{
> +    uint32_t ctx = cu->getCtxQtCbf(ttype, trDepth);
> +    uint32_t cbf = cu->getCbf(absPartIdx, ttype, trDepth);
> +    m_cabac->encodeBin(cbf, m_contextModels[OFF_QT_CBF_CTX + ctx]);
> +
> +    DTRACE_CABAC_VL(g_nSymbolCounter++)
> +    DTRACE_CABAC_T("\tparseQtCbf()")
> +    DTRACE_CABAC_T("\tsymbol=")
> +    DTRACE_CABAC_V(cbf)
> +    DTRACE_CABAC_T("\tctx=")
> +    DTRACE_CABAC_V(ctx)
> +    DTRACE_CABAC_T("\tetype=")
> +    DTRACE_CABAC_V(ttype)
> +    DTRACE_CABAC_T("\tuiAbsPartIdx=")
> +    DTRACE_CABAC_V(absPartIdx)
> +    DTRACE_CABAC_T("\n")
> +}
> +
>  void TEncSbac::codeTransformSkipFlags(TComDataCU* cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
>  {
>      if (cu->getCUTransquantBypass(absPartIdx))
> @@ -1999,8 +2018,9 @@
>      }
>  }
>
> -void TEncSbac::codeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t trSize, TextType ttype)
> +void TEncSbac::codeCoeffNxN(TComDataCU* cu, coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
>  {
> +    uint32_t trSize = 1 << log2TrSize;
>  #if ENC_DEC_TRACE
>      DTRACE_CABAC_VL(g_nSymbolCounter++)
>      DTRACE_CABAC_T("\tparseCoeffNxN()\teType=")
> @@ -2028,8 +2048,6 @@
>
>      X265_CHECK(trSize <= m_slice->getSPS()->getMaxTrSize(), "transform size out of range\n");
>
> -    const uint32_t log2TrSize = g_convertToBit[trSize] + 2;
> -
>      // compute number of significant coefficients
>      uint32_t numSig = primitives.count_nonzero(coeff, (1 << (log2TrSize << 1)));
>
> diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncSbac.h
> --- a/source/Lib/TLibEncoder/TEncSbac.h Thu Jun 26 17:19:08 2014 -0700
> +++ b/source/Lib/TLibEncoder/TEncSbac.h Mon Jun 30 11:48:58 2014 +0900
> @@ -114,6 +114,7 @@
>      void codePredMode(TComDataCU* cu, uint32_t absPartIdx);
>      void codeTransformSubdivFlag(uint32_t symbol, uint32_t ctx);
>      void codeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth, uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool lowestLevel);
> +    void codeQtCbf(TComDataCU* cu, uint32_t absPartIdx, TextType ttype, uint32_t trDepth);
>      void codeQtRootCbf(TComDataCU* cu, uint32_t absPartIdx);
>      void codeQtCbfZero(TComDataCU* cu, TextType ttype, uint32_t trDepth);
>      void codeQtRootCbfZero(TComDataCU* cu);
> @@ -127,7 +128,7 @@
>      void codeDeltaQP(TComDataCU* cu, uint32_t absPartIdx);
>
>      void codeLastSignificantXY(uint32_t posx, uint32_t posy, uint32_t log2TrSize, TextType ttype, uint32_t scanIdx);
> -    void codeCoeffNxN(TComDataCU* cu, coeff_t* coef, uint32_t absPartIdx, uint32_t trSize, TextType ttype);
> +    void codeCoeffNxN(TComDataCU* cu, coeff_t* coef, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype);
>      void codeTransformSkipFlags(TComDataCU* cu, uint32_t absPartIdx, uint32_t trSize, TextType ttype);
>
>      // -------------------------------------------------------------------------------------------------------------------
> diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncSearch.cpp
> --- a/source/Lib/TLibEncoder/TEncSearch.cpp     Thu Jun 26 17:19:08 2014 -0700
> +++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Mon Jun 30 11:48:58 2014 +0900
> @@ -151,7 +151,7 @@
>      m_rdCost->setCrDistortionWeight(lambdaOffset);
>  }
>
> -void TEncSearch::xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
> +void TEncSearch::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
>  {
>      uint32_t fullDepth  = cu->getDepth(0) + trDepth;
>      uint32_t trMode     = cu->getTransformIdx(absPartIdx);
> @@ -177,77 +177,116 @@
>      else
>      {
>          X265_CHECK(log2TrSize > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx), "transform size too small\n");
> -        if (bLuma)
> -        {
> -            m_entropyCoder->encodeTransformSubdivFlag(subdiv, 5 - log2TrSize);
> -        }
> +        m_entropyCoder->encodeTransformSubdivFlag(subdiv, 5 - log2TrSize);
>      }
>
> -    if (bChroma)
> -    {
> -        int      chFmt      = cu->getChromaFormat();
> -        if ((log2TrSize > 2) && !(chFmt == CHROMA_444))
> -        {
> -            if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))
> -                m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_U, trDepth, (subdiv == 0));
> -
> -            if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_V, trDepth - 1))
> -                m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_V, trDepth, (subdiv == 0));
> -        }
> -    }
> -
> -    if (subdiv)
> -    {
> -        TComTURecurse tuIterator;
> -        initSection(&tuIterator, QUAD_SPLIT, absPartIdxStep);
> -        width  >>= 1;
> -        height >>= 1;
> -
> -        uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
> -        for (uint32_t part = 0; part < 4; part++)
> -        {
> -            xEncSubdivCbfQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, tuIterator.m_absPartIdxStep, width, height, bLuma, bChroma);
> -        }
> -
> -        return;
> -    }
> -
> -    //===== Cbfs =====
> -    if (bLuma)
> -    {
> -        m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, width, height, TEXT_LUMA, trMode, (subdiv == 0));
> -    }
> -}
> -
> -void TEncSearch::xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype)
> -{
> -    if (!cu->getCbf(absPartIdx, ttype, trDepth))
> -        return;
> -
> -    uint32_t fullDepth  = cu->getDepth(0) + trDepth;
> -    uint32_t trMode     = cu->getTransformIdx(absPartIdx);
> -    uint32_t subdiv     = (trMode > trDepth ? 1 : 0);
> -
>      if (subdiv)
>      {
>          uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
>          for (uint32_t part = 0; part < 4; part++)
>          {
> -            xEncCoeffQT(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype);
> +            xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum);
>          }
>
>          return;
>      }
>
> -    uint32_t origTrDepth = trDepth;
> -
> +    //===== Cbfs =====
> +    m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
> +}
> +
> +void TEncSearch::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height)
> +{
> +    uint32_t fullDepth  = cu->getDepth(0) + trDepth;
> +    uint32_t trMode     = cu->getTransformIdx(absPartIdx);
> +    uint32_t subdiv     = (trMode > trDepth ? 1 : 0);
>      uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> -    int chFmt           = cu->getChromaFormat();
> -    if ((ttype != TEXT_LUMA) && (log2TrSize == 2) && !(chFmt == CHROMA_444))
> +
> +    int      chFmt      = cu->getChromaFormat();
> +    if ((log2TrSize > 2) && !(chFmt == CHROMA_444))
> +    {
> +        if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))
> +            m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_U, trDepth, (subdiv == 0));
> +
> +        if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_V, trDepth - 1))
> +            m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, (width >> m_hChromaShift), (height >> m_vChromaShift), TEXT_CHROMA_V, trDepth, (subdiv == 0));
> +    }
> +
> +    if (subdiv)
> +    {
> +        absPartIdxStep >>= 2;
> +        width  >>= 1;
> +        height >>= 1;
> +
> +        uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
> +        for (uint32_t part = 0; part < 4; part++)
> +        {
> +            xEncSubdivCbfQTChroma(cu, trDepth + 1, absPartIdx + part * qtPartNum, absPartIdxStep, width, height);
> +        }
> +    }
> +}
> +
> +void TEncSearch::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
> +{
> +    const TextType ttype = TEXT_LUMA;
> +
> +    if (!cu->getCbf(absPartIdx, ttype, trDepth))
> +        return;
> +
> +    uint32_t fullDepth  = cu->getDepth(0) + trDepth;
> +    uint32_t trMode     = cu->getTransformIdx(absPartIdx);
> +
> +    if (trMode > trDepth)
> +    {
> +        uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
> +        for (uint32_t part = 0; part < 4; part++)
> +        {
> +            xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum);
> +        }
> +
> +        return;
> +    }
> +
> +    uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> +    uint32_t qtLayer    = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> +    uint32_t log2UnitSize = cu->getPic()->getLog2UnitSize();
> +    uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2);
> +    coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
> +    m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype);
> +}
> +
> +void TEncSearch::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype)
> +{
> +    if (!cu->getCbf(absPartIdx, ttype, trDepth))
> +        return;
> +
> +    uint32_t fullDepth  = cu->getDepth(0) + trDepth;
> +    uint32_t trMode     = cu->getTransformIdx(absPartIdx);
> +
> +    if (trMode > trDepth)
> +    {
> +        uint32_t qtPartNum = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);
> +        for (uint32_t part = 0; part < 4; part++)
> +        {
> +            xEncCoeffQTChroma(cu, trDepth + 1, absPartIdx + part * qtPartNum, ttype);
> +        }
> +
> +        return;
> +    }
> +
> +    uint32_t log2TrSize = cu->getSlice()->getSPS()->getLog2MaxCodingBlockSize() - fullDepth;
> +    uint32_t qtLayer    = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> +    uint32_t log2UnitSize = cu->getPic()->getLog2UnitSize();
> +
> +    uint32_t trDepthC = trDepth;
> +    uint32_t log2TrSizeC = log2TrSize - m_hChromaShift;
> +    int chFmt = cu->getChromaFormat();
> +    if ((log2TrSize == 2) && !(chFmt == CHROMA_444))
>      {
>          X265_CHECK(trDepth > 0, "transform size too small\n");
> -        trDepth--;
> -        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth) << 1);
> +        trDepthC--;
> +        log2TrSizeC++;
> +        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepthC) << 1);
>          bool bFirstQ = ((absPartIdx & (qpdiv - 1)) == 0);
>          if (!bFirstQ)
>          {
> @@ -255,132 +294,122 @@
>          }
>      }
>
> -    //===== coefficients =====
> -    uint32_t chroma     = (ttype != TEXT_LUMA ? 1 : 0);
> -    int cspx = chroma ? m_hChromaShift : 0;
> -    int cspy = chroma ? m_vChromaShift : 0;
> -    uint32_t width = cu->getCUSize(0) >> (trDepth + cspx);
> -    uint32_t height = cu->getCUSize(0) >> (trDepth + cspy);
> -    uint32_t coeffOffset = absPartIdx << (cu->getPic()->getLog2UnitSize() * 2 - (cspx + cspy));
> -    uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - log2TrSize;
> -    coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
> -
> -    if (width == height)
> +    if (chFmt != CHROMA_422)
>      {
> -        m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, width, ttype);
> +        uint32_t shift = (chFmt == CHROMA_420) ? 2 : 0;
> +        uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2 - shift);
> +        coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
> +        m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSizeC, ttype);
>      }
>      else
>      {
> -        uint32_t subTUSize = width * width;
> -        uint32_t partIdxesPerSubTU  = cu->getPic()->getNumPartInCU() >> (((cu->getDepth(absPartIdx) + trDepth) << 1) + 1);
> -
> -        if (cu->getCbf(absPartIdx, ttype, origTrDepth + 1))
> -            m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, width, ttype);
> -        if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, origTrDepth + 1))
> -            m_entropyCoder->encodeCoeffNxN(cu, coeff + subTUSize, absPartIdx + partIdxesPerSubTU, width, ttype);
> +        uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2 - 1);
> +        coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
> +        uint32_t subTUSize = 1 << (log2TrSizeC * 2);
> +        uint32_t partIdxesPerSubTU  = cu->getPic()->getNumPartInCU() >> (((cu->getDepth(absPartIdx) + trDepthC) << 1) + 1);
> +        if (cu->getCbf(absPartIdx, ttype, trDepth + 1))
> +            m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSizeC, ttype);
> +        if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, trDepth + 1))
> +            m_entropyCoder->encodeCoeffNxN(cu, coeff + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, ttype);
>      }
>  }
>
> -void TEncSearch::xEncIntraHeader(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLuma, bool bChroma)
> +void TEncSearch::xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
>  {
> -    if (bLuma)
> +    // CU header
> +    if (absPartIdx == 0)
>      {
> -        // CU header
> +        if (!cu->getSlice()->isIntra())
> +        {
> +            if (cu->getSlice()->getPPS()->getTransquantBypassEnableFlag())
> +            {
> +                m_entropyCoder->encodeCUTransquantBypassFlag(cu, 0);
> +            }
> +            m_entropyCoder->encodeSkipFlag(cu, 0);
> +            m_entropyCoder->encodePredMode(cu, 0);
> +        }
> +
> +        m_entropyCoder->encodePartSize(cu, 0, cu->getDepth(0));
> +    }
> +    // luma prediction mode
> +    if (cu->getPartitionSize(0) == SIZE_2Nx2N)
> +    {
>          if (absPartIdx == 0)
>          {
> -            if (!cu->getSlice()->isIntra())
> +            m_entropyCoder->encodeIntraDirModeLuma(cu, 0);
> +        }
> +    }
> +    else
> +    {
> +        uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
> +        if (trDepth == 0)
> +        {
> +            X265_CHECK(absPartIdx == 0, "unexpected absPartIdx %d\n", absPartIdx);
> +            for (uint32_t part = 0; part < 4; part++)
>              {
> -                if (cu->getSlice()->getPPS()->getTransquantBypassEnableFlag())
> -                {
> -                    m_entropyCoder->encodeCUTransquantBypassFlag(cu, 0);
> -                }
> -                m_entropyCoder->encodeSkipFlag(cu, 0);
> -                m_entropyCoder->encodePredMode(cu, 0);
> -            }
> -
> -            m_entropyCoder->encodePartSize(cu, 0, cu->getDepth(0));
> -        }
> -        // luma prediction mode
> -        if (cu->getPartitionSize(0) == SIZE_2Nx2N)
> -        {
> -            if (absPartIdx == 0)
> -            {
> -                m_entropyCoder->encodeIntraDirModeLuma(cu, 0);
> +                m_entropyCoder->encodeIntraDirModeLuma(cu, part * qtNumParts);
>              }
>          }
> -        else
> +        else if ((absPartIdx & (qtNumParts - 1)) == 0)
>          {
> -            uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
> -            if (trDepth == 0)
> -            {
> -                X265_CHECK(absPartIdx == 0, "unexpected absPartIdx %d\n", absPartIdx);
> -                for (uint32_t part = 0; part < 4; part++)
> -                {
> -                    m_entropyCoder->encodeIntraDirModeLuma(cu, part * qtNumParts);
> -                }
> -            }
> -            else if ((absPartIdx & (qtNumParts - 1)) == 0)
> -            {
> -                m_entropyCoder->encodeIntraDirModeLuma(cu, absPartIdx);
> -            }
> -        }
> -    }
> -    if (bChroma)
> -    {
> -        // chroma prediction mode
> -        if ((cu->getPartitionSize(0) == SIZE_2Nx2N) || !(cu->getChromaFormat() == CHROMA_444))
> -        {
> -            if (absPartIdx == 0)
> -            {
> -                m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
> -            }
> -        }
> -        else
> -        {
> -            uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
> -            X265_CHECK(trDepth > 0, "unexpected trDepth %d\n", trDepth);
> -            if ((absPartIdx & (qtNumParts - 1)) == 0)
> -                m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
> +            m_entropyCoder->encodeIntraDirModeLuma(cu, absPartIdx);
>          }
>      }
>  }
>
> -uint32_t TEncSearch::xGetIntraBitsQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, bool bLuma, bool bChroma)
> +void TEncSearch::xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx)
> +{
> +    // chroma prediction mode
> +    if ((cu->getPartitionSize(0) == SIZE_2Nx2N) || !(cu->getChromaFormat() == CHROMA_444))
> +    {
> +        if (absPartIdx == 0)
> +        {
> +            m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
> +        }
> +    }
> +    else
> +    {
> +        uint32_t qtNumParts = cu->getTotalNumPart() >> 2;
> +        if ((absPartIdx & (qtNumParts - 1)) == 0)
> +            m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx);
> +    }
> +}
> +
> +uint32_t TEncSearch::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx)
>  {
>      m_entropyCoder->resetBits();
> -    xEncIntraHeader(cu, trDepth, absPartIdx, bLuma, bChroma);
> -    xEncSubdivCbfQT(cu, trDepth, absPartIdx, absPartIdxStep, cu->getCUSize(absPartIdx), cu->getCUSize(absPartIdx), bLuma, bChroma);
> -
> -    if (bLuma)
> -    {
> -        xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_LUMA);
> -    }
> -    if (bChroma)
> -    {
> -        xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_U);
> -        xEncCoeffQT(cu, trDepth, absPartIdx, TEXT_CHROMA_V);
> -    }
> +    xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
> +    xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx);
> +    xEncCoeffQTLuma(cu, trDepth, absPartIdx);
>      return m_entropyCoder->getNumberOfWrittenBits();
>  }
>
> -uint32_t TEncSearch::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff)
> +uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep)
>  {
>      m_entropyCoder->resetBits();
> -    xEncIntraHeader(cu, trDepth, absPartIdx, true, false);
> -    xEncSubdivCbfQT(cu, trDepth, absPartIdx, 0, cu->getCUSize(absPartIdx), cu->getCUSize(absPartIdx), true, false);
> -
> -    if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth))
> -    {
> -        m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, 1 << log2TrSize, TEXT_LUMA);
> -    }
> -
> +    xEncIntraHeaderChroma(cu, absPartIdx);
> +    xEncSubdivCbfQTChroma(cu, trDepth, absPartIdx, absPartIdxStep, cu->getCUSize(absPartIdx), cu->getCUSize(absPartIdx));
> +    xEncCoeffQTChroma(cu, trDepth, absPartIdx, TEXT_CHROMA_U);
> +    xEncCoeffQTChroma(cu, trDepth, absPartIdx, TEXT_CHROMA_V);
>      return m_entropyCoder->getNumberOfWrittenBits();
>  }
>
> -uint32_t TEncSearch::xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff)
> +uint32_t TEncSearch::xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff)
>  {
>      m_entropyCoder->resetBits();
> -    m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, 1 << log2TrSizeC, (TextType)chromaId);
> +    xEncIntraHeaderLuma(cu, trDepth, absPartIdx);
> +    xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx);
> +
> +    if (cu->getCbf(absPartIdx, TEXT_LUMA, trDepth))
> +        m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, TEXT_LUMA);
> +
> +    return m_entropyCoder->getNumberOfWrittenBits();
> +}
> +
> +uint32_t TEncSearch::xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff)
> +{
> +    m_entropyCoder->resetBits();
> +    m_entropyCoder->encodeCoeffNxN(cu, coeff, absPartIdx, log2TrSizeC, (TextType)chromaId);
>      return m_entropyCoder->getNumberOfWrittenBits();
>  }
>
> @@ -688,7 +717,7 @@
>                  }
>                  else
>                  {
> -                    uint32_t singleBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx, log2TrSize, coeff);
> +                    uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeff);
>                      if (m_rdCost->psyRdEnabled())
>                          singleCostTmp = m_rdCost->calcPsyRdCost(singleDistYTmp, singleBits, singlePsyEnergyYTmp);
>                      else
> @@ -748,7 +777,7 @@
>              }
>              cu->setCbfSubParts(singleCbfY << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
>
> -            uint32_t singleBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY);
> +            uint32_t singleBits = xGetIntraBitsLuma(cu, trDepth, absPartIdx, log2TrSize, coeffY);
>              if (m_param->rdPenalty && (log2TrSize == 5) && !isIntraSlice)
>                  singleBits *= 4;
>
> @@ -799,7 +828,7 @@
>          m_rdGoOnSbacCoder->load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]);
>
>          //----- determine rate and r-d cost -----
> -        uint32_t splitBits = xGetIntraBitsQT(cu, trDepth, absPartIdx, 0, true, false);
> +        uint32_t splitBits = xGetIntraBitsQTLuma(cu, trDepth, absPartIdx);
>          if (m_rdCost->psyRdEnabled())
>              splitCost = m_rdCost->calcPsyRdCost(splitDistY, splitBits, splitPsyEnergyY);
>          else
> @@ -1174,7 +1203,7 @@
>                          }
>                          else
>                          {
> -                            uint32_t bitsTmp = singleCbfCTmp ? xGetIntraBitsQTChroma(cu, absPartIdxC, log2TrSizeC, chromaId, coeff) : 0;
> +                            uint32_t bitsTmp = singleCbfCTmp ? xGetIntraBitsChroma(cu, absPartIdxC, log2TrSizeC, chromaId, coeff) : 0;
>                              if (m_rdCost->psyRdEnabled())
>                              {
>                                  uint32_t zorder = cu->getZorderIdxInCU() + absPartIdxC;
> @@ -1889,7 +1918,7 @@
>                  m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);
>              }
>
> -            uint32_t bits = xGetIntraBitsQT(cu, initTrDepth, absPartIdxC, tuIterator.m_absPartIdxStep, false, true);
> +            uint32_t bits = xGetIntraBitsQTChroma(cu, initTrDepth, absPartIdxC, tuIterator.m_absPartIdxStep);
>              uint64_t cost = 0;
>              if (m_rdCost->psyRdEnabled())
>                  cost = m_rdCost->calcPsyRdCost(dist, bits, cu->m_psyEnergy);
> @@ -2736,7 +2765,7 @@
>              do
>              {
>                  uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> -                uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
> +                uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
>
>                  int16_t *curResiU = resiYuv->getCbAddr(absPartIdxC);
>                  int16_t *curResiV = resiYuv->getCrAddr(absPartIdxC);
> @@ -2747,13 +2776,13 @@
>                  int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>                  m_trQuant->selectLambda(TEXT_CHROMA_U);
> -                absSumU = m_trQuant->transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUBufferOffset,
> +                absSumU = m_trQuant->transformNxN(cu, curResiU, strideResiC, coeffCurU + subTUOffset,
>                                                    trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPosU, false, curuseRDOQ);
>
>                  curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>                  m_trQuant->selectLambda(TEXT_CHROMA_V);
> -                absSumV = m_trQuant->transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUBufferOffset,
> +                absSumV = m_trQuant->transformNxN(cu, curResiV, strideResiC, coeffCurV + subTUOffset,
>                                                    trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPosV, false, curuseRDOQ);
>
>                  cu->setCbfPartRange(absSumU ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
> @@ -2766,7 +2795,7 @@
>
>                      int scalingListType = 3 + TEXT_CHROMA_U;
>                      X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
> -                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset, trSizeC, scalingListType, false, lastPosU);
> +                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUOffset, trSizeC, scalingListType, false, lastPosU);
>                  }
>                  else
>                  {
> @@ -2779,7 +2808,7 @@
>
>                      int scalingListType = 3 + TEXT_CHROMA_V;
>                      X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
> -                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset, trSizeC, scalingListType, false, lastPosV);
> +                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUOffset, trSizeC, scalingListType, false, lastPosV);
>                  }
>                  else
>                  {
> @@ -2923,9 +2952,9 @@
>          cu->setCbfSubParts(absSum[TEXT_LUMA][0] ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);
>
>          m_entropyCoder->resetBits();
> -        m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
> +        m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
>          if (absSum[TEXT_LUMA][0])
> -            m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx,  trSize, TEXT_LUMA);
> +            m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
>          singleBitsComp[TEXT_LUMA][0] = m_entropyCoder->getNumberOfWrittenBits();
>
>          uint32_t singleBitsPrev = singleBitsComp[TEXT_LUMA][0];
> @@ -2938,7 +2967,7 @@
>              do
>              {
>                  uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> -                uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
> +                uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
>
>                  cu->setTransformSkipPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
>                  cu->setTransformSkipPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
> @@ -2951,26 +2980,26 @@
>                  int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>                  m_trQuant->selectLambda(TEXT_CHROMA_U);
> -                absSum[TEXT_CHROMA_U][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUBufferOffset,
> +                absSum[TEXT_CHROMA_U][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurU + subTUOffset,
>                                                                                        trSizeC, TEXT_CHROMA_U, absPartIdxC, &lastPos[TEXT_CHROMA_U][tuIterator.m_section], false, curuseRDOQ);
>                  //Cr transform
>                  curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, chFmt);
>                  m_trQuant->selectLambda(TEXT_CHROMA_V);
> -                absSum[TEXT_CHROMA_V][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUBufferOffset,
> +                absSum[TEXT_CHROMA_V][tuIterator.m_section] = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth, coeffCurV + subTUOffset,
>                                                                                        trSizeC, TEXT_CHROMA_V, absPartIdxC, &lastPos[TEXT_CHROMA_V][tuIterator.m_section], false, curuseRDOQ);
>
>                  cu->setCbfPartRange(absSum[TEXT_CHROMA_U][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_U, absPartIdxC, tuIterator.m_absPartIdxStep);
>                  cu->setCbfPartRange(absSum[TEXT_CHROMA_V][tuIterator.m_section] ? setCbf : 0, TEXT_CHROMA_V, absPartIdxC, tuIterator.m_absPartIdxStep);
>
> -                m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_U, trMode, true);
> +                m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_U, trMode);
>                  if (absSum[TEXT_CHROMA_U][tuIterator.m_section])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_U);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUOffset, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
>                  singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsPrev;
>
> -                m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_V, trMode, true);
> +                m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_V, trMode);
>                  if (absSum[TEXT_CHROMA_V][tuIterator.m_section])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUBufferOffset, absPartIdxC, trSizeC, TEXT_CHROMA_V);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUOffset, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
>                  uint32_t newBits = m_entropyCoder->getNumberOfWrittenBits();
>                  singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = newBits - (singleBitsPrev + singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section]);
>
> @@ -3105,7 +3134,7 @@
>              do
>              {
>                  uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> -                uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
> +                uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
>
>                  int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
>                  int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
> @@ -3123,7 +3152,7 @@
>
>                      int scalingListType = 3 + TEXT_CHROMA_U;
>                      X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
> -                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUBufferOffset,
> +                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiU, strideResiC, coeffCurU + subTUOffset,
>                                                 trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_U][tuIterator.m_section]);
>                      uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absPartIdxC), resiYuv->m_cwidth,
>                                                                   curResiU, strideResiC);
> @@ -3166,7 +3195,7 @@
>                          {
>                              absSum[TEXT_CHROMA_U][tuIterator.m_section] = 0;
>  #if CHECKED_BUILD || _DEBUG
> -                            ::memset(coeffCurU + subTUBufferOffset, 0, sizeof(coeff_t) * numCoeffC);
> +                            ::memset(coeffCurU + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
>  #endif
>                              if (checkTransformSkipUV)
>                              {
> @@ -3215,7 +3244,7 @@
>
>                      int scalingListType = 3 + TEXT_CHROMA_V;
>                      X265_CHECK(scalingListType < 6, "scalingListType too large %d\n", scalingListType);
> -                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUBufferOffset,
> +                    m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdxC), REG_DCT, curResiV, strideResiC, coeffCurV + subTUOffset,
>                                                 trSizeC, scalingListType, false, lastPos[TEXT_CHROMA_V][tuIterator.m_section]);
>                      uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absPartIdxC), resiYuv->m_cwidth,
>                                                                   curResiV, strideResiC);
> @@ -3259,7 +3288,7 @@
>                          {
>                              absSum[TEXT_CHROMA_V][tuIterator.m_section] = 0;
>  #if CHECKED_BUILD || _DEBUG
> -                            ::memset(coeffCurV + subTUBufferOffset, 0, sizeof(coeff_t) * numCoeffC);
> +                            ::memset(coeffCurV + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
>  #endif
>                              if (checkTransformSkipUV)
>                              {
> @@ -3330,8 +3359,8 @@
>              if (absSumTransformSkipY)
>              {
>                  m_entropyCoder->resetBits();
> -                m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
> -                m_entropyCoder->encodeCoeffNxN(cu, tsCoeffY, absPartIdx, trSize, TEXT_LUMA);
> +                m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
> +                m_entropyCoder->encodeCoeffNxN(cu, tsCoeffY, absPartIdx, log2TrSize, TEXT_LUMA);
>                  const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();
>
>                  m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, QP_BD_OFFSET, 0, chFmt);
> @@ -3396,7 +3425,7 @@
>              do
>              {
>                  uint32_t absPartIdxC = tuIterator.m_absPartIdxTURelCU;
> -                uint32_t subTUBufferOffset = trSizeC * trSizeC * tuIterator.m_section;
> +                uint32_t subTUOffset = tuIterator.m_section << (log2TrSizeC * 2);
>
>                  int16_t *curResiU = m_qtTempShortYuv[qtLayer].getCbAddr(absPartIdxC);
>                  int16_t *curResiV = m_qtTempShortYuv[qtLayer].getCrAddr(absPartIdxC);
> @@ -3433,8 +3462,8 @@
>
>                  if (absSumTransformSkipU)
>                  {
> -                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_U, trMode, true);
> -                    m_entropyCoder->encodeCoeffNxN(cu, tsCoeffU, absPartIdxC, trSizeC, TEXT_CHROMA_U);
> +                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_U, trMode);
> +                    m_entropyCoder->encodeCoeffNxN(cu, tsCoeffU, absPartIdxC, log2TrSizeC, TEXT_CHROMA_U);
>                      singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits();
>
>                      curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();
> @@ -3475,14 +3504,14 @@
>                      singlePsyEnergyComp[TEXT_CHROMA_U][tuIterator.m_section] = nonZeroPsyEnergyU;
>                      absSum[TEXT_CHROMA_U][tuIterator.m_section] = absSumTransformSkipU;
>                      bestTransformMode[TEXT_CHROMA_U][tuIterator.m_section] = 1;
> -                    memcpy(coeffCurU + subTUBufferOffset, tsCoeffU, sizeof(coeff_t) * numCoeffC);
> +                    memcpy(coeffCurU + subTUOffset, tsCoeffU, sizeof(coeff_t) * numCoeffC);
>                      primitives.square_copy_ss[sizeIdxC](curResiU, strideResiC, tsResiU, trSizeC);
>                  }
>
>                  if (absSumTransformSkipV)
>                  {
> -                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC, tuIterator.m_absPartIdxStep, trSizeC, trSizeC, TEXT_CHROMA_V, trMode, true);
> -                    m_entropyCoder->encodeCoeffNxN(cu, tsCoeffV, absPartIdxC, trSizeC, TEXT_CHROMA_V);
> +                    m_entropyCoder->encodeQtCbf(cu, absPartIdxC, TEXT_CHROMA_V, trMode);
> +                    m_entropyCoder->encodeCoeffNxN(cu, tsCoeffV, absPartIdxC, log2TrSizeC, TEXT_CHROMA_V);
>                      singleBitsComp[TEXT_CHROMA_V][tuIterator.m_section] = m_entropyCoder->getNumberOfWrittenBits() - singleBitsComp[TEXT_CHROMA_U][tuIterator.m_section];
>
>                      curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();
> @@ -3523,7 +3552,7 @@
>                      singlePsyEnergyComp[TEXT_CHROMA_V][tuIterator.m_section] = nonZeroPsyEnergyV;
>                      absSum[TEXT_CHROMA_V][tuIterator.m_section] = absSumTransformSkipV;
>                      bestTransformMode[TEXT_CHROMA_V][tuIterator.m_section] = 1;
> -                    memcpy(coeffCurV + subTUBufferOffset, tsCoeffV, sizeof(coeff_t) * numCoeffC);
> +                    memcpy(coeffCurV + subTUOffset, tsCoeffV, sizeof(coeff_t) * numCoeffC);
>                      primitives.square_copy_ss[sizeIdxC](curResiV, strideResiC, tsResiV, trSizeC);
>                  }
>
> @@ -3556,32 +3585,32 @@
>              m_entropyCoder->encodeQtCbf(cu, absPartIdx, absPartIdxStep, trSizeC, trHeightC, TEXT_CHROMA_V, trMode, true);
>          }
>
> -        m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA,     trMode, true);
> +        m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
>          if (absSum[TEXT_LUMA][0])
> -            m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
> +            m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
>
>          if (bCodeChroma)
>          {
>              if (!splitIntoSubTUs)
>              {
>                  if (absSum[TEXT_CHROMA_U][0])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
>                  if (absSum[TEXT_CHROMA_V][0])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
>              }
>              else
>              {
> -                uint32_t subTUSize = trSizeC * trSizeC;
> +                uint32_t subTUSize = 1 << (log2TrSizeC * 2);
>                  uint32_t partIdxesPerSubTU = absPartIdxStep >> 1;
>
>                  if (absSum[TEXT_CHROMA_U][0])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
>                  if (absSum[TEXT_CHROMA_U][1])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_U);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_U);
>                  if (absSum[TEXT_CHROMA_V][0])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
>                  if (absSum[TEXT_CHROMA_V][1])
> -                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_V);
> +                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_V);
>              }
>          }
>
> @@ -3769,7 +3798,6 @@
>      X265_CHECK(cu->getPredictionMode(absPartIdx) != MODE_INTRA, "xEncodeResidualQT() with intra block\n");
>
>      bool mCodeAll = true;
> -    uint32_t trSize    = 1 << log2TrSize;
>      uint32_t trWidthC  = 1 << log2TrSizeC;
>      uint32_t trHeightC = splitIntoSubTUs ? (trWidthC << 1) : trWidthC;
>
> @@ -3821,49 +3849,48 @@
>
>          if (bSubdivAndCbf)
>          {
> -            m_entropyCoder->encodeQtCbf(cu, absPartIdx, 0, trSize, trSize, TEXT_LUMA, trMode, true);
> +            m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode);
>          }
>          else
>          {
>              if (ttype == TEXT_LUMA && cu->getCbf(absPartIdx, TEXT_LUMA, trMode))
>              {
> -                m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trSize, TEXT_LUMA);
> +                m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, log2TrSize, TEXT_LUMA);
>              }
>              if (bCodeChroma)
>              {
>                  uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + m_vChromaShift);
>                  coeff_t *coeffCurU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
>                  coeff_t *coeffCurV = m_qtTempCoeff[2][qtLayer] + coeffOffsetC;
> -                uint32_t trSizeC = 1 << log2TrSizeC;
>
>                  if (!splitIntoSubTUs)
>                  {
>                      if (ttype == TEXT_CHROMA_U && cu->getCbf(absPartIdx, TEXT_CHROMA_U, trMode))
>                      {
> -                        m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
> +                        m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
>                      }
>                      if (ttype == TEXT_CHROMA_V && cu->getCbf(absPartIdx, TEXT_CHROMA_V, trMode))
>                      {
> -                        m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
> +                        m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
>                      }
>                  }
>                  else
>                  {
>                      uint32_t partIdxesPerSubTU  = cu->getPic()->getNumPartInCU() >> (((cu->getDepth(absPartIdx) + trModeC) << 1) + 1);
> -                    uint32_t subTUSize = trSizeC * trSizeC;
> +                    uint32_t subTUSize = 1 << (log2TrSizeC * 2);
>                      if (ttype == TEXT_CHROMA_U && cu->getCbf(absPartIdx, TEXT_CHROMA_U, trMode))
>                      {
>                          if (cu->getCbf(absPartIdx, ttype, trMode + 1))
> -                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trSizeC, TEXT_CHROMA_U);
> +                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, log2TrSizeC, TEXT_CHROMA_U);
>                          if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, trMode + 1))
> -                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_U);
> +                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurU + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_U);
>                      }
>                      if (ttype == TEXT_CHROMA_V && cu->getCbf(absPartIdx, TEXT_CHROMA_V, trMode))
>                      {
>                          if (cu->getCbf(absPartIdx, ttype, trMode + 1))
> -                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trSizeC, TEXT_CHROMA_V);
> +                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, log2TrSizeC, TEXT_CHROMA_V);
>                          if (cu->getCbf(absPartIdx + partIdxesPerSubTU, ttype, trMode + 1))
> -                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, trSizeC, TEXT_CHROMA_V);
> +                            m_entropyCoder->encodeCoeffNxN(cu, coeffCurV + subTUSize, absPartIdx + partIdxesPerSubTU, log2TrSizeC, TEXT_CHROMA_V);
>                      }
>                  }
>              }
> diff -r 32aa6cc3cf4d -r 4c30d66afc78 source/Lib/TLibEncoder/TEncSearch.h
> --- a/source/Lib/TLibEncoder/TEncSearch.h       Thu Jun 26 17:19:08 2014 -0700
> +++ b/source/Lib/TLibEncoder/TEncSearch.h       Mon Jun 30 11:48:58 2014 +0900
> @@ -193,13 +193,17 @@
>      // Intra search
>      // --------------------------------------------------------------------------------------------
>
> -    void xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx,  uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
> +    void xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> +    void xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx,  uint32_t absPartIdxStep, uint32_t width, uint32_t height);
>
> -    void xEncCoeffQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);
> -    void xEncIntraHeader(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLuma, bool bChroma);
> -    uint32_t xGetIntraBitsQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, bool bLuma, bool bChroma);
> -    uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff);
> -    uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);
> +    void xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> +    void xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype);
> +    void xEncIntraHeaderLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> +    void xEncIntraHeaderChroma(TComDataCU* cu, uint32_t absPartIdx);
> +    uint32_t xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx);
> +    uint32_t xGetIntraBitsQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep);
> +    uint32_t xGetIntraBitsLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t log2TrSize, coeff_t* coeff);
> +    uint32_t xGetIntraBitsChroma(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSizeC, uint32_t chromaId, coeff_t* coeff);
>      void xIntraCodingLumaBlk(TComDataCU* cu, uint32_t absPartIdx, uint32_t log2TrSize, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv,
>                               int16_t* reconQt, uint32_t reconQtStride, coeff_t* coeff,
>                               uint32_t& cbf, uint32_t& outDist);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



-- 
Steve Borho


More information about the x265-devel mailing list