<div dir="ltr"><div>The range-ext code has a whole lot of problems (whitespace and otherwise), but functionally we're getting there. I'd like to push these patches to the default tip anyway, and let Ashok work on correcting the code and polishing it up. <br>
</div><div><br>With such a large series, I'd like to avoid painful merges and related bug fixes as much as possible. <br></div><div><br></div>I was waiting for him to fix an output mismatch for 4:2:0, which he has now done. <br>
<div><div><div><div><div><div id="__tbSetup"></div></div></div></div></div></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Wed, Jan 8, 2014 at 5:38 AM, Steve Borho <span dir="ltr"><<a href="mailto:steve@borho.org" target="_blank">steve@borho.org</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote"><div class="im">On Tue, Jan 7, 2014 at 5:16 AM,  <span dir="ltr"><<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>></span> wrote:<br>

<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex"># HG changeset patch<br>
# User <a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a><br>
# Date 1389093279 -19800<br>
#      Tue Jan 07 16:44:39 2014 +0530<br>
# Node ID f7d21da102acf8d88be3f6ea6b6db5dc12134cdb<br>
# Parent  4811da38078cd02434f7da1dcc1b0af4dcf5adb8<br>
Modify TEncSearch structure to support multiple color space formats<br></blockquote><div><br></div></div><div>Some parts of this patch look redundant with some earlier ones.</div><div><br></div><div>It's an impressive series, ignoring the white-space and style problems.</div>

<div><br></div><div>Configuring the 4:4:4 chroma primitives needs to happen in x265_setup_primitives(), in the same place it configures other function pointer copies.  This way you get ASM optimized functions if they were configured.</div>
<div><div class="h5">
<div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex">
<br>
diff -r 4811da38078c -r f7d21da102ac source/Lib/TLibCommon/CommonDef.h<br>
--- a/source/Lib/TLibCommon/CommonDef.h Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/Lib/TLibCommon/CommonDef.h Tue Jan 07 16:44:39 2014 +0530<br>
@@ -88,6 +88,9 @@<br>
 #define MLS_GRP_NUM                 64 ///< G644 : Max number of coefficient groups, max(16, 64)<br>
 #define MLS_CG_SIZE                 4 ///< G644 : Coefficient group size of 4x4<br>
<br>
+#define MLS_CG_LOG2_WIDTH           2<br>
+#define MLS_CG_LOG2_HEIGHT          2<br>
+<br>
 #define ARL_C_PRECISION             7 ///< G382: 7-bit arithmetic precision<br>
 #define LEVEL_RANGE                 30 ///< G382: max coefficient level in statistics collection<br>
<br>
diff -r 4811da38078c -r f7d21da102ac source/Lib/TLibEncoder/TEncSearch.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSearch.cpp     Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Tue Jan 07 16:44:39 2014 +0530<br>
@@ -229,7 +229,7 @@<br>
<br>
     if (bChroma)<br>
     {<br>
-        if (trSizeLog2 > 2)<br>
+        if ((trSizeLog2 > 2) && !(cu->getChromaFormat() == CHROMA_444))<br>
         {<br>
             if (trDepth == 0 || cu->getCbf(absPartIdx, TEXT_CHROMA_U, trDepth - 1))<br>
                 m_entropyCoder->encodeQtCbf(cu, absPartIdx, TEXT_CHROMA_U, trDepth);<br>
@@ -275,7 +275,7 @@<br>
         return;<br>
     }<br>
<br>
-    if (ttype != TEXT_LUMA && trSizeLog2 == 2)<br>
+    if ( (ttype != TEXT_LUMA) && (trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))<br>
     {<br>
         assert(trDepth > 0);<br>
         trDepth--;<br>
@@ -288,9 +288,11 @@<br>
     }<br>
<br>
     //===== coefficients =====<br>
-    uint32_t width = cu->getWidth(0) >> (trDepth + chroma);<br>
-    uint32_t height = cu->getHeight(0) >> (trDepth + chroma);<br>
-    uint32_t coeffOffset = (cu->getPic()->getMinCUWidth() * cu->getPic()->getMinCUHeight() * absPartIdx) >> (chroma << 1);<br>
+    int cspx = chroma ? m_hChromaShift : 0;<br>
+    int cspy = chroma ? m_vChromaShift : 0;<br>
+    uint32_t width = cu->getWidth(0) >> (trDepth + cspx);<br>
+    uint32_t height = cu->getHeight(0) >> (trDepth + cspy);<br>
+    uint32_t coeffOffset = (cu->getPic()->getMinCUWidth() >> cspx) * (cu->getPic()->getMinCUHeight() >> cspy) * absPartIdx;<br>
     uint32_t qtLayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<br>
     TCoeff* coeff = 0;<br>
     switch (ttype)<br>
@@ -363,12 +365,23 @@<br>
             }<br>
         }<br>
     }<br>
+<br>
     if (bChroma)<br>
     {<br>
         // chroma prediction mode<br>
-        if (absPartIdx == 0)<br>
+        if ((cu->getPartitionSize(0) == SIZE_2Nx2N) || !(cu->getChromaFormat() == CHROMA_444))<br>
         {<br>
-            m_entropyCoder->encodeIntraDirModeChroma(cu, 0, true);<br>
+            if (absPartIdx == 0)<br>
+            {<br>
+                m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx, true);<br>
+            }<br>
+        }<br>
+        else<br>
+        {<br>
+            uint32_t qtNumParts = cu->getTotalNumPart() >> 2;<br>
+            assert(trDepth > 0);<br>
+            if ((absPartIdx%qtNumParts) == 0)<br>
+                m_entropyCoder->encodeIntraDirModeChroma(cu, absPartIdx, true);<br>
         }<br>
     }<br>
 }<br>
@@ -475,7 +488,7 @@<br>
     int lastPos = -1;<br>
     cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);<br>
<br>
-    m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);<br>
+    m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());<br>
     m_trQuant->selectLambda(TEXT_LUMA);<br>
<br>
     absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, height, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);<br>
@@ -520,7 +533,7 @@<br>
     uint32_t fullDepth   = cu->getDepth(0) + trDepth;<br>
     uint32_t trSizeLog2  = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> fullDepth] + 2;<br>
<br>
-    if (trSizeLog2 == 2)<br>
+    if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))<br>
     {<br>
         assert(trDepth > 0);<br>
         trDepth--;<br>
@@ -534,7 +547,7 @@<br>
<br>
     TextType ttype          = (chromaId > 0 ? TEXT_CHROMA_V : TEXT_CHROMA_U);<br>
     uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdx);<br>
-    uint32_t width          = cu->getWidth(0) >> (trDepth + m_hChromaShift);<br>
+    uint32_t width          = cu->getWidth(0)  >> (trDepth + m_hChromaShift);<br>
     uint32_t height         = cu->getHeight(0) >> (trDepth + m_vChromaShift);<br>
     uint32_t stride         = fencYuv->getCStride();<br>
     Pel*     fenc           = (chromaId > 0 ? fencYuv->getCrAddr(absPartIdx) : fencYuv->getCbAddr(absPartIdx));<br>
@@ -543,10 +556,10 @@<br>
     Pel*     recon          = (chromaId > 0 ? predYuv->getCrAddr(absPartIdx) : predYuv->getCbAddr(absPartIdx));<br>
<br>
     uint32_t qtlayer        = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<br>
-    uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> 2;<br>


+    uint32_t numCoeffPerInc = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1)) >> (m_hChromaShift + m_vChromaShift);<br>


     TCoeff*  coeff          = (chromaId > 0 ? m_qtTempCoeffCr[qtlayer] : m_qtTempCoeffCb[qtlayer]) + numCoeffPerInc * absPartIdx;<br>
     int16_t* reconQt        = (chromaId > 0 ? m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx) : m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx));<br>
-    assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
+    uint32_t reconQtStride  = m_qtTempTComYuv[qtlayer].m_cwidth;<br>
<br>
     uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;<br>
     Pel*     reconIPred       = (chromaId > 0 ? cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder) : cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder));<br>
@@ -557,7 +570,7 @@<br>
     //===== update chroma mode =====<br>
     if (chromaPredMode == DM_CHROMA_IDX)<br>
     {<br>
-        chromaPredMode = cu->getLumaIntraDir(0);<br>
+        chromaPredMode = cu->getLumaIntraDir(absPartIdx);<br>
     }<br>
<br>
     //===== init availability pattern =====<br>
@@ -565,11 +578,11 @@<br>
     {<br>
         cu->getPattern()->initPattern(cu, trDepth, absPartIdx);<br>
<br>
-        cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight);<br>
+        cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);<br>
         Pel* chromaPred = (chromaId > 0 ? cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) : cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));<br>
<br>
         //===== get prediction signal =====<br>
-        predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width);<br>
+        predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, cu->getChromaFormat());<br>
<br>
         // save prediction<br>
         if (default0Save1Load2 == 1)<br>
@@ -612,7 +625,7 @@<br>
         {<br>
             curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
         }<br>
-        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
         m_trQuant->selectLambda(TEXT_CHROMA);<br>
<br>
@@ -639,7 +652,7 @@<br>
     //===== reconstruction =====<br>
     assert(((uint32_t)(size_t)residual & (width - 1)) == 0);<br>
     assert(width <= 32);<br>
-    primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE / 2, reconIPredStride);<br>
+    primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, reconQtStride, reconIPredStride);<br>
<br>
     //===== update distortion =====<br>
     uint32_t dist = primitives.sse_pp[part](fenc, stride, recon, stride);<br>
@@ -702,11 +715,11 @@<br>
     uint32_t singleCbfY  = 0;<br>
     uint32_t singleCbfU  = 0;<br>
     uint32_t singleCbfV  = 0;<br>
-    bool   checkTransformSkip  = cu->getSlice()->getPPS()->getUseTransformSkip();<br>
+    bool     checkTransformSkip  = cu->getSlice()->getPPS()->getUseTransformSkip();<br>
     uint32_t widthTransformSkip  = cu->getWidth(0) >> trDepth;<br>
     uint32_t heightTransformSkip = cu->getHeight(0) >> trDepth;<br>
-    int    bestModeId    = 0;<br>
-    int    bestModeIdUV[2] = { 0, 0 };<br>
+    int      bestModeId          = 0;<br>
+    int      bestModeIdUV[2]     = { 0, 0 };<br>
<br>
     checkTransformSkip &= (widthTransformSkip == 4 && heightTransformSkip == 4);<br>
     checkTransformSkip &= (!cu->getCUTransquantBypass(0));<br>
@@ -729,8 +742,8 @@<br>
             uint32_t singleCbfUTmp      = 0;<br>
             uint32_t singleCbfVTmp      = 0;<br>
             uint64_t singleCostTmp      = 0;<br>
-            int    default0Save1Load2 = 0;<br>
-            int    firstCheckId       = 0;<br>
+            int      default0Save1Load2 = 0;<br>
+            int      firstCheckId       = 0;<br>
<br>
             uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + (trDepth - 1)) << 1);<br>
             bool   bFirstQ = ((absPartIdx % qpdiv) == 0);<br>
@@ -964,17 +977,17 @@<br>
<br>
         if (!bLumaOnly)<br>
         {<br>
-            width >>= 1;<br>
-            height >>= 1;<br>
+            width  >>= m_hChromaShift;<br>
+            height >>= m_vChromaShift;<br>
             src       = m_qtTempTComYuv[qtLayer].getCbAddr(absPartIdx);<br>
-            assert(m_qtTempTComYuv[qtLayer].m_cwidth == MAX_CU_SIZE / 2);<br>
+            uint32_t srcstride = m_qtTempTComYuv[qtLayer].m_cwidth;<br>
             dst       = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<br>
             dststride = cu->getPic()->getPicYuvRec()->getCStride();<br>
-            primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE / 2);<br>
+            primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);<br>
<br>
             src = m_qtTempTComYuv[qtLayer].getCrAddr(absPartIdx);<br>
             dst = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<br>
-            primitives.blockcpy_ps(width, height, dst, dststride, src, MAX_CU_SIZE / 2);<br>
+            primitives.blockcpy_ps(width, height, dst, dststride, src, srcstride);<br>
         }<br>
     }<br>
<br>
@@ -1049,7 +1062,7 @@<br>
         int lastPos = -1;<br>
         cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);<br>
<br>
-        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);<br>
+        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());<br>
         m_trQuant->selectLambda(TEXT_LUMA);<br>
         absSum = m_trQuant->transformNxN(cu, residual, stride, coeff, width, height, TEXT_LUMA, absPartIdx, &lastPos, useTransformSkip);<br>
<br>
@@ -1081,7 +1094,6 @@<br>
     if (bCheckSplit && !bCheckFull)<br>
     {<br>
         //----- code splitted block -----<br>
-<br>
         uint32_t qPartsDiv     = cu->getPic()->getNumPartInCU() >> ((fullDepth + 1) << 1);<br>
         uint32_t absPartIdxSub = absPartIdx;<br>
         uint32_t splitCbfY = 0;<br>
@@ -1267,12 +1279,12 @@<br>
         reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zOrder);<br>
         reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();<br>
         reconQt = m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);<br>
-        assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
-        primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);<br>
+        uint32_t reconQtStride = m_qtTempTComYuv[qtlayer].m_cwidth;<br>
+        primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
<br>
         reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zOrder);<br>
         reconQt    = m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);<br>
-        primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);<br>
+        primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
     }<br>
 }<br>
<br>
@@ -1376,20 +1388,20 @@<br>
         uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;<br>
         uint32_t width            = cu->getWidth(0) >> (trDepth + 1);<br>
         uint32_t height           = cu->getHeight(0) >> (trDepth + 1);<br>
-        assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
+        uint32_t reconQtStride    = m_qtTempTComYuv[qtlayer].m_cwidth;<br>
         uint32_t reconIPredStride = cu->getPic()->getPicYuvRec()->getCStride();<br>
<br>
         if (stateU0V1Both2 == 0 || stateU0V1Both2 == 2)<br>
         {<br>
             Pel* reconIPred = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<br>
             int16_t* reconQt  = m_qtTempTComYuv[qtlayer].getCbAddr(absPartIdx);<br>
-            primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);<br>
+            primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
         }<br>
         if (stateU0V1Both2 == 1 || stateU0V1Both2 == 2)<br>
         {<br>
             Pel* reconIPred = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<br>
             int16_t* reconQt  = m_qtTempTComYuv[qtlayer].getCrAddr(absPartIdx);<br>
-            primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, MAX_CU_SIZE / 2);<br>
+            primitives.blockcpy_ps(width, height, reconIPred, reconIPredStride, reconQt, reconQtStride);<br>
         }<br>
     }<br>
 }<br>
@@ -1411,7 +1423,7 @@<br>
         uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> fullDepth] + 2;<br>
<br>
         uint32_t actualTrDepth = trDepth;<br>
-        if (trSizeLog2 == 2)<br>
+        if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))<br>
         {<br>
             assert(trDepth > 0);<br>
             actualTrDepth--;<br>
@@ -1557,7 +1569,7 @@<br>
         uint32_t qtlayer    = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<br>
<br>
         bool bChromaSame  = false;<br>
-        if (trSizeLog2 == 2)<br>
+        if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))<br>
         {<br>
             assert(trDepth > 0);<br>
             uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trDepth - 1) << 1);<br>
@@ -1572,9 +1584,11 @@<br>
         uint32_t numCoeffC = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight()) >> (fullDepth << 1);<br>
         if (!bChromaSame)<br>
         {<br>
-            numCoeffC >>= 2;<br>
+            numCoeffC = ((cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) * (cu->getSlice()->getSPS()->getMaxCUHeight() >> m_vChromaShift)) >> (fullDepth << 1);<br>


         }<br>
-        uint32_t numCoeffIncC = (cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight()) >> ((cu->getSlice()->getSPS()->getMaxCUDepth() << 1) + 2);<br>


+<br>
+        uint32_t numCoeffIncC = ((cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) * (cu->getSlice()->getSPS()->getMaxCUHeight() >> m_vChromaShift)) >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);<br>


+<br>
         TCoeff* coeffSrcU = m_qtTempCoeffCb[qtlayer] + (numCoeffIncC * absPartIdx);<br>
         TCoeff* coeffSrcV = m_qtTempCoeffCr[qtlayer] + (numCoeffIncC * absPartIdx);<br>
         TCoeff* coeffDstU = cu->getCoeffCb()         + (numCoeffIncC * absPartIdx);<br>
@@ -1583,7 +1597,7 @@<br>
         ::memcpy(coeffDstV, coeffSrcV, sizeof(TCoeff) * numCoeffC);<br>
<br>
         //===== copy reconstruction =====<br>
-        uint32_t trSizeCLog2 = (bChromaSame ? trSizeLog2 : trSizeLog2 - 1);<br>
+        uint32_t trSizeCLog2 = (bChromaSame || (cu->getChromaFormat() == CHROMA_444))  ? trSizeLog2 : trSizeLog2 - 1;<br>
         m_qtTempTComYuv[qtlayer].copyPartToPartChroma(reconYuv, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2);<br>
     }<br>
     else<br>
@@ -1650,11 +1664,11 @@<br>
             }<br>
             //===== init availability pattern =====<br>
             cu->getPattern()->initPattern(cu, trDepth, absPartIdx);<br>
-            cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight);<br>
+            cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, chromaId);<br>
             Pel* chromaPred = (chromaId > 0 ? cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) : cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));<br>
<br>
             //===== get prediction signal =====<br>
-            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width);<br>
+            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, cu->getChromaFormat());<br>
<br>
             //===== get residual signal =====<br>
             assert(!((uint32_t)(size_t)fenc & (width - 1)));<br>
@@ -1676,7 +1690,7 @@<br>
             {<br>
                 curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
             }<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
             m_trQuant->selectLambda(TEXT_CHROMA);<br>
<br>
@@ -1741,7 +1755,8 @@<br>
     //===== init pattern =====<br>
     assert(width == height);<br>
     cu->getPattern()->initPattern(cu, 0, 0);<br>
-    cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf, m_predBufStride, m_predBufHeight);<br>
+    cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf, m_predBufStride, m_predBufHeight, 0/*chromaId*/);<br>
+    cu->getPattern()->initAdiPatternChroma(cu, 0, 0, m_predBuf, m_predBufStride, m_predBufHeight, 1/*chromaId*/);<br>
     Pel* patChromaU = cu->getPattern()->getAdiCbBuf(width, height, m_predBuf);<br>
     Pel* patChromaV = cu->getPattern()->getAdiCrBuf(width, height, m_predBuf);<br>
<br>
@@ -1754,8 +1769,8 @@<br>
     for (uint32_t mode = minMode; mode < maxMode; mode++)<br>
     {<br>
         //--- get prediction ---<br>
-        predIntraChromaAng(patChromaU, mode, predU, stride, width);<br>
-        predIntraChromaAng(patChromaV, mode, predV, stride, width);<br>
+        predIntraChromaAng(patChromaU, mode, predU, stride, width, height, cu->getChromaFormat());<br>
+        predIntraChromaAng(patChromaV, mode, predV, stride, width, height, cu->getChromaFormat());<br>
<br>
         //--- get SAD ---<br>
         uint32_t sad = sa8d(fencU, stride, predU, stride) + sa8d(fencV, stride, predV, stride);<br>
@@ -2131,13 +2146,14 @@<br>
<br>
     if (width > 32)<br>
     {<br>
-        scaleWidth = 32;<br>
-        scaleStride = 32;<br>
+        scaleWidth     = 32;<br>
+        scaleStride    = 32;<br>
         costMultiplier = 4;<br>
     }<br>
<br>
     cu->getPattern()->initPattern(cu, trDepth, absPartIdx);<br>
-    cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight);<br>
+    cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 0);<br>
+    cu->getPattern()->initAdiPatternChroma(cu, absPartIdx, trDepth, m_predBuf, m_predBufStride, m_predBufHeight, 1);<br>
<br>
     cu->getAllowedChromaDir(0, modeList);<br>
     //----- check chroma modes -----<br>
@@ -2156,7 +2172,7 @@<br>
             Pel* chromaPred = (chromaId > 0 ? cu->getPattern()->getAdiCrBuf(width, height, m_predBuf) : cu->getPattern()->getAdiCbBuf(width, height, m_predBuf));<br>
<br>
             //===== get prediction signal =====<br>
-            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width);<br>
+            predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, width, height, cu->getChromaFormat());<br>
             int log2SizeMinus2 = g_convertToBit[scaleWidth];<br>
             pixelcmp_t sa8d = primitives.sa8d[log2SizeMinus2];<br>
             sad = costMultiplier * sa8d(fenc, scaleStride, pred, scaleStride);<br>
@@ -2174,6 +2190,27 @@<br>
     cu->setChromIntraDirSubParts(bestMode, 0, depth);<br>
 }<br>
<br>
+bool TEncSearch::isNextSection()<br>
+{<br>
+    if (m_splitMode == DONT_SPLIT)<br>
+    {<br>
+        m_section++;<br>
+        return false;<br>
+    }<br>
+    else<br>
+    {<br>
+        m_absPartIdxTURelCU += m_absPartIdxStep;<br>
+<br>
+        m_section++;<br>
+        return m_section< (1 << m_splitMode);<br>
+    }<br>
+}<br>
+<br>
+bool TEncSearch::isLastSection()<br>
+{<br>
+    return (m_section+1) >= (1<<m_splitMode);<br>
+}<br>
+<br>
 void TEncSearch::estIntraPredChromaQT(TComDataCU* cu,<br>
                                       TComYuv*    fencYuv,<br>
                                       TComYuv*    predYuv,<br>
@@ -2181,60 +2218,109 @@<br>
                                       TComYuv*    reconYuv,<br>
                                       uint32_t    preCalcDistC)<br>
 {<br>
-    uint32_t depth     = cu->getDepth(0);<br>
-    uint32_t bestMode  = 0;<br>
-    uint32_t bestDist  = 0;<br>
-    uint64_t bestCost  = MAX_INT64;<br>
-<br>
-    //----- init mode list -----<br>
-    uint32_t minMode = 0;<br>
-    uint32_t maxMode = NUM_CHROMA_MODE;<br>
-    uint32_t modeList[NUM_CHROMA_MODE];<br>
-<br>
-    cu->getAllowedChromaDir(0, modeList);<br>
-<br>
-    //----- check chroma modes -----<br>
-    for (uint32_t mode = minMode; mode < maxMode; mode++)<br>
+    uint32_t depth              = cu->getDepth(0);<br>
+    uint32_t initTrDepth        = (cu->getPartitionSize(0) != SIZE_2Nx2N) && (cu->getChromaFormat() == CHROMA_444 ? 1 : 0);<br>
+    m_splitMode                 = (initTrDepth == 0) ? DONT_SPLIT : QUAD_SPLIT;<br>
+    m_absPartIdxStep            = (cu->getPic()->getNumPartInCU() >> (depth << 1)) >> partIdxStepShift[m_splitMode];<br>
+    m_partOffset                = 0;<br>
+    m_section                   = 0;<br>
+    m_absPartIdxTURelCU         = 0;<br>
+<br>
+    do<br>
     {<br>
-        //----- restore context models -----<br>
-        m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);<br>
-<br>
-        //----- chroma coding -----<br>
-        uint32_t dist = 0;<br>
-        cu->setChromIntraDirSubParts(modeList[mode], 0, depth);<br>
-        xRecurIntraChromaCodingQT(cu, 0, 0, fencYuv, predYuv, resiYuv, dist);<br>
-        if (cu->getSlice()->getPPS()->getUseTransformSkip())<br>
+        uint32_t bestMode           = 0;<br>
+        uint32_t bestDist           = 0;<br>
+        uint64_t bestCost           = MAX_INT64;<br>
+<br>
+        //----- init mode list -----<br>
+        uint32_t minMode = 0;<br>
+        uint32_t maxMode = NUM_CHROMA_MODE;<br>
+        uint32_t modeList[NUM_CHROMA_MODE];<br>
+<br>
+        m_partOffset = m_absPartIdxTURelCU;<br>
+<br>
+        cu->getAllowedChromaDir(m_partOffset, modeList);<br>
+<br>
+        //----- check chroma modes -----<br>
+        for (uint32_t mode = minMode; mode < maxMode; mode++)<br>
         {<br>
+            //----- restore context models -----<br>
             m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);<br>
+<br>
+            //----- chroma coding -----<br>
+            uint32_t dist = 0;<br>
+<br>
+            cu->setChromIntraDirSubParts(modeList[mode], m_partOffset, depth + initTrDepth);<br>
+<br>
+            xRecurIntraChromaCodingQT(cu, initTrDepth, m_absPartIdxTURelCU, fencYuv, predYuv, resiYuv, dist);<br>
+<br>
+            if (cu->getSlice()->getPPS()->getUseTransformSkip())<br>
+            {<br>
+                m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);<br>
+            }<br>
+<br>
+            uint32_t bits = xGetIntraBitsQT(cu, initTrDepth, m_absPartIdxTURelCU, false, true);<br>
+            uint64_t cost = m_rdCost->calcRdCost(dist, bits);<br>
+<br>
+            //----- compare -----<br>
+            if (cost < bestCost)<br>
+            {<br>
+                bestCost = cost;<br>
+                bestDist = dist;<br>
+                bestMode = modeList[mode];<br>
+                xSetIntraResultChromaQT(cu, initTrDepth, m_absPartIdxTURelCU, reconYuv);<br>
+                ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U) + m_partOffset, m_absPartIdxStep * sizeof(UChar));<br>
+                ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V) + m_partOffset, m_absPartIdxStep * sizeof(UChar));<br>
+                ::memcpy(m_qtTempTransformSkipFlag[1], cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset, m_absPartIdxStep * sizeof(UChar));<br>
+                ::memcpy(m_qtTempTransformSkipFlag[2], cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset, m_absPartIdxStep * sizeof(UChar));<br>
+            }<br>
         }<br>
<br>
-        uint32_t bits = xGetIntraBitsQT(cu, 0, 0, false, true);<br>
-        uint64_t cost = m_rdCost->calcRdCost(dist, bits);<br>
-<br>
-        //----- compare -----<br>
-        if (cost < bestCost)<br>
+        if (!isLastSection())<br>
         {<br>
-            bestCost = cost;<br>
-            bestDist = dist;<br>
-            bestMode = modeList[mode];<br>
-            uint32_t qpn = cu->getPic()->getNumPartInCU() >> (depth << 1);<br>
-            xSetIntraResultChromaQT(cu, 0, 0, reconYuv);<br>
-            ::memcpy(m_qtTempCbf[1], cu->getCbf(TEXT_CHROMA_U), qpn * sizeof(UChar));<br>
-            ::memcpy(m_qtTempCbf[2], cu->getCbf(TEXT_CHROMA_V), qpn * sizeof(UChar));<br>
-            ::memcpy(m_qtTempTransformSkipFlag[1], cu->getTransformSkip(TEXT_CHROMA_U), qpn * sizeof(UChar));<br>
-            ::memcpy(m_qtTempTransformSkipFlag[2], cu->getTransformSkip(TEXT_CHROMA_V), qpn * sizeof(UChar));<br>
+            uint32_t compWidth   = (cu->getWidth(0) >> m_hChromaShift)  >> initTrDepth;<br>
+            uint32_t compHeight  = (cu->getHeight(0) >> m_vChromaShift) >> initTrDepth;<br>
+            uint32_t zorder      = cu->getZorderIdxInCU() + m_partOffset;<br>
+            Pel*     dst         = cu->getPic()->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder);<br>
+            uint32_t dststride   = cu->getPic()->getPicYuvRec()->getCStride();<br>
+            Pel*     src         = reconYuv->getCbAddr(m_partOffset);<br>
+            uint32_t srcstride   = reconYuv->getCStride();<br>
+<br>
+            primitives.blockcpy_pp(compWidth, compHeight, dst, dststride, src, srcstride);<br>
+<br>
+            dst                 = cu->getPic()->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder);<br>
+            src                 = reconYuv->getCrAddr(m_partOffset);<br>
+            primitives.blockcpy_pp(compWidth, compHeight, dst, dststride, src, srcstride);<br>
+        }<br>
+<br>
+        //----- set data -----<br>
+        ::memcpy(cu->getCbf(TEXT_CHROMA_U) + m_partOffset, m_qtTempCbf[1], m_absPartIdxStep * sizeof(UChar));<br>
+        ::memcpy(cu->getCbf(TEXT_CHROMA_V) + m_partOffset, m_qtTempCbf[2], m_absPartIdxStep * sizeof(UChar));<br>
+        ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U) + m_partOffset, m_qtTempTransformSkipFlag[1], m_absPartIdxStep * sizeof(UChar));<br>
+        ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V) + m_partOffset, m_qtTempTransformSkipFlag[2], m_absPartIdxStep * sizeof(UChar));<br>
+        cu->setChromIntraDirSubParts(bestMode, m_partOffset, depth + initTrDepth);<br>
+        cu->m_totalDistortion += bestDist - preCalcDistC;<br>
+<br>
+    } while(isNextSection());<br>
+<br>
+    //----- restore context models -----<br>
+    if (initTrDepth != 0)<br>
+    {   // set Cbf for all blocks<br>
+        uint32_t uiCombCbfU = 0;<br>
+        uint32_t uiCombCbfV = 0;<br>
+        uint32_t uiPartIdx  = 0;<br>
+        for (uint32_t uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += m_absPartIdxStep)<br>
+        {<br>
+            uiCombCbfU |= cu->getCbf(uiPartIdx, TEXT_CHROMA_U, 1);<br>
+            uiCombCbfV |= cu->getCbf(uiPartIdx, TEXT_CHROMA_V, 1);<br>
+        }<br>
+        for (uint32_t uiOffs = 0; uiOffs < 4 * m_absPartIdxStep; uiOffs++)<br>
+        {<br>
+            cu->getCbf( TEXT_CHROMA_U )[ uiOffs ] |= uiCombCbfU;<br>
+            cu->getCbf( TEXT_CHROMA_V )[ uiOffs ] |= uiCombCbfV;<br>
         }<br>
     }<br>
<br>
-    //----- set data -----<br>
-    uint32_t qpn = cu->getPic()->getNumPartInCU() >> (depth << 1);<br>
-    ::memcpy(cu->getCbf(TEXT_CHROMA_U), m_qtTempCbf[1], qpn * sizeof(UChar));<br>
-    ::memcpy(cu->getCbf(TEXT_CHROMA_V), m_qtTempCbf[2], qpn * sizeof(UChar));<br>
-    ::memcpy(cu->getTransformSkip(TEXT_CHROMA_U), m_qtTempTransformSkipFlag[1], qpn * sizeof(UChar));<br>
-    ::memcpy(cu->getTransformSkip(TEXT_CHROMA_V), m_qtTempTransformSkipFlag[2], qpn * sizeof(UChar));<br>
-    cu->setChromIntraDirSubParts(bestMode, 0, depth);<br>
-    cu->m_totalDistortion += bestDist - preCalcDistC;<br>
-<br>
     //----- restore context models -----<br>
     m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_CURR_BEST]);<br>
 }<br>
@@ -3085,10 +3171,11 @@<br>
         outResiYuv->clear();<br>
<br>
         predYuv->copyToPartYuv(outReconYuv, 0);<br>
-<br>
+        //Luma<br>
         int part = partitionFromSizes(width, height);<br>
         distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());<br>
-        part = partitionFromSizes(width >> 1, height >> 1);<br>
+        //Chroma<br>
+        part = partitionFromSizes(width >> m_hChromaShift, height >> m_vChromaShift);<br>
         distortion += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));<br>
         distortion += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));<br>
<br>
@@ -3208,7 +3295,7 @@<br>
     // update with clipped distortion and cost (qp estimation loop uses unclipped values)<br>
     int part = partitionFromSizes(width, height);<br>
     bdist = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());<br>
-    part = partitionFromSizes(width >> 1, height >> 1);<br>
+    part = partitionFromSizes(width >> cu->getHorzChromaShift(), height >> cu->getVertChromaShift());<br>
     bdist += m_rdCost->scaleChromaDistCb(primitives.sse_pp[part](fencYuv->getCbAddr(), fencYuv->getCStride(), outReconYuv->getCbAddr(), outReconYuv->getCStride()));<br>
     bdist += m_rdCost->scaleChromaDistCr(primitives.sse_pp[part](fencYuv->getCrAddr(), fencYuv->getCStride(), outReconYuv->getCrAddr(), outReconYuv->getCStride()));<br>
     bcost = m_rdCost->calcRdCost(bdist, bestBits);<br>
@@ -3311,7 +3398,7 @@<br>
             cu->setTransformSkipSubParts(0, TEXT_CHROMA_V, absPartIdx, cu->getDepth(0) + trModeC);<br>
         }<br>
<br>
-        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);<br>
+        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());<br>
         m_trQuant->selectLambda(TEXT_LUMA);<br>
<br>
         absSumY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,<br>
@@ -3322,7 +3409,7 @@<br>
         if (bCodeChroma)<br>
         {<br>
             int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
             m_trQuant->selectLambda(TEXT_CHROMA);<br>
<br>
@@ -3330,7 +3417,7 @@<br>
                                               trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);<br>
<br>
             curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
             absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,<br>
                                               trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);<br>
<br>
@@ -3342,7 +3429,7 @@<br>
         {<br>
             int16_t *curResiY = resiYuv->getLumaAddr(absTUPartIdx);<br>
<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());<br>
<br>
             int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];<br>
             assert(scalingListType < 6);<br>
@@ -3362,7 +3449,7 @@<br>
                 int16_t *pcResiCurrU = resiYuv->getCbAddr(absTUPartIdxC);<br>
<br>
                 int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
-                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
                 int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];<br>
                 assert(scalingListType < 6);<br>
@@ -3378,7 +3465,7 @@<br>
             {<br>
                 int16_t *curResiV = resiYuv->getCrAddr(absTUPartIdxC);<br>
                 int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
-                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
                 int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];<br>
                 assert(scalingListType < 6);<br>
@@ -3453,6 +3540,7 @@<br>
     assert(cu->getDepth(0) == cu->getDepth(absPartIdx));<br>
     const uint32_t trMode = depth - cu->getDepth(0);<br>
     const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;<br>
+    uint32_t  trSizeCLog2 = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) >> depth] + 2;;<br>
<br>
     bool bSplitFlag = ((cu->getSlice()->getSPS()->getQuadtreeTUMaxDepthInter() == 1) && cu->getPredictionMode(absPartIdx) == MODE_INTER && (cu->getPartitionSize(absPartIdx) != SIZE_2Nx2N));<br>


     bool bCheckFull;<br>
@@ -3465,12 +3553,11 @@<br>
<br>
     bool  bCodeChroma = true;<br>
     uint32_t  trModeC     = trMode;<br>
-    uint32_t  trSizeCLog2 = trSizeLog2 - 1;<br>
-    if (trSizeLog2 == 2)<br>
+    if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))<br>
     {<br>
         trSizeCLog2++;<br>
         trModeC--;<br>
-        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);<br>
+        uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);<br>
         bCodeChroma = ((absPartIdx % qpdiv) == 0);<br>
     }<br>
<br>
@@ -3490,8 +3577,8 @@<br>
         const uint32_t numCoeffPerAbsPartIdxIncrement = cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);<br>


         const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<br>
         TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx);<br>
-        TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);<br>
-        TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);<br>
+        TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));<br>
+        TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));<br>
<br>
         int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0;<br>
         uint32_t absTUPartIdxC = absPartIdx;<br>
@@ -3520,7 +3607,7 @@<br>
             m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidth, trHeight, TEXT_LUMA);<br>
         }<br>
<br>
-        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);<br>
+        m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());<br>
         m_trQuant->selectLambda(TEXT_LUMA);<br>
<br>
         absSumY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,<br>
@@ -3534,17 +3621,17 @@<br>
             {<br>
                 m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidthC, trHeightC, TEXT_CHROMA);<br>
             }<br>
-<br>
+            //Cb transform<br>
             int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
             m_trQuant->selectLambda(TEXT_CHROMA);<br>
<br>
             absSumU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,<br>
                                               trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosU, false, curuseRDOQ);<br>
-<br>
+            //Cr transform<br>
             curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
             absSumV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,<br>
                                               trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosV, false, curuseRDOQ);<br>
<br>
@@ -3586,7 +3673,7 @@<br>
         {<br>
             int16_t *curResiY = m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx);<br>
<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());<br>
<br>
             int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];<br>
             assert(scalingListType < 6);<br>
@@ -3658,16 +3745,15 @@<br>
                 int16_t *pcResiCurrU = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);<br>
<br>
                 int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
-                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
                 int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];<br>
                 assert(scalingListType < 6);<br>
-                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
-                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType, false, lastPosU);<br>
+                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC, trHeightC, scalingListType, false, lastPosU);<br>
<br>
                 uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth,<br>
                                                              m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),<br>
-                                                             MAX_CU_SIZE / 2);<br>
+                                                             m_qtTempTComYuv[qtlayer].m_cwidth);<br>
                 const uint32_t nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);<br>
<br>
                 if (cu->isLosslessCoded(0))<br>
@@ -3710,10 +3796,10 @@<br>
             if (!absSumU)<br>
             {<br>
                 int16_t *ptr = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);<br>
-                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
+                const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;<br>
<br>
                 assert(trWidthC == trHeightC);<br>
-                primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE / 2, 0);<br>
+                primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);<br>
             }<br>
<br>
             distV = m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, m_tempPel, trWidthC));<br>
@@ -3725,16 +3811,15 @@<br>
             {<br>
                 int16_t *curResiV = m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);<br>
                 int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
-                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
                 int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];<br>
                 assert(scalingListType < 6);<br>
-                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
-                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType, false, lastPosV);<br>
+                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC, trHeightC, scalingListType, false, lastPosV);<br>
<br>
                 uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth,<br>
                                                              m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),<br>
-                                                             MAX_CU_SIZE / 2);<br>
+                                                             m_qtTempTComYuv[qtlayer].m_cwidth);<br>
                 const uint32_t nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);<br>
<br>
                 if (cu->isLosslessCoded(0))<br>
@@ -3777,10 +3862,10 @@<br>
             if (!absSumV)<br>
             {<br>
                 int16_t *ptr =  m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);<br>
-                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
+                const uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;<br>
<br>
                 assert(trWidthC == trHeightC);<br>
-                primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, MAX_CU_SIZE / 2, 0);<br>
+                primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, stride, 0);<br>
             }<br>
         }<br>
         cu->setCbfSubParts(absSumY ? setCbf : 0, TEXT_LUMA, absPartIdx, depth);<br>
@@ -3817,7 +3902,7 @@<br>
                 m_entropyCoder->estimateBit(m_trQuant->m_estBitsSbac, trWidth, trHeight, TEXT_LUMA);<br>
             }<br>
<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());<br>
<br>
             m_trQuant->selectLambda(TEXT_LUMA);<br>
             absSumTransformSkipY = m_trQuant->transformNxN(cu, resiYuv->getLumaAddr(absTUPartIdx), resiYuv->m_width, coeffCurY,<br>
@@ -3831,7 +3916,7 @@<br>
                 m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trWidth, trHeight, depth, TEXT_LUMA);<br>
                 const uint32_t skipSingleBitsY = m_entropyCoder->getNumberOfWrittenBits();<br>
<br>
-                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0);<br>
+                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_LUMA, cu->getSlice()->getSPS()->getQpBDOffsetY(), 0, cu->getChromaFormat());<br>
<br>
                 int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA];<br>
                 assert(scalingListType < 6);<br>
@@ -3874,7 +3959,7 @@<br>
<br>
             int16_t *curResiU = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC);<br>
             int16_t *curResiV = m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC);<br>
-            assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
+            uint32_t stride = m_qtTempTComYuv[qtlayer].m_cwidth;<br>
<br>
             TCoeff bestCoeffU[32 * 32], bestCoeffV[32 * 32];<br>
             memcpy(bestCoeffU, coeffCurU, sizeof(TCoeff) * numSamplesChroma);<br>
@@ -3883,8 +3968,8 @@<br>
             int16_t bestResiU[32 * 32], bestResiV[32 * 32];<br>
             for (int i = 0; i < trHeightC; ++i)<br>
             {<br>
-                memcpy(&bestResiU[i * trWidthC], curResiU + i * (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);<br>
-                memcpy(&bestResiV[i * trWidthC], curResiV + i * (MAX_CU_SIZE / 2), sizeof(int16_t) * trWidthC);<br>
+                memcpy(&bestResiU[i * trWidthC], curResiU + i * stride, sizeof(int16_t) * trWidthC);<br>
+                memcpy(&bestResiV[i * trWidthC], curResiV + i * stride, sizeof(int16_t) * trWidthC);<br>
             }<br>
<br>
             m_rdGoOnSbacCoder->load(m_rdSbacCoders[depth][CI_QT_TRAFO_ROOT]);<br>
@@ -3898,13 +3983,13 @@<br>
             }<br>
<br>
             int curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
             m_trQuant->selectLambda(TEXT_CHROMA);<br>
<br>
             absSumTransformSkipU = m_trQuant->transformNxN(cu, resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurU,<br>
                                                            trWidthC, trHeightC, TEXT_CHROMA_U, absPartIdx, &lastPosTransformSkipU, true, curuseRDOQ);<br>
             curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
-            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+            m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
             absSumTransformSkipV = m_trQuant->transformNxN(cu, resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth, coeffCurV,<br>
                                                            trWidthC, trHeightC, TEXT_CHROMA_V, absPartIdx, &lastPosTransformSkipV, true, curuseRDOQ);<br>
<br>
@@ -3922,17 +4007,15 @@<br>
                 singleBitsU = m_entropyCoder->getNumberOfWrittenBits();<br>
<br>
                 curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCbQpOffset() + cu->getSlice()->getSliceQpDeltaCb();<br>
-                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
                 int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U];<br>
                 assert(scalingListType < 6);<br>
-                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
-<br>
-                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipU);<br>
+                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiU, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurU, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipU);<br>


<br>
                 uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCbAddr(absTUPartIdxC), resiYuv->m_cwidth,<br>
                                                              m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC),<br>
-                                                             MAX_CU_SIZE / 2);<br>
+                                                             m_qtTempTComYuv[qtlayer].m_cwidth);<br>
                 nonZeroDistU = m_rdCost->scaleChromaDistCb(dist);<br>
                 singleCostU = m_rdCost->calcRdCost(nonZeroDistU, singleBitsU);<br>
             }<br>
@@ -3944,7 +4027,7 @@<br>
                 memcpy(coeffCurU, bestCoeffU, sizeof(TCoeff) * numSamplesChroma);<br>
                 for (int i = 0; i < trHeightC; ++i)<br>
                 {<br>
-                    memcpy(curResiU + i * (MAX_CU_SIZE / 2), &bestResiU[i * trWidthC], sizeof(int16_t) * trWidthC);<br>
+                    memcpy(curResiU + i * stride, &bestResiU[i * trWidthC], sizeof(int16_t) * trWidthC);<br>
                 }<br>
             }<br>
             else<br>
@@ -3961,17 +4044,15 @@<br>
                 singleBitsV = m_entropyCoder->getNumberOfWrittenBits() - singleBitsU;<br>
<br>
                 curChromaQpOffset = cu->getSlice()->getPPS()->getChromaCrQpOffset() + cu->getSlice()->getSliceQpDeltaCr();<br>
-                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset);<br>
+                m_trQuant->setQPforQuant(cu->getQP(0), TEXT_CHROMA, cu->getSlice()->getSPS()->getQpBDOffsetC(), curChromaQpOffset, cu->getChromaFormat());<br>
<br>
                 int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_V];<br>
                 assert(scalingListType < 6);<br>
-                assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2);<br>
-<br>
-                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, MAX_CU_SIZE / 2, coeffCurV, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipV);<br>
+                m_trQuant->invtransformNxN(cu->getCUTransquantBypass(absPartIdx), REG_DCT, curResiV, m_qtTempTComYuv[qtlayer].m_cwidth, coeffCurV, trWidthC, trHeightC, scalingListType, true, lastPosTransformSkipV);<br>


<br>
                 uint32_t dist = primitives.sse_ss[partSizeC](resiYuv->getCrAddr(absTUPartIdxC), resiYuv->m_cwidth,<br>
                                                              m_qtTempTComYuv[qtlayer].getCrAddr(absTUPartIdxC),<br>
-                                                             MAX_CU_SIZE / 2);<br>
+                                                             m_qtTempTComYuv[qtlayer].m_cwidth);<br>
                 nonZeroDistV = m_rdCost->scaleChromaDistCr(dist);<br>
                 singleCostV = m_rdCost->calcRdCost(nonZeroDistV, singleBitsV);<br>
             }<br>
@@ -3983,7 +4064,7 @@<br>
                 memcpy(coeffCurV, bestCoeffV, sizeof(TCoeff) * numSamplesChroma);<br>
                 for (int i = 0; i < trHeightC; ++i)<br>
                 {<br>
-                    memcpy(curResiV + i * (MAX_CU_SIZE / 2), &bestResiV[i * trWidthC], sizeof(int16_t) * trWidthC);<br>
+                    memcpy(curResiV + i * stride, &bestResiV[i * trWidthC], sizeof(int16_t) * trWidthC);<br>
                 }<br>
             }<br>
             else<br>
@@ -4115,6 +4196,7 @@<br>
     const uint32_t trMode = cu->getTransformIdx(absPartIdx);<br>
     const bool bSubdiv = curTrMode != trMode;<br>
     const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;<br>
+    uint32_t  trSizeCLog2 = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >> m_hChromaShift) >> depth] + 2;<br>
<br>
     if (bSubdivAndCbf && trSizeLog2 <= cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() && trSizeLog2 > cu->getQuadtreeTULog2MinSizeInCU(absPartIdx))<br>
     {<br>
@@ -4145,21 +4227,20 @@<br>
<br>
     if (!bSubdiv)<br>
     {<br>
+        //Luma<br>
         const uint32_t numCoeffPerAbsPartIdxIncrement = cu->getSlice()->getSPS()->getMaxCUWidth() * cu->getSlice()->getSPS()->getMaxCUHeight() >> (cu->getSlice()->getSPS()->getMaxCUDepth() << 1);<br>


-        //assert( 16 == uiNumCoeffPerAbsPartIdxIncrement ); // check<br>
         const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<br>
         TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] +  numCoeffPerAbsPartIdxIncrement * absPartIdx;<br>
-        TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);<br>
-        TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);<br>
+<br>
+        //Chroma<br>
+        TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));<br>
+        TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));<br>
<br>
         bool  bCodeChroma = true;<br>
-        uint32_t  trModeC     = trMode;<br>
-        uint32_t  trSizeCLog2 = trSizeLog2 - 1;<br>
-        if (trSizeLog2 == 2)<br>
+        if ((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))<br>
         {<br>
             trSizeCLog2++;<br>
-            trModeC--;<br>
-            uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((cu->getDepth(0) + trModeC) << 1);<br>
+            uint32_t qpdiv = cu->getPic()->getNumPartInCU() >> ((depth - 1) << 1);<br>
             bCodeChroma = ((absPartIdx % qpdiv) == 0);<br>
         }<br>
<br>
@@ -4171,21 +4252,18 @@<br>
         {<br>
             if (ttype == TEXT_LUMA && cu->getCbf(absPartIdx, TEXT_LUMA, trMode))<br>
             {<br>
-                int trWidth  = 1 << trSizeLog2;<br>
-                int trHeight = 1 << trSizeLog2;<br>
-                m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, trWidth, trHeight, depth, TEXT_LUMA);<br>
+                m_entropyCoder->encodeCoeffNxN(cu, coeffCurY, absPartIdx, 1 << trSizeLog2, 1 << trSizeLog2, depth, TEXT_LUMA);<br>
             }<br>
+<br>
             if (bCodeChroma)<br>
             {<br>
-                int trWidth  = 1 << trSizeCLog2;<br>
-                int trHeight = 1 << trSizeCLog2;<br>
                 if (ttype == TEXT_CHROMA_U && cu->getCbf(absPartIdx, TEXT_CHROMA_U, trMode))<br>
                 {<br>
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, trWidth, trHeight, depth, TEXT_CHROMA_U);<br>
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurU, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, depth, TEXT_CHROMA_U);<br>
                 }<br>
                 if (ttype == TEXT_CHROMA_V && cu->getCbf(absPartIdx, TEXT_CHROMA_V, trMode))<br>
                 {<br>
-                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, trWidth, trHeight, depth, TEXT_CHROMA_V);<br>
+                    m_entropyCoder->encodeCoeffNxN(cu, coeffCurV, absPartIdx, 1 << trSizeCLog2, 1 << trSizeCLog2, depth, TEXT_CHROMA_V);<br>
                 }<br>
             }<br>
         }<br>
@@ -4211,13 +4289,13 @@<br>
<br>
     if (curTrMode == trMode)<br>
     {<br>
-        const uint32_t trSizeLog2 = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;<br>
+        const uint32_t trSizeLog2   = g_convertToBit[cu->getSlice()->getSPS()->getMaxCUWidth() >> depth] + 2;<br>
+        uint32_t  trSizeCLog2 = g_convertToBit[(cu->getSlice()->getSPS()->getMaxCUWidth() >> cu->getHorzChromaShift()) >> depth] + 2;;<br>
         const uint32_t qtlayer = cu->getSlice()->getSPS()->getQuadtreeTULog2MaxSize() - trSizeLog2;<br>
<br>
         bool  bCodeChroma   = true;<br>
         uint32_t  trModeC     = trMode;<br>
-        uint32_t  trSizeCLog2 = trSizeLog2 - 1;<br>
-        if (trSizeLog2 == 2)<br>
+        if((trSizeLog2 == 2) && !(cu->getChromaFormat() == CHROMA_444))<br>
         {<br>
             trSizeCLog2++;<br>
             trModeC--;<br>
@@ -4246,10 +4324,10 @@<br>
             if (bCodeChroma)<br>
             {<br>
                 uint32_t    uiNumCoeffC = (1 << (trSizeCLog2 << 1));<br>
-                TCoeff* pcCoeffSrcU = m_qtTempCoeffCb[qtlayer] + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);<br>
-                TCoeff* pcCoeffSrcV = m_qtTempCoeffCr[qtlayer] + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);<br>
-                TCoeff* pcCoeffDstU = cu->getCoeffCb() + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);<br>
-                TCoeff* pcCoeffDstV = cu->getCoeffCr() + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> 2);<br>
+                TCoeff* pcCoeffSrcU = m_qtTempCoeffCb[qtlayer] + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));<br>
+                TCoeff* pcCoeffSrcV = m_qtTempCoeffCr[qtlayer] + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));<br>
+                TCoeff* pcCoeffDstU = cu->getCoeffCb() + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));<br>
+                TCoeff* pcCoeffDstV = cu->getCoeffCr() + (uiNumCoeffPerAbsPartIdxIncrement * absPartIdx >> (m_hChromaShift + m_vChromaShift));<br>
                 ::memcpy(pcCoeffDstU, pcCoeffSrcU, sizeof(TCoeff) * uiNumCoeffC);<br>
                 ::memcpy(pcCoeffDstV, pcCoeffSrcV, sizeof(TCoeff) * uiNumCoeffC);<br>
             }<br>
diff -r 4811da38078c -r f7d21da102ac source/common/TShortYUV.h<br>
--- a/source/common/TShortYUV.h Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/common/TShortYUV.h Tue Jan 07 16:44:39 2014 +0530<br>
@@ -87,9 +87,9 @@<br>
     //  Access starting position of YUV partition unit buffer<br>
     int16_t* getLumaAddr(unsigned int partUnitIdx) { return m_bufY + getAddrOffset(partUnitIdx, m_width); }<br>
<br>
-    int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb + (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }<br>
+    int16_t* getCbAddr(unsigned int partUnitIdx) { return m_bufCb + (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }<br>
<br>
-    int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr + (getAddrOffset(partUnitIdx, m_cwidth) >> 1); }<br>
+    int16_t* getCrAddr(unsigned int partUnitIdx) { return m_bufCr + (getAddrOffset(partUnitIdx, m_cwidth) >> m_hChromaShift); }<br>
<br>
     //  Access starting position of YUV transform unit buffer<br>
     int16_t* getLumaAddr(unsigned int partIdx, unsigned int size) { return m_bufY + getAddrOffset(partIdx, size, m_width); }<br>
diff -r 4811da38078c -r f7d21da102ac source/common/ipfilter.cpp<br>
--- a/source/common/ipfilter.cpp        Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/common/ipfilter.cpp        Tue Jan 07 16:44:39 2014 +0530<br>
@@ -449,74 +449,108 @@<br>
 namespace x265 {<br>
 // x265 private namespace<br>
<br>
-#define CHROMA(W, H) \<br>
+#define CHROMA_420(W, H) \<br>
     p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \<br>
     p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \<br>
-    p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>; \<br>
-    p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>; \<br>
-    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>; \<br>
+    p.chroma[X265_CSP_I420].filter_vpp[CHROMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>;  \<br>
+    p.chroma[X265_CSP_I420].filter_vps[CHROMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>;  \<br>
+    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>;  \<br>
     p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;<br>
<br>
+#define CHROMA_444(W, H) \<br>
+    p.chroma[X265_CSP_I444].filter_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<4, W, H>; \<br>
+    p.chroma[X265_CSP_I444].filter_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<4, W, H>; \<br>
+    p.chroma[X265_CSP_I444].filter_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<4, W, H>;  \<br>
+    p.chroma[X265_CSP_I444].filter_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<4, W, H>;  \<br>
+    p.chroma[X265_CSP_I444].filter_vsp[LUMA_ ## W ## x ## H] = interp_vert_sp_c<4, W, H>;  \<br>
+    p.chroma[X265_CSP_I444].filter_vss[LUMA_ ## W ## x ## H] = interp_vert_ss_c<4, W, H>;<br>
+<br>
 #define LUMA(W, H) \<br>
     p.luma_hpp[LUMA_ ## W ## x ## H]     = interp_horiz_pp_c<8, W, H>; \<br>
     p.luma_hps[LUMA_ ## W ## x ## H]     = interp_horiz_ps_c<8, W, H>; \<br>
-    p.luma_vpp[LUMA_ ## W ## x ## H]     = interp_vert_pp_c<8, W, H>; \<br>
-    p.luma_vps[LUMA_ ## W ## x ## H]     = interp_vert_ps_c<8, W, H>; \<br>
-    p.luma_vsp[LUMA_ ## W ## x ## H]     = interp_vert_sp_c<8, W, H>; \<br>
-    p.luma_vss[LUMA_ ## W ## x ## H]     = interp_vert_ss_c<8, W, H>; \<br>
+    p.luma_vpp[LUMA_ ## W ## x ## H]     = interp_vert_pp_c<8, W, H>;  \<br>
+    p.luma_vps[LUMA_ ## W ## x ## H]     = interp_vert_ps_c<8, W, H>;  \<br>
+    p.luma_vsp[LUMA_ ## W ## x ## H]     = interp_vert_sp_c<8, W, H>;  \<br>
+    p.luma_vss[LUMA_ ## W ## x ## H]     = interp_vert_ss_c<8, W, H>;  \<br>
     p.luma_hvpp[LUMA_ ## W ## x ## H]    = interp_hv_pp_c<8, W, H>;<br>
<br>
 void Setup_C_IPFilterPrimitives(EncoderPrimitives& p)<br>
 {<br>
     LUMA(4, 4);<br>
     LUMA(8, 8);<br>
-    CHROMA(4, 4);<br>
+    CHROMA_420(4,  4);<br>
     LUMA(4, 8);<br>
-    CHROMA(2, 4);<br>
+    CHROMA_420(2,  4);<br>
     LUMA(8, 4);<br>
-    CHROMA(4, 2);<br>
+    CHROMA_420(4,  2);<br>
     LUMA(16, 16);<br>
-    CHROMA(8, 8);<br>
+    CHROMA_420(8,  8);<br>
     LUMA(16,  8);<br>
-    CHROMA(8, 4);<br>
+    CHROMA_420(8,  4);<br>
     LUMA(8, 16);<br>
-    CHROMA(4, 8);<br>
+    CHROMA_420(4,  8);<br>
     LUMA(16, 12);<br>
-    CHROMA(8, 6);<br>
+    CHROMA_420(8,  6);<br>
     LUMA(12, 16);<br>
-    CHROMA(6, 8);<br>
+    CHROMA_420(6,  8);<br>
     LUMA(16,  4);<br>
-    CHROMA(8, 2);<br>
+    CHROMA_420(8,  2);<br>
     LUMA(4, 16);<br>
-    CHROMA(2, 8);<br>
+    CHROMA_420(2,  8);<br>
     LUMA(32, 32);<br>
-    CHROMA(16, 16);<br>
+    CHROMA_420(16, 16);<br>
     LUMA(32, 16);<br>
-    CHROMA(16, 8);<br>
+    CHROMA_420(16, 8);<br>
     LUMA(16, 32);<br>
-    CHROMA(8, 16);<br>
+    CHROMA_420(8,  16);<br>
     LUMA(32, 24);<br>
-    CHROMA(16, 12);<br>
+    CHROMA_420(16, 12);<br>
     LUMA(24, 32);<br>
-    CHROMA(12, 16);<br>
+    CHROMA_420(12, 16);<br>
     LUMA(32,  8);<br>
-    CHROMA(16, 4);<br>
+    CHROMA_420(16, 4);<br>
     LUMA(8, 32);<br>
-    CHROMA(4, 16);<br>
+    CHROMA_420(4,  16);<br>
     LUMA(64, 64);<br>
-    CHROMA(32, 32);<br>
+    CHROMA_420(32, 32);<br>
     LUMA(64, 32);<br>
-    CHROMA(32, 16);<br>
+    CHROMA_420(32, 16);<br>
     LUMA(32, 64);<br>
-    CHROMA(16, 32);<br>
+    CHROMA_420(16, 32);<br>
     LUMA(64, 48);<br>
-    CHROMA(32, 24);<br>
+    CHROMA_420(32, 24);<br>
     LUMA(48, 64);<br>
-    CHROMA(24, 32);<br>
+    CHROMA_420(24, 32);<br>
     LUMA(64, 16);<br>
-    CHROMA(32, 8);<br>
+    CHROMA_420(32, 8);<br>
     LUMA(16, 64);<br>
-    CHROMA(8, 32);<br>
+    CHROMA_420(8,  32);<br>
+<br>
+    CHROMA_444(4,  4);<br>
+    CHROMA_444(8,  8);<br>
+    CHROMA_444(4,  8);<br>
+    CHROMA_444(8,  4);<br>
+    CHROMA_444(16, 16);<br>
+    CHROMA_444(16, 8);<br>
+    CHROMA_444(8,  16);<br>
+    CHROMA_444(16, 12);<br>
+    CHROMA_444(12, 16);<br>
+    CHROMA_444(16, 4);<br>
+    CHROMA_444(4,  16);<br>
+    CHROMA_444(32, 32);<br>
+    CHROMA_444(32, 16);<br>
+    CHROMA_444(16, 32);<br>
+    CHROMA_444(32, 24);<br>
+    CHROMA_444(24, 32);<br>
+    CHROMA_444(32, 8);<br>
+    CHROMA_444(8,  32);<br>
+    CHROMA_444(64, 64);<br>
+    CHROMA_444(64, 32);<br>
+    CHROMA_444(32, 64);<br>
+    CHROMA_444(64, 48);<br>
+    CHROMA_444(48, 64);<br>
+    CHROMA_444(64, 16);<br>
+    CHROMA_444(16, 64);<br>
<br>
     p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_ps_c<8>;<br>
     p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_ps_c<4>;<br>
@@ -525,7 +559,9 @@<br>
<br>
     p.chroma_vsp = filterVertical_sp_c<4>;<br>
     p.luma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE>;<br>
-    p.chroma_p2s = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;<br>
+<br>
+    p.chroma_p2s[X265_CSP_I444] = filterConvertPelToShort_c<MAX_CU_SIZE>;<br>
+    p.chroma_p2s[X265_CSP_I420] = filterConvertPelToShort_c<MAX_CU_SIZE / 2>;<br>
<br>
     p.extendRowBorder = extendCURowColBorder;<br>
 }<br>
diff -r 4811da38078c -r f7d21da102ac source/common/pixel.cpp<br>
--- a/source/common/pixel.cpp   Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/common/pixel.cpp   Tue Jan 07 16:44:39 2014 +0530<br>
@@ -805,6 +805,27 @@<br>
 namespace x265 {<br>
 // x265 private namespace<br>
<br>
+#define CHROMA_420(W, H) \<br>
+    p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
+    p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
+    p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
+    p.chroma[X265_CSP_I420].sub_ps [CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
+    p.chroma[X265_CSP_I420].add_ps [CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
+<br>
+#define CHROMA_444(W, H) \<br>
+    p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
+    p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
+    p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
+    p.chroma[X265_CSP_I444].sub_ps [LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
+    p.chroma[X265_CSP_I444].add_ps [LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
+<br>
+#define LUMA(W, H) \<br>
+    p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
+    p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
+    p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
+    p.luma_sub_ps[LUMA_ ## W ## x ## H]  = pixel_sub_ps_c<W, H>; \<br>
+    p.luma_add_ps[LUMA_ ## W ## x ## H]  = pixel_add_ps_c<W, H>;<br>
+<br>
 /* It should initialize entries for pixel functions defined in this file. */<br>
 void Setup_C_PixelPrimitives(EncoderPrimitives &p)<br>
 {<br>
@@ -840,69 +861,81 @@<br>
     p.satd[LUMA_64x16] = satd8<64, 16>;<br>
     p.satd[LUMA_16x64] = satd8<16, 64>;<br>
<br>
-#define CHROMA(W, H) \<br>
-    p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
-    p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
-    p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
-    p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
-    p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
-<br>
-#define LUMA(W, H) \<br>
-    p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
-    p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
-    p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \<br>
-    p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \<br>
-    p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;<br>
-<br>
     LUMA(4, 4);<br>
     LUMA(8, 8);<br>
-    CHROMA(4, 4);<br>
+    CHROMA_420(4, 4);<br>
     LUMA(4, 8);<br>
-    CHROMA(2, 4);<br>
+    CHROMA_420(2, 4);<br>
     LUMA(8, 4);<br>
-    CHROMA(4, 2);<br>
+    CHROMA_420(4, 2);<br>
     LUMA(16, 16);<br>
-    CHROMA(8, 8);<br>
+    CHROMA_420(8,  8);<br>
     LUMA(16,  8);<br>
-    CHROMA(8, 4);<br>
+    CHROMA_420(8,  4);<br>
     LUMA(8, 16);<br>
-    CHROMA(4, 8);<br>
+    CHROMA_420(4,  8);<br>
     LUMA(16, 12);<br>
-    CHROMA(8, 6);<br>
+    CHROMA_420(8,  6);<br>
     LUMA(12, 16);<br>
-    CHROMA(6, 8);<br>
+    CHROMA_420(6,  8);<br>
     LUMA(16,  4);<br>
-    CHROMA(8, 2);<br>
+    CHROMA_420(8,  2);<br>
     LUMA(4, 16);<br>
-    CHROMA(2, 8);<br>
+    CHROMA_420(2,  8);<br>
     LUMA(32, 32);<br>
-    CHROMA(16, 16);<br>
+    CHROMA_420(16, 16);<br>
     LUMA(32, 16);<br>
-    CHROMA(16, 8);<br>
+    CHROMA_420(16, 8);<br>
     LUMA(16, 32);<br>
-    CHROMA(8, 16);<br>
+    CHROMA_420(8,  16);<br>
     LUMA(32, 24);<br>
-    CHROMA(16, 12);<br>
+    CHROMA_420(16, 12);<br>
     LUMA(24, 32);<br>
-    CHROMA(12, 16);<br>
+    CHROMA_420(12, 16);<br>
     LUMA(32,  8);<br>
-    CHROMA(16, 4);<br>
+    CHROMA_420(16, 4);<br>
     LUMA(8, 32);<br>
-    CHROMA(4, 16);<br>
+    CHROMA_420(4,  16);<br>
     LUMA(64, 64);<br>
-    CHROMA(32, 32);<br>
+    CHROMA_420(32, 32);<br>
     LUMA(64, 32);<br>
-    CHROMA(32, 16);<br>
+    CHROMA_420(32, 16);<br>
     LUMA(32, 64);<br>
-    CHROMA(16, 32);<br>
+    CHROMA_420(16, 32);<br>
     LUMA(64, 48);<br>
-    CHROMA(32, 24);<br>
+    CHROMA_420(32, 24);<br>
     LUMA(48, 64);<br>
-    CHROMA(24, 32);<br>
+    CHROMA_420(24, 32);<br>
     LUMA(64, 16);<br>
-    CHROMA(32, 8);<br>
+    CHROMA_420(32, 8);<br>
     LUMA(16, 64);<br>
-    CHROMA(8, 32);<br>
+    CHROMA_420(8,  32);<br>
+<br>
+    CHROMA_444(4,  4);<br>
+    CHROMA_444(8,  8);<br>
+    CHROMA_444(4,  8);<br>
+    CHROMA_444(8,  4);<br>
+    CHROMA_444(16, 16);<br>
+    CHROMA_444(16, 8);<br>
+    CHROMA_444(8,  16);<br>
+    CHROMA_444(16, 12);<br>
+    CHROMA_444(12, 16);<br>
+    CHROMA_444(16, 4);<br>
+    CHROMA_444(4,  16);<br>
+    CHROMA_444(32, 32);<br>
+    CHROMA_444(32, 16);<br>
+    CHROMA_444(16, 32);<br>
+    CHROMA_444(32, 24);<br>
+    CHROMA_444(24, 32);<br>
+    CHROMA_444(32, 8);<br>
+    CHROMA_444(8,  32);<br>
+    CHROMA_444(64, 64);<br>
+    CHROMA_444(64, 32);<br>
+    CHROMA_444(32, 64);<br>
+    CHROMA_444(64, 48);<br>
+    CHROMA_444(48, 64);<br>
+    CHROMA_444(64, 16);<br>
+    CHROMA_444(16, 64);<br>
<br>
     SET_FUNC_PRIMITIVE_TABLE_C(sse_pp, sse, pixelcmp_t, pixel, pixel)<br>
     SET_FUNC_PRIMITIVE_TABLE_C(sse_sp, sse, pixelcmp_sp_t, int16_t, pixel)<br>
diff -r 4811da38078c -r f7d21da102ac source/common/primitives.h<br>
--- a/source/common/primitives.h        Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/common/primitives.h        Tue Jan 07 16:44:39 2014 +0530<br>
@@ -75,7 +75,7 @@<br>
 // 4:2:0 chroma partition sizes. These enums are just a convenience for indexing into the<br>
 // chroma primitive arrays when instantiating templates. The function tables should always<br>
 // be indexed by the luma partition enum<br>
-enum Chroma420Partions<br>
+enum Chroma420Partitions<br>
 {<br>
     CHROMA_2x2, // never used by HEVC<br>
     CHROMA_4x4,   CHROMA_4x2,   CHROMA_2x4,<br>
@@ -240,7 +240,7 @@<br>
     ipfilter_ps_t   ipfilter_ps[NUM_IPFILTER_P_S];<br>
     ipfilter_ss_t   ipfilter_ss[NUM_IPFILTER_S_S];<br>
     filter_p2s_t    luma_p2s;<br>
-    filter_p2s_t    chroma_p2s;<br>
+    filter_p2s_t    chroma_p2s[NUM_CHROMA_PARTITIONS];<br>
     ipfilter_sp_t   chroma_vsp;<br>
<br>
     weightp_sp_t    weight_sp;<br>
diff -r 4811da38078c -r f7d21da102ac source/encoder/encoder.cpp<br>
--- a/source/encoder/encoder.cpp        Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/encoder/encoder.cpp        Tue Jan 07 16:44:39 2014 +0530<br>
@@ -1288,6 +1288,8 @@<br>
         bEnableRDOQTS = 0;<br>
     }<br>
<br>
+    m_csp = _param->internalCsp;<br>
+<br>
     //====== Coding Tools ========<br>
<br>
     uint32_t tuQTMaxLog2Size = g_convertToBit[_param->maxCUSize] + 2 - 1;<br>
diff -r 4811da38078c -r f7d21da102ac source/encoder/frameencoder.cpp<br>
--- a/source/encoder/frameencoder.cpp   Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/encoder/frameencoder.cpp   Tue Jan 07 16:44:39 2014 +0530<br>
@@ -330,11 +330,11 @@<br>
     // instead we weight the distortion of chroma.<br>
     int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();<br>
     int qpc = Clip3(0, 70, qp + chromaQPOffset);<br>
-    double cbWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset<br>
+    double cbWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset<br>
<br>
     chromaQPOffset = slice->getPPS()->getChromaCrQpOffset() + slice->getSliceQpDeltaCr();<br>
     qpc = Clip3(0, 70, qp + chromaQPOffset);<br>
-    double crWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset<br>
+    double crWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset<br>
     double chromaLambda = lambda / crWeight;<br>
<br>
     m_rows[row].m_search.setQPLambda(qp, lambda, chromaLambda);<br>
@@ -369,10 +369,10 @@<br>
     int qpc;<br>
     int chromaQPOffset = slice->getPPS()->getChromaCbQpOffset() + slice->getSliceQpDeltaCb();<br>
     qpc = Clip3(0, 70, qp + chromaQPOffset);<br>
-    double cbWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset<br>
+    double cbWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset<br>
     chromaQPOffset = slice->getPPS()->getChromaCrQpOffset() + slice->getSliceQpDeltaCr();<br>
     qpc = Clip3(0, 70, qp + chromaQPOffset);<br>
-    double crWeight = pow(2.0, (qp - g_chromaScale[qpc])); // takes into account of the chroma qp mapping and chroma qp Offset<br>
+    double crWeight = pow(2.0, (qp - g_chromaScale[slice->getSPS()->getChromaFormatIdc()][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset<br>
     double chromaLambda = lambda / crWeight;<br>
<br>
     // NOTE: set SAO lambda every Frame<br>
diff -r 4811da38078c -r f7d21da102ac source/encoder/framefilter.cpp<br>
--- a/source/encoder/framefilter.cpp    Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/encoder/framefilter.cpp    Tue Jan 07 16:44:39 2014 +0530<br>
@@ -64,6 +64,9 @@<br>
     m_cfg = top;<br>
     m_numRows = numRows;<br>
<br>
+    m_hChromaShift = CHROMA_H_SHIFT(m_cfg->getColorFormat());<br>
+    m_vChromaShift = CHROMA_V_SHIFT(m_cfg->getColorFormat());<br>
+<br>
     // NOTE: for sao only, I write this code because I want to exact match with HM's bug bitstream<br>
     m_rdGoOnSbacCoderRow0 = rdGoOnSbacCoder;<br>
<br>
@@ -77,7 +80,7 @@<br>
         m_sao.setSaoLcuBoundary(top->param.saoLcuBoundary);<br>
         m_sao.setSaoLcuBasedOptimization(top->param.saoLcuBasedOptimization);<br>
         m_sao.setMaxNumOffsetsPerPic(top->getMaxNumOffsetsPerPic());<br>
-        m_sao.create(top->param.sourceWidth, top->param.sourceHeight, g_maxCUWidth, g_maxCUHeight);<br>
+        m_sao.create(top->param.sourceWidth, top->param.sourceHeight, g_maxCUWidth, g_maxCUHeight, m_cfg->getColorFormat());<br>
         m_sao.createEncBuffer();<br>
     }<br>
<br>
@@ -222,8 +225,8 @@<br>
<br>
     // Border extend Left and Right<br>
     primitives.extendRowBorder(recon->getLumaAddr(lineStartCUAddr), recon->getStride(), recon->getWidth(), realH, recon->getLumaMarginX());<br>
-    primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> 1, realH >> 1, recon->getChromaMarginX());<br>
-    primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> 1, realH >> 1, recon->getChromaMarginX());<br>
+    primitives.extendRowBorder(recon->getCbAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> m_hChromaShift, realH >> m_vChromaShift, recon->getChromaMarginX());<br>
+    primitives.extendRowBorder(recon->getCrAddr(lineStartCUAddr), recon->getCStride(), recon->getWidth() >> m_hChromaShift, realH >> m_vChromaShift, recon->getChromaMarginX());<br>
<br>
     // Border extend Top<br>
     if (row == 0)<br>
@@ -252,8 +255,8 @@<br>
         const intptr_t stride = recon->getStride();<br>
         const intptr_t strideC = recon->getCStride();<br>
         pixel *pixY = recon->getLumaAddr(lineStartCUAddr) - recon->getLumaMarginX() + (realH - 1) * stride;<br>
-        pixel *pixU = recon->getCbAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;<br>
-        pixel *pixV = recon->getCrAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> 1) - 1) * strideC;<br>
+        pixel *pixU = recon->getCbAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> m_vChromaShift) - 1) * strideC;<br>
+        pixel *pixV = recon->getCrAddr(lineStartCUAddr) - recon->getChromaMarginX() + ((realH >> m_vChromaShift) - 1) * strideC;<br>
<br>
         for (int y = 0; y < recon->getLumaMarginY(); y++)<br>
         {<br>
@@ -290,8 +293,8 @@<br>
<br>
         uint64_t ssdY = computeSSD(orig->getLumaAddr(cuAddr), recon->getLumaAddr(cuAddr), stride, width, height);<br>
<br>
-        height >>= 1;<br>
-        width  >>= 1;<br>
+        height >>= m_vChromaShift;<br>
+        width  >>= m_hChromaShift;<br>
         stride = recon->getCStride();<br>
<br>
         uint64_t ssdU = computeSSD(orig->getCbAddr(cuAddr), recon->getCbAddr(cuAddr), stride, width, height);<br>
@@ -337,8 +340,8 @@<br>
<br>
         updateMD5Plane(m_pic->m_state[0], recon->getLumaAddr(cuAddr), width, height, stride);<br>
<br>
-        width >>= 1;<br>
-        height >>= 1;<br>
+        width  >>= m_hChromaShift;<br>
+        height >>= m_vChromaShift;<br>
         stride = recon->getCStride();<br>
<br>
         updateMD5Plane(m_pic->m_state[1], recon->getCbAddr(cuAddr), width, height, stride);<br>
@@ -356,8 +359,8 @@<br>
         }<br>
         updateCRC(recon->getLumaAddr(cuAddr), m_pic->m_crc[0], height, width, stride);<br>
<br>
-        width >>= 1;<br>
-        height >>= 1;<br>
+        width  >>= m_hChromaShift;<br>
+        height >>= m_vChromaShift;<br>
         stride = recon->getCStride();<br>
<br>
         updateCRC(recon->getCbAddr(cuAddr), m_pic->m_crc[1], height, width, stride);<br>
@@ -374,10 +377,10 @@<br>
             m_pic->m_checksum[0] = m_pic->m_checksum[1] = m_pic->m_checksum[2] = 0;<br>
         }<br>
         updateChecksum(recon->getLumaAddr(), m_pic->m_checksum[0], height, width, stride, row, cuHeight);<br>
-        width >>= 1;<br>
-        height >>= 1;<br>
+        width  >>= m_hChromaShift;<br>
+        height >>= m_vChromaShift;<br>
         stride = recon->getCStride();<br>
-        cuHeight >>= 1;<br>
+        cuHeight >>= m_vChromaShift;<br>
         updateChecksum(recon->getCbAddr(), m_pic->m_checksum[1], height, width, stride, row, cuHeight);<br>
         updateChecksum(recon->getCrAddr(), m_pic->m_checksum[2], height, width, stride, row, cuHeight);<br>
     }<br>
diff -r 4811da38078c -r f7d21da102ac source/encoder/framefilter.h<br>
--- a/source/encoder/framefilter.h      Mon Jan 06 23:15:58 2014 -0600<br>
+++ b/source/encoder/framefilter.h      Tue Jan 07 16:44:39 2014 +0530<br>
@@ -59,6 +59,9 @@<br>
     TEncCfg*                    m_cfg;<br>
     TComPic*                    m_pic;<br>
<br>
+    int                         m_hChromaShift;<br>
+    int                         m_vChromaShift;<br>
+<br>
 public:<br>
<br>
     TComLoopFilter              m_loopFilter;<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div></div></div><span class="HOEnZb"><font color="#888888"><br><br clear="all"><div><br></div>-- <br>Steve Borho
</font></span></div></div>
<br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br></div>