[x265] refine depth-related globals and constants.

Mon Aug 18 09:36:51 CEST 2014

# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1408347239 -32400
#      Mon Aug 18 16:33:59 2014 +0900
# Node ID 81469708804f322f6c76dfc6bb88f6d78fa983df
# Parent  9a0d242743577e0c8cc56cfac4934f8ea8cb7f6e
refine depth-related globals and constants.

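maxCUDepth:	maximum CU depth (NUM_CU_DEPTH - 1)
maxFullDepth:	maximum combined CU+TU or CU+PU depth (NUM_FULL_DEPTH - 1)
unitSize:	always 4, so it becomes the compile-time constant UNIT_SIZE (with LOG2_UNIT_SIZE = 2)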


diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComDataCU.cpp

--- a/source/Lib/TLibCommon/TComDataCU.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -129,7 +129,7 @@
     return ok;
 }
 
-void TComDataCU::create(TComDataCU *cu, uint32_t numPartition, uint32_t cuSize, int unitSize, int csp, int index, bool isLossless)
+void TComDataCU::create(TComDataCU *cu, uint32_t numPartition, uint32_t cuSize, int csp, int index, bool isLossless)
 {
     m_hChromaShift = CHROMA_H_SHIFT(csp);
     m_vChromaShift = CHROMA_V_SHIFT(csp);
@@ -139,12 +139,6 @@
     m_slice         = NULL;
     m_numPartitions = numPartition;
 
-    uint32_t tmp = 4 * AMVP_DECIMATION_FACTOR / unitSize;
-    tmp = tmp * tmp;
-    X265_CHECK(tmp == (1 << (g_log2Size[tmp])), "unexpected pixel count\n");
-    tmp = g_log2Size[tmp];
-    m_unitMask = ~((1 << tmp) - 1);
-
     uint32_t sizeL = cuSize * cuSize;
     uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
 
@@ -577,7 +571,7 @@
 
 // Copy current predicted part to a CU in picture.
 // It is used to predict for next part
-void TComDataCU::copyToPic(uint8_t depth)
+void TComDataCU::copyToPic(uint32_t depth)
 {
     TComDataCU* cu = m_pic->getCU(m_cuAddr);
 
@@ -622,7 +616,7 @@
     m_cuMvField[1].copyTo(cu->getCUMvField(REF_PIC_LIST_1), m_absIdxInLCU);
 
     uint32_t tmpY  = 1 << ((g_maxLog2CUSize - depth) * 2);
-    uint32_t tmpY2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
+    uint32_t tmpY2 = m_absIdxInLCU << LOG2_UNIT_SIZE * 2;
     memcpy(cu->getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
 
     uint32_t tmpC  = tmpY  >> (m_hChromaShift + m_vChromaShift);
@@ -633,7 +627,7 @@
     if (m_slice->m_pps->bTransquantBypassEnabled)
     {
         uint32_t tmp  = 1 << ((g_maxLog2CUSize - depth) * 2);
-        uint32_t tmp2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
+        uint32_t tmp2 = m_absIdxInLCU << LOG2_UNIT_SIZE * 2;
         memcpy(cu->getLumaOrigYuv() + tmp2, m_tqBypassOrigYuv[0], sizeof(pixel) * tmp);
 
         memcpy(cu->getChromaOrigYuv(1) + tmpC2, m_tqBypassOrigYuv[1], sizeof(pixel) * tmpC);
@@ -641,7 +635,7 @@
     }
 }
 
-void TComDataCU::copyCodedToPic(uint8_t depth)
+void TComDataCU::copyCodedToPic(uint32_t depth)
 {
     TComDataCU* cu = m_pic->getCU(m_cuAddr);
 
@@ -660,7 +654,7 @@
     memcpy(cu->getCbf(TEXT_CHROMA_V) + m_absIdxInLCU, m_cbf[2], sizeInChar);
 
     uint32_t tmpY  = 1 << ((g_maxLog2CUSize - depth) * 2);
-    uint32_t tmpY2 = m_absIdxInLCU << m_pic->getLog2UnitSize() * 2;
+    uint32_t tmpY2 = m_absIdxInLCU << LOG2_UNIT_SIZE * 2;
     memcpy(cu->getCoeffY() + tmpY2, m_trCoeff[0], sizeof(coeff_t) * tmpY);
     tmpY  >>= m_hChromaShift + m_vChromaShift;
     tmpY2 >>= m_hChromaShift + m_vChromaShift;
@@ -668,7 +662,7 @@
     memcpy(cu->m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
 }
 
-void TComDataCU::copyToPic(uint8_t depth, uint32_t partIdx, uint32_t partDepth)
+void TComDataCU::copyToPic(uint32_t depth, uint32_t partIdx, uint32_t partDepth)
 {
     TComDataCU* cu = m_pic->getCU(m_cuAddr);
     uint32_t qNumPart  = m_numPartitions >> (partDepth << 1);
@@ -713,7 +707,7 @@
     m_cuMvField[1].copyTo(cu->getCUMvField(REF_PIC_LIST_1), m_absIdxInLCU, partStart, qNumPart);
 
     uint32_t tmpY  = 1 << ((g_maxLog2CUSize - depth - partDepth) * 2);
-    uint32_t tmpY2 = partOffset << m_pic->getLog2UnitSize() * 2;
+    uint32_t tmpY2 = partOffset << LOG2_UNIT_SIZE * 2;
     memcpy(cu->getCoeffY() + tmpY2, m_trCoeff[0],  sizeof(coeff_t) * tmpY);
 
     uint32_t tmpC  = tmpY >> (m_hChromaShift + m_vChromaShift);
@@ -825,7 +819,7 @@
     uint32_t absPartIdxRT    = g_zscanToRaster[curPartUnitIdx];
     uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
 
-    if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + m_pic->getUnitSize()) >= m_slice->m_sps->picWidthInLumaSamples)
+    if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
         return NULL;
 
     if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, numPartInCUSize))
@@ -834,7 +828,7 @@
         {
             if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - numPartInCUSize + 1])
             {
-                uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1;
+                uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
                 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + 1];
                 if (RasterAddress::isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, numPartInCUSize))
                 {
@@ -865,7 +859,7 @@
 {
     uint32_t absPartIdxLB     = g_zscanToRaster[curPartUnitIdx];
 
-    if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + m_pic->getUnitSize()) >= m_slice->m_sps->picHeightInLumaSamples)
+    if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
     {
         return NULL;
     }
@@ -878,7 +872,7 @@
         {
             if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + numPartInCUSize - 1])
             {
-                uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1) * m_pic->getNumPartInCUSize();
+                uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * m_pic->getNumPartInCUSize();
                 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + numPartInCUSize - 1];
                 if (RasterAddress::isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, numPartInCUSize))
                 {
@@ -903,7 +897,7 @@
 {
     uint32_t absPartIdxLB     = g_zscanToRaster[curPartUnitIdx];
 
-    if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset << m_pic->getLog2UnitSize())) >=
+    if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
         m_slice->m_sps->picHeightInLumaSamples)
     {
         return NULL;
@@ -917,7 +911,7 @@
         {
             if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + partUnitOffset * numPartInCUSize - 1])
             {
-                uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1) * m_pic->getNumPartInCUSize();
+                uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) * m_pic->getNumPartInCUSize();
                 blPartUnitIdx = g_rasterToZscan[absPartIdxLB + partUnitOffset * numPartInCUSize - 1];
                 if (RasterAddress::isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB, numPartInCUSize))
                 {
@@ -946,7 +940,7 @@
 {
     uint32_t absPartIdxRT    = g_zscanToRaster[curPartUnitIdx];
 
-    if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset << m_pic->getLog2UnitSize())) >=
+    if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
         m_slice->m_sps->picWidthInLumaSamples)
     {
         return NULL;
@@ -960,7 +954,7 @@
         {
             if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset])
             {
-                uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1;
+                uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
                 arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset];
                 if (RasterAddress::isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, numPartInCUSize))
                 {
@@ -1004,8 +998,7 @@
 TComDataCU* TComDataCU::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInLCU)
 {
     uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
-    uint32_t absZorderQpMinCUIdx = (curAbsIdxInLCU >> ((g_maxCUDepth - m_slice->m_pps->maxCuDQPDepth) << 1)) <<
-        ((g_maxCUDepth - m_slice->m_pps->maxCuDQPDepth) << 1);
+    uint32_t absZorderQpMinCUIdx = curAbsIdxInLCU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
     uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
 
     // check for left LCU boundary
@@ -1029,8 +1022,7 @@
 TComDataCU* TComDataCU::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInLCU)
 {
     uint32_t numPartInCUSize = m_pic->getNumPartInCUSize();
-    uint32_t absZorderQpMinCUIdx = (curAbsIdxInLCU >> ((g_maxCUDepth - m_slice->m_pps->maxCuDQPDepth) << 1)) <<
-        ((g_maxCUDepth - m_slice->m_pps->maxCuDQPDepth) << 1);
+    uint32_t absZorderQpMinCUIdx = curAbsIdxInLCU & (0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
     uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
 
     // check for top LCU boundary
@@ -1074,7 +1066,7 @@
 
 char TComDataCU::getLastCodedQP(uint32_t absPartIdx)
 {
-    uint32_t quPartIdxMask = ~((1 << ((g_maxCUDepth - m_slice->m_pps->maxCuDQPDepth) << 1)) - 1);
+    uint32_t quPartIdxMask = 0xFF << (g_maxFullDepth - m_slice->m_pps->maxCuDQPDepth) * 2;
     int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask);
 
     if (lastValidPartIdx >= 0)
@@ -1526,7 +1518,7 @@
 void TComDataCU::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& ruiPartIdxLT, uint32_t& ruiPartIdxRT)
 {
     ruiPartIdxLT = m_absIdxInLCU;
-    ruiPartIdxRT = g_rasterToZscan[g_zscanToRaster[ruiPartIdxLT] + (1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1];
+    ruiPartIdxRT = g_rasterToZscan[g_zscanToRaster[ruiPartIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
 
     switch (m_partSizes[0])
     {
@@ -1567,7 +1559,7 @@
 
 void TComDataCU::deriveLeftBottomIdx(uint32_t partIdx, uint32_t& outPartIdxLB)
 {
-    outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize() - 1)) - 1) * m_pic->getNumPartInCUSize()];
+    outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInLCU] + ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * m_pic->getNumPartInCUSize()];
 
     switch (m_partSizes[0])
     {
@@ -1609,8 +1601,8 @@
 void TComDataCU::deriveRightBottomIdx(uint32_t partIdx, uint32_t& outPartIdxRB)
 {
     outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInLCU] +
-                                   ((1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize() - 1)) - 1) * m_pic->getNumPartInCUSize() +
-                                   (1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize())) - 1];
+                                   ((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) * m_pic->getNumPartInCUSize() +
+                                   (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
 
     switch (m_partSizes[0])
     {
@@ -1646,7 +1638,7 @@
 
 void TComDataCU::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& outPartIdxRT, uint32_t partOffset, uint32_t partDepth)
 {
-    uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - m_pic->getLog2UnitSize() - partDepth);
+    uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - partDepth);
 
     outPartIdxLT = m_absIdxInLCU + partOffset;
     outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + numPartInWidth - 1];
@@ -1837,10 +1829,10 @@
         int refIdx;
         int lcuIdx = -1;
 
-        if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxTmp] + m_pic->getUnitSize()) >= m_slice->m_sps->picWidthInLumaSamples)  // image boundary check
+        if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdxTmp] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)  // image boundary check
         {
         }
-        else if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxTmp] + m_pic->getUnitSize()) >= m_slice->m_sps->picHeightInLumaSamples)
+        else if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdxTmp] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
         {
         }
         else
@@ -2107,10 +2099,10 @@
         //----  co-located RightBottom Temporal Predictor (H) ---//
         absPartIdx = g_zscanToRaster[partIdxRB];
         int lcuIdx = -1;
-        if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdx] + m_pic->getUnitSize()) >= m_slice->m_sps->picWidthInLumaSamples)  // image boundary check
+        if ((m_pic->getCU(m_cuAddr)->getCUPelX() + g_rasterToPelX[absPartIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)  // image boundary check
         {
         }
-        else if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdx] + m_pic->getUnitSize()) >= m_slice->m_sps->picHeightInLumaSamples)
+        else if ((m_pic->getCU(m_cuAddr)->getCUPelY() + g_rasterToPelY[absPartIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
         {
         }
         else
@@ -2342,7 +2334,7 @@
  */
 bool TComDataCU::xGetColMVP(int picList, int cuAddr, int partUnitIdx, MV& outMV, int& outRefIdx)
 {
-    uint32_t absPartAddr = partUnitIdx & m_unitMask;
+    uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
 
     int colRefPicList;
     int colPOC, colRefPOC, curPOC, curRefPOC, scale;
@@ -2424,8 +2416,8 @@
 
     outPartIdxCenter = m_absIdxInLCU + partAddr; // partition origin.
     outPartIdxCenter = g_rasterToZscan[g_zscanToRaster[outPartIdxCenter]
-                                       + (partHeight >> (m_pic->getLog2UnitSize() + 1)) * m_pic->getNumPartInCUSize()
-                                       + (partWidth  >> (m_pic->getLog2UnitSize() + 1))];
+                                       + (partHeight >> (LOG2_UNIT_SIZE + 1)) * m_pic->getNumPartInCUSize()
+                                       + (partWidth  >> (LOG2_UNIT_SIZE + 1))];
 }
 
 ScanType TComDataCU::getCoefScanIdx(uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma, bool bIsIntra)
@@ -2452,8 +2444,7 @@
         dirMode = getChromaIntraDir(absPartIdx);
         if (dirMode == DM_CHROMA_IDX)
         {
-            uint32_t lumaLCUIdx = (m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & (~((1 << (2 * g_addCUDepth)) - 1));
-            dirMode = getLumaIntraDir(lumaLCUIdx);
+            dirMode = getLumaIntraDir((m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC);
             dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode;
         }
     }
@@ -2490,7 +2481,7 @@
 
 uint32_t TComDataCU::getSCUAddr()
 {
-    return (m_cuAddr) * (1 << (g_maxCUDepth << 1)) + m_absIdxInLCU;
+    return (m_cuAddr << g_maxFullDepth * 2) + m_absIdxInLCU;
 }
 
 //! \}
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComDataCU.h	Mon Aug 18 16:33:59 2014 +0900
@@ -168,7 +168,6 @@
     int           m_chromaFormat;
     int           m_hChromaShift;
     int           m_vChromaShift;
-    uint32_t      m_unitMask;        ///< mask for mapping index to CompressMV field
 
     // -------------------------------------------------------------------------------------------------------------------
     // CU data
@@ -247,7 +246,7 @@
     // -------------------------------------------------------------------------------------------------------------------
     // create / destroy / initialize / copy
     // -------------------------------------------------------------------------------------------------------------------
-    void          create(TComDataCU *p, uint32_t numPartition, uint32_t cuSize, int unitSize, int csp, int index, bool isLossLess);
+    void          create(TComDataCU *p, uint32_t numPartition, uint32_t cuSize, int csp, int index, bool isLossLess);
 
     bool          initialize(uint32_t numPartition, uint32_t sizeL, uint32_t sizeC, uint32_t numBlocks, bool isLossless);
 
@@ -260,9 +259,9 @@
     void          copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx, uint32_t depth);
     void          copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, uint32_t depth, bool isRDObasedAnalysis = true);
 
-    void          copyToPic(uint8_t depth);
-    void          copyToPic(uint8_t depth, uint32_t partIdx, uint32_t partDepth);
-    void          copyCodedToPic(uint8_t depth);
+    void          copyToPic(uint32_t depth);
+    void          copyToPic(uint32_t depth, uint32_t partIdx, uint32_t partDepth);
+    void          copyCodedToPic(uint32_t depth);
 
     // -------------------------------------------------------------------------------------------------------------------
     // member functions for CU description
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComPattern.cpp
--- a/source/Lib/TLibCommon/TComPattern.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComPattern.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -184,8 +184,8 @@
 void TComPattern::initIntraNeighbors(TComDataCU* cu, uint32_t zOrderIdxInPart, uint32_t partDepth, TextType cType, IntraNeighbors *intraNeighbors)
 {
     uint32_t log2TrSize = cu->getLog2CUSize(0) - partDepth;
-    int log2UnitWidth  = g_log2UnitSize;
-    int log2UnitHeight = g_log2UnitSize;
+    int log2UnitWidth  = LOG2_UNIT_SIZE;
+    int log2UnitHeight = LOG2_UNIT_SIZE;
 
     if (cType != TEXT_LUMA)
     {
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComPicSym.cpp
--- a/source/Lib/TLibCommon/TComPicSym.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComPicSym.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -50,8 +50,6 @@
 TComPicSym::TComPicSym()
     : m_widthInCU(0)
     , m_heightInCU(0)
-    , m_unitSize(0)
-    , m_log2UnitSize(0)
     , m_numPartitions(0)
     , m_numPartInCUSize(0)
     , m_numCUsInFrame(0)
@@ -65,14 +63,10 @@
 {
     uint32_t i;
 
-    m_numPartitions   = 1 << (g_maxCUDepth << 1);
+    m_numPartitions   = 1 << g_maxFullDepth * 2;
+    m_numPartInCUSize = 1 << g_maxFullDepth;
 
-    m_log2UnitSize    = g_log2UnitSize;
-    m_unitSize        = 1 << m_log2UnitSize;
-
-    m_numPartInCUSize = g_maxCUSize >> m_log2UnitSize;
-
-    m_widthInCU       = (param->sourceWidth + g_maxCUSize - 1) >> g_maxLog2CUSize;
+    m_widthInCU       = (param->sourceWidth  + g_maxCUSize - 1) >> g_maxLog2CUSize;
     m_heightInCU      = (param->sourceHeight + g_maxCUSize - 1) >> g_maxLog2CUSize;
 
     m_numCUsInFrame   = m_widthInCU * m_heightInCU;
@@ -90,7 +84,7 @@
         if (!m_cuData[i].initialize(m_numPartitions, sizeL, sizeC, 1, tqBypass))
             return false;
 
-        m_cuData[i].create(&m_cuData[i], m_numPartitions, g_maxCUSize, m_unitSize, param->internalCsp, 0, tqBypass);
+        m_cuData[i].create(&m_cuData[i], m_numPartitions, g_maxCUSize, param->internalCsp, 0, tqBypass);
     }
 
     return true;
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComPicSym.h
--- a/source/Lib/TLibCommon/TComPicSym.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComPicSym.h	Mon Aug 18 16:33:59 2014 +0900
@@ -65,9 +65,6 @@
     uint32_t      m_widthInCU;
     uint32_t      m_heightInCU;
 
-    uint32_t      m_unitSize;
-    uint32_t      m_log2UnitSize;
-
     uint32_t      m_numPartitions;
     uint32_t      m_numPartInCUSize;
     uint32_t      m_numCUsInFrame;
@@ -91,10 +88,6 @@
 
     uint32_t    getFrameHeightInCU() const { return m_heightInCU; }
 
-    uint32_t    getUnitSize() const       { return m_unitSize; }
-
-    uint32_t    getLog2UnitSize() const   { return m_log2UnitSize; }
-
     uint32_t    getNumberOfCUsInFrame() const { return m_numCUsInFrame; }
 
     TComDataCU* getCU(uint32_t cuAddr)    { return &m_cuData[cuAddr]; }
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -64,7 +64,7 @@
 {
 }
 
-bool TComPicYuv::create(int picWidth, int picHeight, int picCsp, uint32_t maxCUSize, uint32_t maxCUDepth)
+bool TComPicYuv::create(int picWidth, int picHeight, int picCsp, uint32_t maxCUSize, uint32_t maxFullDepth)
 {
     m_picWidth  = picWidth;
     m_picHeight = picHeight;
@@ -87,6 +87,7 @@
 
     m_strideC = ((m_numCuInWidth * g_maxCUSize) >> m_hChromaShift) + (m_chromaMarginX * 2);
     int maxHeight = m_numCuInHeight * g_maxCUSize;
+    uint32_t numPartitions = 1 << maxFullDepth * 2;
 
     CHECKED_MALLOC(m_picBuf[0], pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
     CHECKED_MALLOC(m_picBuf[1], pixel, m_strideC * ((maxHeight >> m_vChromaShift) + (m_chromaMarginY * 2)));
@@ -108,14 +109,14 @@
         }
     }
 
-    CHECKED_MALLOC(m_buOffsetY, int, (size_t)1 << (2 * maxCUDepth));
-    CHECKED_MALLOC(m_buOffsetC, int, (size_t)1 << (2 * maxCUDepth));
-    for (int buRow = 0; buRow < (1 << maxCUDepth); buRow++)
+    CHECKED_MALLOC(m_buOffsetY, int, (size_t)numPartitions);
+    CHECKED_MALLOC(m_buOffsetC, int, (size_t)numPartitions);
+    for (int buRow = 0; buRow < (1 << maxFullDepth); buRow++)
     {
-        for (int buCol = 0; buCol < (1 << maxCUDepth); buCol++)
+        for (int buCol = 0; buCol < (1 << maxFullDepth); buCol++)
         {
-            m_buOffsetY[(buRow << maxCUDepth) + buCol] = getStride() * buRow * (maxCUSize >> maxCUDepth) + buCol * (maxCUSize  >> maxCUDepth);
-            m_buOffsetC[(buRow << maxCUDepth) + buCol] = getCStride() * buRow * (maxCUSize >> maxCUDepth >> m_vChromaShift) + buCol * (maxCUSize >> maxCUDepth >> m_hChromaShift);
+            m_buOffsetY[(buRow << maxFullDepth) + buCol] = getStride() * buRow * UNIT_SIZE + buCol * UNIT_SIZE;
+            m_buOffsetC[(buRow << maxFullDepth) + buCol] = getCStride() * buRow * (UNIT_SIZE >> m_vChromaShift) + buCol * (UNIT_SIZE >> m_hChromaShift);
         }
     }
 
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Mon Aug 18 16:33:59 2014 +0900
@@ -100,7 +100,7 @@
     //  Memory management
     // ------------------------------------------------------------------------------------------------
 
-    bool  create(int picWidth, int picHeight, int csp, uint32_t maxCUSize, uint32_t maxCUDepth);
+    bool  create(int picWidth, int picHeight, int csp, uint32_t maxCUSize, uint32_t maxFullDepth);
     void  destroy();
 
     // ------------------------------------------------------------------------------------------------
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComRom.cpp
--- a/source/Lib/TLibCommon/TComRom.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComRom.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -112,10 +112,9 @@
 // ====================================================================================================================
 
 uint32_t g_maxLog2CUSize = MAX_LOG2_CU_SIZE;
-uint32_t g_maxCUSize   = MAX_CU_SIZE;
-uint32_t g_maxCUDepth  = MAX_FULL_DEPTH;
-uint32_t g_addCUDepth  = 1;
-uint32_t g_log2UnitSize = 2;
+uint32_t g_maxCUSize     = MAX_CU_SIZE;
+uint32_t g_maxFullDepth  = NUM_FULL_DEPTH - 1;
+uint32_t g_maxCUDepth    = NUM_CU_DEPTH - 1;
 uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
 uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
 uint32_t g_rasterToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
@@ -123,11 +122,11 @@
 
 const uint32_t g_puOffset[8] = { 0, 8, 4, 4, 2, 10, 1, 5 };
 
-void initZscanToRaster(int maxDepth, int depth, uint32_t startVal, uint32_t*& curIdx)
+void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx)
 {
-    int stride = 1 << (maxDepth - 1);
+    uint32_t stride = 1 << maxFullDepth;
 
-    if (depth == maxDepth)
+    if (depth > maxFullDepth)
     {
         curIdx[0] = startVal;
         curIdx++;
@@ -135,41 +134,38 @@
     else
     {
         int step = stride >> depth;
-        initZscanToRaster(maxDepth, depth + 1, startVal,                        curIdx);
-        initZscanToRaster(maxDepth, depth + 1, startVal + step,                 curIdx);
-        initZscanToRaster(maxDepth, depth + 1, startVal + step * stride,        curIdx);
-        initZscanToRaster(maxDepth, depth + 1, startVal + step * stride + step, curIdx);
+        initZscanToRaster(maxFullDepth, depth + 1, startVal,                        curIdx);
+        initZscanToRaster(maxFullDepth, depth + 1, startVal + step,                 curIdx);
+        initZscanToRaster(maxFullDepth, depth + 1, startVal + step * stride,        curIdx);
+        initZscanToRaster(maxFullDepth, depth + 1, startVal + step * stride + step, curIdx);
     }
 }
 
-void initRasterToZscan(uint32_t maxCUSize, uint32_t maxDepth)
+void initRasterToZscan(uint32_t maxFullDepth)
 {
-    uint32_t  unitSize = maxCUSize  >> (maxDepth - 1);
+    uint32_t numPartitions = 1 << maxFullDepth * 2;
 
-    uint32_t  numPartInCUSize  = (uint32_t)maxCUSize / unitSize;
-
-    for (uint32_t i = 0; i < numPartInCUSize * numPartInCUSize; i++)
+    for (uint32_t i = 0; i < numPartitions; i++)
     {
         g_rasterToZscan[g_zscanToRaster[i]] = i;
     }
 }
 
-void initRasterToPelXY(uint32_t maxCUSize, uint32_t maxDepth)
+void initRasterToPelXY(uint32_t maxFullDepth)
 {
     uint32_t i;
 
     uint32_t* tempX = &g_rasterToPelX[0];
     uint32_t* tempY = &g_rasterToPelY[0];
 
-    uint32_t  unitSize  = maxCUSize >> (maxDepth - 1);
-
-    uint32_t  numPartInCUSize = maxCUSize / unitSize;
+    uint32_t numPartInCUSize = 1 << maxFullDepth;
+    uint32_t numPartitions   = 1 << maxFullDepth * 2;
 
     tempX[0] = 0;
     tempX++;
     for (i = 1; i < numPartInCUSize; i++)
     {
-        tempX[0] = tempX[-1] + unitSize;
+        tempX[0] = tempX[-1] + UNIT_SIZE;
         tempX++;
     }
 
@@ -179,9 +175,9 @@
         tempX += numPartInCUSize;
     }
 
-    for (i = 1; i < numPartInCUSize * numPartInCUSize; i++)
+    for (i = 1; i < numPartitions; i++)
     {
-        tempY[i] = (i / numPartInCUSize) * unitSize;
+        tempY[i] = (i >> maxFullDepth) * UNIT_SIZE;
     }
 }
 
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComRom.h
--- a/source/Lib/TLibCommon/TComRom.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComRom.h	Mon Aug 18 16:33:59 2014 +0900
@@ -43,10 +43,17 @@
 namespace x265 {
 // private namespace
 
-#define MAX_CU_DEPTH            4                           // maximun CU depth
-#define MAX_FULL_DEPTH          5                           // maximun full depth
-#define MAX_LOG2_CU_SIZE        6                           // log2(LCUSize)
+#define NUM_CU_DEPTH            4                           // maximum number of CU depths
+#define NUM_FULL_DEPTH          5                           // maximum number of full depths
+#define MIN_LOG2_CU_SIZE        3                           // log2(minCUSize)
+#define MAX_LOG2_CU_SIZE        6                           // log2(maxCUSize)
+#define MIN_CU_SIZE             (1 << MIN_LOG2_CU_SIZE)     // minimum allowable size of CU
 #define MAX_CU_SIZE             (1 << MAX_LOG2_CU_SIZE)     // maximum allowable size of CU
+
+#define LOG2_UNIT_SIZE          2                           // log2(unitSize)
+#define UNIT_SIZE               (1 << LOG2_UNIT_SIZE)       // unit size of CU partition
+#define TMVP_UNIT_MASK          0xF0                        // mask for mapping index to CompressMV field
+
 #define MIN_PU_SIZE             4
 #define MIN_TU_SIZE             4
 #define MAX_NUM_SPU_W           (MAX_CU_SIZE / MIN_PU_SIZE) // maximum number of SPU in horizontal line
@@ -71,21 +78,20 @@
 extern uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
 extern uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
 
-void initZscanToRaster(int maxDepth, int depth, uint32_t startVal, uint32_t*& curIdx);
-void initRasterToZscan(uint32_t maxCUSize, uint32_t maxCUDepth);
+void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx);
+void initRasterToZscan(uint32_t maxFullDepth);
 
 // conversion of partition index to picture pel position
 extern uint32_t g_rasterToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
 extern uint32_t g_rasterToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W];
 
-void initRasterToPelXY(uint32_t maxCUSize, uint32_t maxCUDepth);
+void initRasterToPelXY(uint32_t maxFullDepth);
 
 // global variable (LCU width/height, max. CU depth)
 extern uint32_t g_maxLog2CUSize;
 extern uint32_t g_maxCUSize;
 extern uint32_t g_maxCUDepth;
-extern uint32_t g_addCUDepth;
-extern uint32_t g_log2UnitSize;
+extern uint32_t g_maxFullDepth;
 
 extern const uint32_t g_puOffset[8];
 
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp
--- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -597,13 +597,12 @@
         endX   = (rpelx == picWidthTmp) ? lcuWidth - 1 : lcuWidth;
         if (lcuWidth % 16)
         {
-            int8_t signRight;
             for (y = 0; y < lcuHeight; y++)
             {
                 int8_t signLeft = xSign(rec[startX] - tmpL[y]);
                 for (x = startX; x < endX; x++)
                 {
-                    signRight = xSign(rec[x] - rec[x + 1]);
+                    int8_t signRight = xSign(rec[x] - rec[x + 1]);
                     edgeType = signRight + signLeft + 2;
                     signLeft  = -signRight;
 
@@ -1297,7 +1296,7 @@
     TComPicYuv* pcPicYuvRec = cu->m_pic->getPicYuvRec();
     int hChromaShift = cu->getHorzChromaShift();
     int vChromaShift = cu->getVertChromaShift();
-    uint32_t lumaOffset   = absZOrderIdx << cu->m_pic->getLog2UnitSize() * 2;
+    uint32_t lumaOffset   = absZOrderIdx << LOG2_UNIT_SIZE * 2;
     uint32_t chromaOffset = lumaOffset >> (hChromaShift + vChromaShift);
 
     pixel* dst = pcPicYuvRec->getLumaAddr(cu->getAddr(), absZOrderIdx);
diff -r 9a0d24274357 -r 81469708804f source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -112,7 +112,7 @@
         ok &= m_qtTempShortYuv[i].create(MAX_CU_SIZE, MAX_CU_SIZE, m_param->internalCsp);
     }
 
-    const uint32_t numPartitions = 1 << (g_maxCUDepth << 1);
+    const uint32_t numPartitions = 1 << g_maxFullDepth * 2;
     CHECKED_MALLOC(m_qtTempTrIdx, uint8_t, numPartitions);
     CHECKED_MALLOC(m_qtTempCbf[0], uint8_t, numPartitions * 3);
     m_qtTempCbf[1] = m_qtTempCbf[0] + numPartitions;
@@ -221,8 +221,7 @@
 
     uint32_t log2TrSize = g_maxLog2CUSize - fullDepth;
     uint32_t qtLayer    = log2TrSize - 2;
-    uint32_t log2UnitSize = cu->m_pic->getLog2UnitSize();
-    uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2);
+    uint32_t coeffOffset = absPartIdx << LOG2_UNIT_SIZE * 2;
     coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
     m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype);
 }
@@ -262,18 +261,17 @@
     }
 
     uint32_t qtLayer    = log2TrSize - 2;
-    uint32_t log2UnitSize = cu->m_pic->getLog2UnitSize();
 
     if (m_csp != X265_CSP_I422)
     {
         uint32_t shift = (m_csp == X265_CSP_I420) ? 2 : 0;
-        uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2 - shift);
+        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2 - shift);
         coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
         m_entropyCoder->codeCoeffNxN(cu, coeff, absPartIdx, log2TrSizeC, ttype);
     }
     else
     {
-        uint32_t coeffOffset = absPartIdx << (log2UnitSize * 2 - 1);
+        uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2 - 1);
         coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset;
         uint32_t subTUSize = 1 << (log2TrSizeC * 2);
         uint32_t partIdxesPerSubTU  = cu->m_pic->getNumPartInCU() >> (((cu->getDepth(absPartIdx) + trDepthC) << 1) + 1);
@@ -586,7 +584,7 @@
         cu->setTrIdxSubParts(trDepth, absPartIdx, fullDepth);
 
         uint32_t qtLayer        = log2TrSize - 2;
-        uint32_t coeffOffsetY   = absPartIdx << cu->m_pic->getLog2UnitSize() * 2;
+        uint32_t coeffOffsetY   = absPartIdx << LOG2_UNIT_SIZE * 2;
         coeff_t* coeffY         = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
         int16_t* reconQt        = m_qtTempShortYuv[qtLayer].getLumaAddr(absPartIdx);
         X265_CHECK(m_qtTempShortYuv[qtLayer].m_width == MAX_CU_SIZE, "width is not max CU size\n");
@@ -815,7 +813,7 @@
         pixel*   pred         = predYuv->getLumaAddr(absPartIdx);
         int16_t* residual     = resiYuv->getLumaAddr(absPartIdx);
         pixel*   recon        = reconYuv->getLumaAddr(absPartIdx);
-        uint32_t coeffOffsetY = absPartIdx << cu->m_pic->getLog2UnitSize() * 2;
+        uint32_t coeffOffsetY = absPartIdx << LOG2_UNIT_SIZE * 2;
         coeff_t* coeff        = cu->getCoeffY() + coeffOffsetY;
 
         uint32_t zorder           = cu->getZorderIdxInCU() + absPartIdx;
@@ -898,7 +896,7 @@
         uint32_t qtLayer    = log2TrSize - 2;
 
         //===== copy transform coefficients =====
-        uint32_t coeffOffsetY = absPartIdx << cu->m_pic->getLog2UnitSize() * 2;
+        uint32_t coeffOffsetY = absPartIdx << LOG2_UNIT_SIZE * 2;
         coeff_t* coeffSrcY    = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
         coeff_t* coeffDestY   = cu->getCoeffY()           + coeffOffsetY;
         ::memcpy(coeffDestY, coeffSrcY, sizeof(coeff_t) << (log2TrSize * 2));
@@ -1005,7 +1003,6 @@
         }
 
         uint32_t qtLayer = log2TrSize - 2;
-        uint32_t log2UnitSize = cu->m_pic->getLog2UnitSize();
         uint32_t tuSize = 1 << log2TrSizeC;
         uint32_t stride = fencYuv->getCStride();
         const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
@@ -1044,10 +1041,7 @@
                 uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);
                 //===== update chroma mode =====
                 if (chromaPredMode == DM_CHROMA_IDX)
-                {
-                    uint32_t lumaLCUIdx  = (m_csp == X265_CSP_I444) ? absPartIdxC : absPartIdxC & (~((1 << (2 * g_addCUDepth)) - 1));
-                    chromaPredMode = cu->getLumaIntraDir(lumaLCUIdx);
-                }
+                    chromaPredMode = cu->getLumaIntraDir((m_csp == X265_CSP_I444) ? absPartIdxC : 0);
                 chromaPredMode = (m_csp == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
                 //===== get prediction signal =====
                 predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, log2TrSizeC, m_csp);
@@ -1057,7 +1051,7 @@
 
                 int16_t* reconQt        = m_qtTempShortYuv[qtLayer].getChromaAddr(chromaId, absPartIdxC);
                 uint32_t reconQtStride  = m_qtTempShortYuv[qtLayer].m_cwidth;
-                uint32_t coeffOffsetC   = absPartIdxC << (log2UnitSize * 2 - (hChromaShift + vChromaShift));
+                uint32_t coeffOffsetC   = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
                 coeff_t* coeffC         = m_qtTempCoeff[chromaId][qtLayer] + coeffOffsetC;
 
                 if (checkTransformSkip)
@@ -1210,7 +1204,7 @@
         //===== copy transform coefficients =====
 
         uint32_t numCoeffC = 1 << (log2TrSizeC * 2 + (m_csp == X265_CSP_I422));
-        uint32_t coeffOffsetC = absPartIdx << (cu->m_pic->getLog2UnitSize() * 2 - (hChromaShift + vChromaShift));
+        uint32_t coeffOffsetC = absPartIdx << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
 
         uint32_t qtLayer   = log2TrSize - 2;
         coeff_t* coeffSrcU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
@@ -1260,7 +1254,6 @@
                 return;
         }
 
-        uint32_t log2UnitSize = cu->m_pic->getLog2UnitSize();
         uint32_t tuSize = 1 << log2TrSizeC;
         uint32_t stride = fencYuv->getCStride();
         const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
@@ -1280,7 +1273,7 @@
                 pixel*   pred           = predYuv->getChromaAddr(chromaId, absPartIdxC);
                 int16_t* residual       = resiYuv->getChromaAddr(chromaId, absPartIdxC);
                 pixel*   recon          = reconYuv->getChromaAddr(chromaId, absPartIdxC);
-                uint32_t coeffOffsetC   = absPartIdxC << (log2UnitSize * 2 - (hChromaShift + vChromaShift));
+                uint32_t coeffOffsetC   = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
                 coeff_t* coeff          = cu->getCoeff(ttype) + coeffOffsetC;
                 uint32_t zorder         = cu->getZorderIdxInCU() + absPartIdxC;
                 pixel*   reconIPred     = cu->m_pic->getPicYuvRec()->getChromaAddr(chromaId, cu->getAddr(), zorder);
@@ -1293,10 +1286,7 @@
                 uint32_t chromaPredMode = cu->getChromaIntraDir(absPartIdxC);
                 //===== update chroma mode =====
                 if (chromaPredMode == DM_CHROMA_IDX)
-                {
-                    uint32_t lumaLCUIdx  = (m_csp == X265_CSP_I444) ? absPartIdxC : absPartIdxC & (~((1 << (2 * g_addCUDepth)) - 1));
-                    chromaPredMode = cu->getLumaIntraDir(lumaLCUIdx);
-                }
+                    chromaPredMode = cu->getLumaIntraDir((m_csp == X265_CSP_I444) ? absPartIdxC : 0);
                 chromaPredMode = (m_csp == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode;
                 //===== init availability pattern =====
                 TComPattern::initAdiPatternChroma(cu, absPartIdxC, trDepthC, m_predBuf, chromaId);
@@ -1602,7 +1592,7 @@
 
         //=== update PU data ====
         cu->setLumaIntraDirSubParts(bestPUMode, partOffset, depth + initTrDepth);
-        cu->copyToPic((uint8_t)depth, pu, initTrDepth);
+        cu->copyToPic(depth, pu, initTrDepth);
     } // PU loop
 
     if (numPU > 1)
@@ -2275,7 +2265,7 @@
 
     uint32_t log2CUSize = cu->getLog2CUSize(0);
     uint32_t cuSize = 1 << log2CUSize;
-    uint8_t  depth  = cu->getDepth(0);
+    uint32_t depth  = cu->getDepth(0);
 
     int hChromaShift = CHROMA_H_SHIFT(m_csp);
     int vChromaShift = CHROMA_V_SHIFT(m_csp);
@@ -2338,7 +2328,7 @@
 
     uint32_t log2CUSize = cu->getLog2CUSize(0);
     uint32_t cuSize = 1 << log2CUSize;
-    uint8_t  depth  = cu->getDepth(0);
+    uint32_t depth  = cu->getDepth(0);
 
     int hChromaShift = CHROMA_H_SHIFT(m_csp);
     int vChromaShift = CHROMA_V_SHIFT(m_csp);
@@ -2525,7 +2515,7 @@
         const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
         uint32_t absPartIdxStep = cu->m_pic->getNumPartInCU() >> ((cu->getDepth(0) +  trModeC) << 1);
 
-        uint32_t coeffOffsetY = absPartIdx << cu->m_pic->getLog2UnitSize() * 2;
+        uint32_t coeffOffsetY = absPartIdx << LOG2_UNIT_SIZE * 2;
         uint32_t coeffOffsetC = coeffOffsetY >> (hChromaShift + vChromaShift);
         coeff_t *coeffCurY = cu->getCoeffY()  + coeffOffsetY;
         coeff_t *coeffCurU = cu->getCoeffCb() + coeffOffsetC;
@@ -2689,7 +2679,7 @@
         int sizeIdx  = log2TrSize - 2;
         int sizeIdxC = log2TrSizeC - 2;
         const uint32_t qtLayer = log2TrSize - 2;
-        uint32_t coeffOffsetY = absPartIdx << cu->m_pic->getLog2UnitSize() * 2;
+        uint32_t coeffOffsetY = absPartIdx << LOG2_UNIT_SIZE * 2;
         uint32_t coeffOffsetC = coeffOffsetY >> (hChromaShift + vChromaShift);
         coeff_t* coeffCurY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
         coeff_t* coeffCurU = m_qtTempCoeff[1][qtLayer] + coeffOffsetC;
@@ -3502,7 +3492,7 @@
     {
         //Luma
         const uint32_t qtLayer = log2TrSize - 2;
-        uint32_t coeffOffsetY = absPartIdx << cu->m_pic->getLog2UnitSize() * 2;
+        uint32_t coeffOffsetY = absPartIdx << LOG2_UNIT_SIZE * 2;
         coeff_t* coeffCurY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
 
         //Chroma
@@ -3603,7 +3593,7 @@
         else
         {
             uint32_t numCoeffY = 1 << (log2TrSize * 2);
-            uint32_t coeffOffsetY = absPartIdx << cu->m_pic->getLog2UnitSize() * 2;
+            uint32_t coeffOffsetY = absPartIdx << LOG2_UNIT_SIZE * 2;
             coeff_t* coeffSrcY = m_qtTempCoeff[0][qtLayer] + coeffOffsetY;
             coeff_t* coeffDstY = cu->getCoeffY()           + coeffOffsetY;
             ::memcpy(coeffDstY, coeffSrcY, sizeof(coeff_t) * numCoeffY);
diff -r 9a0d24274357 -r 81469708804f source/common/deblock.cpp
--- a/source/common/deblock.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/common/deblock.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -70,27 +70,21 @@
 
     for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
     {
-        uint32_t bsCheck;
-
-        if (g_log2UnitSize == 2)
-            bsCheck = (dir == EDGE_VER && !(partIdx & 1)) || (dir == EDGE_HOR && !(partIdx & 2));
-        else
-            bsCheck = 1;
+        uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) : !(partIdx & 2));
 
         if (edgeFilter[partIdx] && bsCheck)
             getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
     }
 
-    uint32_t log2UnitSize = g_log2UnitSize;
-    uint32_t partIdxIncr = (DEBLOCK_SMALLEST_BLOCK >> log2UnitSize) ? (DEBLOCK_SMALLEST_BLOCK >> log2UnitSize) : 1;
-    uint32_t sizeInPU = pic->getNumPartInCUSize() >> (depth);
+    uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
+    uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
     uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
-    const bool alwaysDoChroma = (cu->getChromaFormat() == X265_CSP_I444 || (1 << log2UnitSize) > DEBLOCK_SMALLEST_BLOCK);
+    const bool alwaysDoChroma = cu->getChromaFormat() == X265_CSP_I444;
 
     for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
     {
         edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
-        if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> log2UnitSize)))
+        if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE)))
             edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
     }
 }
@@ -136,7 +130,7 @@
         return;
     }
 
-    uint32_t widthInBaseUnits  = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - g_log2UnitSize);
+    uint32_t widthInBaseUnits  = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
     setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
 }
 
@@ -461,8 +455,7 @@
     int32_t stride = reconYuv->getStride();
     uint32_t numParts = cu->m_pic->getNumPartInCUSize() >> depth;
 
-    uint32_t log2UnitSize = g_log2UnitSize;
-    uint32_t blocksInPart = (log2UnitSize - 2) > 0 ? 1 << (log2UnitSize - 2) : 1;
+    uint32_t blocksInPart = (LOG2_UNIT_SIZE - 2) > 0 ? 1 << (LOG2_UNIT_SIZE - 2) : 1;
     uint32_t bsAbsIdx = 0, bs = 0;
     int32_t offset, srcStep;
 
@@ -479,18 +472,18 @@
     {
         offset = 1;
         srcStep = stride;
-        tmpsrc += (edge << log2UnitSize);
+        tmpsrc += (edge << LOG2_UNIT_SIZE);
     }
     else // (dir == EDGE_HOR)
     {
         offset = stride;
         srcStep = 1;
-        tmpsrc += (edge << log2UnitSize) * stride;
+        tmpsrc += (edge << LOG2_UNIT_SIZE) * stride;
     }
 
     for (uint32_t idx = 0; idx < numParts; idx++)
     {
-        uint32_t partOffset = idx << log2UnitSize;
+        uint32_t partOffset = idx << LOG2_UNIT_SIZE;
         bsAbsIdx = calcBsIdx(cu, absZOrderIdx, dir, edge, idx);
         bs = blockingStrength[bsAbsIdx];
         if (bs)
@@ -559,8 +552,8 @@
     int32_t stride = reconYuv->getCStride();
     pixel* srcCb = reconYuv->getCbAddr(cu->getAddr(), absZOrderIdx);
     pixel* srcCr = reconYuv->getCrAddr(cu->getAddr(), absZOrderIdx);
-    uint32_t log2UnitSizeH = g_log2UnitSize - cu->getHorzChromaShift();
-    uint32_t log2UnitSizeV = g_log2UnitSize - cu->getVertChromaShift();
+    uint32_t log2UnitSizeH = LOG2_UNIT_SIZE - cu->getHorzChromaShift();
+    uint32_t log2UnitSizeV = LOG2_UNIT_SIZE - cu->getVertChromaShift();
     uint32_t sizeChromaH = 1 << log2UnitSizeH;
     uint32_t sizeChromaV = 1 << log2UnitSizeV;
     int32_t offset, srcStep;
diff -r 9a0d24274357 -r 81469708804f source/common/deblock.h
--- a/source/common/deblock.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/common/deblock.h	Mon Aug 18 16:33:59 2014 +0900
@@ -41,7 +41,7 @@
 
     Deblock() : m_numPartitions(0) {}
 
-    void init() { m_numPartitions = 1 << (g_maxCUDepth << 1); }
+    void init() { m_numPartitions = 1 << g_maxFullDepth * 2; }
 
     void deblockCTU(TComDataCU* cu, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[]);
 
diff -r 9a0d24274357 -r 81469708804f source/common/frame.cpp
--- a/source/common/frame.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/common/frame.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -66,7 +66,7 @@
         return false;
 
     bool ok = true;
-    ok &= m_origPicYuv->create(param->sourceWidth, param->sourceHeight, param->internalCsp, g_maxCUSize, g_maxCUDepth);
+    ok &= m_origPicYuv->create(param->sourceWidth, param->sourceHeight, param->internalCsp, g_maxCUSize, g_maxFullDepth);
     ok &= m_lowres.create(m_origPicYuv, param->bframes, !!param->rc.aqMode);
 
     bool isVbv = param->rc.vbvBufferSize > 0 && param->rc.vbvMaxBitrate > 0;
@@ -108,7 +108,7 @@
     {
         m_picSym->m_reconPicYuv = m_reconPicYuv;
         bool ok = m_picSym->create(param) &&
-             m_reconPicYuv->create(param->sourceWidth, param->sourceHeight, param->internalCsp, g_maxCUSize, g_maxCUDepth);
+             m_reconPicYuv->create(param->sourceWidth, param->sourceHeight, param->internalCsp, g_maxCUSize, g_maxFullDepth);
         if (ok)
         {
             // initialize m_reconpicYuv as SAO may read beyond the end of the picture accessing uninitialized pixels
diff -r 9a0d24274357 -r 81469708804f source/common/frame.h
--- a/source/common/frame.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/common/frame.h	Mon Aug 18 16:33:59 2014 +0900
@@ -119,10 +119,6 @@
     uint32_t    getFrameWidthInCU() const  { return m_picSym->getFrameWidthInCU(); }
 
     uint32_t    getFrameHeightInCU() const { return m_picSym->getFrameHeightInCU(); }
-
-    uint32_t    getUnitSize() const        { return m_picSym->getUnitSize(); }
-
-    uint32_t    getLog2UnitSize() const    { return m_picSym->getLog2UnitSize(); }
 };
 }
 
diff -r 9a0d24274357 -r 81469708804f source/common/param.cpp
--- a/source/common/param.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/common/param.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -869,13 +869,9 @@
         return check_failed;
 
     uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param->maxCUSize];
-    uint32_t maxCUDepth = maxLog2CUSize - 2;
     uint32_t tuQTMaxLog2Size = maxLog2CUSize - 1;
     uint32_t tuQTMinLog2Size = 2; //log2(4)
 
-    CHECK((param->maxCUSize >> maxCUDepth) < 4,
-          "Minimum partition width size should be larger than or equal to 8");
-
     /* These checks might be temporary */
 #if HIGH_BIT_DEPTH
     CHECK(param->internalBitDepth != 10,
@@ -1049,10 +1045,6 @@
 
 int x265_set_globals(x265_param *param)
 {
-    uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param->maxCUSize];
-    uint32_t maxCUDepth = maxLog2CUSize - 2;
-    uint32_t tuQTMinLog2Size = 2; //log2(4)
-
     static int once /* = 0 */;
 
     if (ATOMIC_CAS32(&once, 0, 1) == 1)
@@ -1065,25 +1057,23 @@
     }
     else
     {
+        uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param->maxCUSize];
+
         // set max CU width & height
-        g_maxCUSize = param->maxCUSize;
+        g_maxCUSize     = param->maxCUSize;
         g_maxLog2CUSize = maxLog2CUSize;
 
         // compute actual CU depth with respect to config depth and max transform size
-        g_addCUDepth = g_maxLog2CUSize - maxCUDepth - tuQTMinLog2Size;
-
-        maxCUDepth += g_addCUDepth;
-        g_addCUDepth++;
-        g_maxCUDepth = maxCUDepth;
-        g_log2UnitSize = g_maxLog2CUSize - g_maxCUDepth;
+        g_maxCUDepth   = maxLog2CUSize - MIN_LOG2_CU_SIZE;
+        g_maxFullDepth = maxLog2CUSize - LOG2_UNIT_SIZE;
 
         // initialize partition order
         uint32_t* tmp = &g_zscanToRaster[0];
-        initZscanToRaster(g_maxCUDepth + 1, 1, 0, tmp);
-        initRasterToZscan(g_maxCUSize, g_maxCUDepth + 1);
+        initZscanToRaster(g_maxFullDepth, 1, 0, tmp);
+        initRasterToZscan(g_maxFullDepth);
 
         // initialize conversion matrix from partition index to pel
-        initRasterToPelXY(g_maxCUSize, g_maxCUDepth + 1);
+        initRasterToPelXY(g_maxFullDepth);
     }
     return 0;
 }
diff -r 9a0d24274357 -r 81469708804f source/common/slice.h
--- a/source/common/slice.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/common/slice.h	Mon Aug 18 16:33:59 2014 +0900
@@ -219,7 +219,6 @@
 struct PPS
 {
     uint32_t maxCuDQPDepth;
-    uint32_t minCuDQPSize;
 
     int      chromaCbQpOffset;       // use param
     int      chromaCrQpOffset;       // use param
diff -r 9a0d24274357 -r 81469708804f source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/encoder/analysis.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -45,39 +45,38 @@
         m_modePredYuv[i] = NULL;
 }
 
-bool Analysis::create(uint8_t totalDepth, uint32_t maxWidth)
+bool Analysis::create(uint32_t numCUDepth, uint32_t maxWidth)
 {
-    X265_CHECK(totalDepth <= MAX_CU_DEPTH, "invalid totalDepth\n");
+    X265_CHECK(numCUDepth <= NUM_CU_DEPTH, "invalid numCUDepth\n");
 
-    m_bestPredYuv = new TComYuv*[totalDepth];
-    m_bestResiYuv = new ShortYuv*[totalDepth];
-    m_bestRecoYuv = new TComYuv*[totalDepth];
+    m_bestPredYuv = new TComYuv*[numCUDepth];
+    m_bestResiYuv = new ShortYuv*[numCUDepth];
+    m_bestRecoYuv = new TComYuv*[numCUDepth];
 
-    m_tmpPredYuv     = new TComYuv*[totalDepth];
-    m_modePredYuv[0] = new TComYuv*[totalDepth];
-    m_modePredYuv[1] = new TComYuv*[totalDepth];
-    m_modePredYuv[2] = new TComYuv*[totalDepth];
-    m_modePredYuv[3] = new TComYuv*[totalDepth];
-    m_modePredYuv[4] = new TComYuv*[totalDepth];
-    m_modePredYuv[5] = new TComYuv*[totalDepth];
+    m_tmpPredYuv     = new TComYuv*[numCUDepth];
+    m_modePredYuv[0] = new TComYuv*[numCUDepth];
+    m_modePredYuv[1] = new TComYuv*[numCUDepth];
+    m_modePredYuv[2] = new TComYuv*[numCUDepth];
+    m_modePredYuv[3] = new TComYuv*[numCUDepth];
+    m_modePredYuv[4] = new TComYuv*[numCUDepth];
+    m_modePredYuv[5] = new TComYuv*[numCUDepth];
 
-    m_tmpResiYuv = new ShortYuv*[totalDepth];
-    m_tmpRecoYuv = new TComYuv*[totalDepth];
+    m_tmpResiYuv = new ShortYuv*[numCUDepth];
+    m_tmpRecoYuv = new TComYuv*[numCUDepth];
 
-    m_bestMergeRecoYuv = new TComYuv*[totalDepth];
+    m_bestMergeRecoYuv = new TComYuv*[numCUDepth];
 
-    m_origYuv = new TComYuv*[totalDepth];
+    m_origYuv = new TComYuv*[numCUDepth];
 
-    int unitSize  = maxWidth >> totalDepth;
     int csp       = m_param->internalCsp;
     bool tqBypass = m_param->bCULossless || m_param->bLossless;
 
-    m_memPool = new TComDataCU[totalDepth];
+    m_memPool = new TComDataCU[numCUDepth];
 
     bool ok = true;
-    for (int i = 0; i < totalDepth; i++)
+    for (uint32_t i = 0; i < numCUDepth; i++)
     {
-        uint32_t numPartitions = 1 << ((totalDepth - i) << 1);
+        uint32_t numPartitions = 1 << (g_maxFullDepth - i) * 2;
         uint32_t cuSize = maxWidth >> i;
 
         uint32_t sizeL = cuSize * cuSize;
@@ -86,28 +85,28 @@
         ok &= m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, tqBypass);
 
         m_interCU_2Nx2N[i]  = new TComDataCU;
-        m_interCU_2Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize, unitSize, csp, 0, tqBypass);
+        m_interCU_2Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 0, tqBypass);
 
         m_interCU_2NxN[i]   = new TComDataCU;
-        m_interCU_2NxN[i]->create(&m_memPool[i], numPartitions, cuSize, unitSize, csp, 1, tqBypass);
+        m_interCU_2NxN[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 1, tqBypass);
 
         m_interCU_Nx2N[i]   = new TComDataCU;
-        m_interCU_Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize, unitSize, csp, 2, tqBypass);
+        m_interCU_Nx2N[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 2, tqBypass);
 
         m_intraInInterCU[i] = new TComDataCU;
-        m_intraInInterCU[i]->create(&m_memPool[i], numPartitions, cuSize, unitSize, csp, 3, tqBypass);
+        m_intraInInterCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 3, tqBypass);
 
         m_mergeCU[i]        = new TComDataCU;
-        m_mergeCU[i]->create(&m_memPool[i], numPartitions, cuSize, unitSize, csp, 4, tqBypass);
+        m_mergeCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 4, tqBypass);
 
         m_bestMergeCU[i]    = new TComDataCU;
-        m_bestMergeCU[i]->create(&m_memPool[i], numPartitions, cuSize, unitSize, csp, 5, tqBypass);
+        m_bestMergeCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 5, tqBypass);
 
         m_bestCU[i]         = new TComDataCU;
-        m_bestCU[i]->create(&m_memPool[i], numPartitions, cuSize, unitSize, csp, 6, tqBypass);
+        m_bestCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 6, tqBypass);
 
         m_tempCU[i]         = new TComDataCU;
-        m_tempCU[i]->create(&m_memPool[i], numPartitions, cuSize, unitSize, csp, 7, tqBypass);
+        m_tempCU[i]->create(&m_memPool[i], numPartitions, cuSize, csp, 7, tqBypass);
 
         m_bestPredYuv[i] = new TComYuv;
         ok &= m_bestPredYuv[i]->create(cuSize, cuSize, csp);
@@ -146,7 +145,8 @@
 
 void Analysis::destroy()
 {
-    for (unsigned int i = 0; i < g_maxCUDepth; i++)
+    uint32_t numCUDepth = g_maxCUDepth + 1;
+    for (uint32_t i = 0; i < numCUDepth; i++)
     {
         m_memPool[i].destroy();
 
@@ -250,22 +250,22 @@
 
         if (m_param->bLogCuStats || m_param->rc.bStatWrite)
         {
-            uint32_t i = 0, part;
+            uint32_t i = 0;
             do
             {
                 m_log->totalCu++;
-                part = cu->getDepth(i);
-                int next = numPartition >> (part * 2);
-                m_log->qTreeIntraCnt[part]++;
-                if (part == g_maxCUDepth - 1 && cu->getPartitionSize(i) != SIZE_2Nx2N)
+                uint32_t depth = cu->getDepth(i);
+                int next = numPartition >> (depth * 2);
+                m_log->qTreeIntraCnt[depth]++;
+                if (depth == g_maxCUDepth && cu->getPartitionSize(i) != SIZE_2Nx2N)
                     m_log->cntIntraNxN++;
                 else
                 {
-                    m_log->cntIntra[part]++;
+                    m_log->cntIntra[depth]++;
                     if (cu->getLumaIntraDir(i) > 1)
-                        m_log->cuIntraDistribution[part][ANGULAR_MODE_ID]++;
+                        m_log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
                     else
-                        m_log->cuIntraDistribution[part][cu->getLumaIntraDir(i)]++;
+                        m_log->cuIntraDistribution[depth][cu->getLumaIntraDir(i)]++;
                 }
                 i += next;
             }
@@ -287,43 +287,43 @@
 
         if (m_param->bLogCuStats || m_param->rc.bStatWrite)
         {
-            uint32_t i = 0, part;
+            uint32_t i = 0;
             do
             {
-                part = cu->getDepth(i);
-                m_log->cntTotalCu[part]++;
-                int next = numPartition >> (part * 2);
+                uint32_t depth = cu->getDepth(i);
+                m_log->cntTotalCu[depth]++;
+                int next = numPartition >> (depth * 2);
                 if (cu->isSkipped(i))
                 {
-                    m_log->cntSkipCu[part]++;
-                    m_log->qTreeSkipCnt[part]++;
+                    m_log->cntSkipCu[depth]++;
+                    m_log->qTreeSkipCnt[depth]++;
                 }
                 else
                 {
                     m_log->totalCu++;
                     if (cu->getPredictionMode(0) == MODE_INTER)
                     {
-                        m_log->cntInter[part]++;
-                        m_log->qTreeInterCnt[part]++;
+                        m_log->cntInter[depth]++;
+                        m_log->qTreeInterCnt[depth]++;
                         if (cu->getPartitionSize(0) < AMP_ID)
-                            m_log->cuInterDistribution[part][cu->getPartitionSize(0)]++;
+                            m_log->cuInterDistribution[depth][cu->getPartitionSize(0)]++;
                         else
-                            m_log->cuInterDistribution[part][AMP_ID]++;
+                            m_log->cuInterDistribution[depth][AMP_ID]++;
                     }
                     else if (cu->getPredictionMode(0) == MODE_INTRA)
                     {
-                        m_log->qTreeIntraCnt[part]++;
-                        if (part == g_maxCUDepth - 1 && cu->getPartitionSize(0) == SIZE_NxN)
+                        m_log->qTreeIntraCnt[depth]++;
+                        if (depth == g_maxCUDepth && cu->getPartitionSize(0) == SIZE_NxN)
                         {
                             m_log->cntIntraNxN++;
                         }
                         else
                         {
-                            m_log->cntIntra[part]++;
+                            m_log->cntIntra[depth]++;
                             if (cu->getLumaIntraDir(0) > 1)
-                                m_log->cuIntraDistribution[part][ANGULAR_MODE_ID]++;
+                                m_log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
                             else
-                                m_log->cuIntraDistribution[part][cu->getLumaIntraDir(0)]++;
+                                m_log->cuIntraDistribution[depth][cu->getLumaIntraDir(0)]++;
                         }
                     }
                 }
@@ -334,7 +334,7 @@
     }
 }
 
-void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint8_t depth, bool bInsidePicture)
+void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture)
 {
     //PPAScopeEvent(CompressIntraCU + depth);
 
@@ -367,15 +367,17 @@
 
         checkIntra(outBestCU, outTempCU, SIZE_2Nx2N);
 
-        if (depth == g_maxCUDepth - g_addCUDepth)
+        if (depth == g_maxCUDepth)
         {
             if (log2CUSize > slice->m_sps->quadtreeTULog2MinSize)
                 checkIntra(outBestCU, outTempCU, SIZE_NxN);
         }
-
-        m_entropyCoder->resetBits();
-        m_entropyCoder->codeSplitFlag(outBestCU, 0, depth);
-        outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
+        else
+        {
+            m_entropyCoder->resetBits();
+            m_entropyCoder->codeSplitFlag(outBestCU, 0, depth);
+            outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
+        }
         if (m_rdCost.m_psyRd)
             outBestCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits, outBestCU->m_psyEnergy);
         else
@@ -387,13 +389,12 @@
         fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
 
     // further split
-    if (depth < g_maxCUDepth - g_addCUDepth)
+    if (depth < g_maxCUDepth)
     {
-        uint8_t     nextDepth     = (uint8_t)(depth + 1);
+        uint32_t    nextDepth     = depth + 1;
         TComDataCU* subBestPartCU = m_bestCU[nextDepth];
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
-        uint32_t partUnitIdx = 0;
-        for (; partUnitIdx < 4; partUnitIdx++)
+        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
         {
             int qp = outTempCU->getQP(0);
             subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
@@ -435,7 +436,7 @@
         else
             outTempCU->m_totalRDCost = m_rdCost.calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
 
-        if ((g_maxCUSize >> depth) == slice->m_pps->minCuDQPSize && slice->m_pps->bUseDQP)
+        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
         {
             bool hasResidual = false;
             for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
@@ -524,7 +525,7 @@
     checkBestMode(outBestCU, outTempCU, depth);
 }
 
-void Analysis::compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint8_t depth, bool bInsidePicture, uint32_t PartitionIndex, uint8_t minDepth)
+void Analysis::compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth, bool bInsidePicture, uint32_t PartitionIndex, uint32_t minDepth)
 {
     Frame* pic = outTempCU->m_pic;
     uint32_t absPartIdx = outTempCU->getZorderIdxInCU();
@@ -568,7 +569,7 @@
         TComDataCU* colocated1 = slice->m_numRefIdx[1] > 0 ? slice->m_refPicList[1][0]->getCU(outTempCU->getAddr()) : NULL;
         char currentQP = outTempCU->getQP(0);
         char previousQP = colocated0->getQP(0);
-        uint8_t delta = 0, minDepth0 = 4, minDepth1 = 4;
+        uint32_t delta = 0, minDepth0 = 4, minDepth1 = 4;
         uint32_t sum0 = 0, sum1 = 0;
         uint32_t numPartitions = outTempCU->getTotalNumPart();
         for (uint32_t i = 0; i < numPartitions; i = i + 4)
@@ -793,9 +794,12 @@
 
             if (m_param->rdLevel > 1)
             {
-                m_entropyCoder->resetBits();
-                m_entropyCoder->codeSplitFlag(outBestCU, 0, depth);
-                outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
+                if (depth < g_maxCUDepth)
+                {
+                    m_entropyCoder->resetBits();
+                    m_entropyCoder->codeSplitFlag(outBestCU, 0, depth);
+                    outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
+                }
                 if (m_rdCost.m_psyRd)
                     outBestCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits, outBestCU->m_psyEnergy);
                 else
@@ -809,7 +813,7 @@
         }
 
     // further split
-    if (bSubBranch && depth < g_maxCUDepth - g_addCUDepth)
+    if (bSubBranch && depth < g_maxCUDepth)
     {
 #if EARLY_EXIT // turn ON this to enable early exit
         // early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour
@@ -863,7 +867,7 @@
             if (bestavgCost < avgCost && avgCost != 0 && depth != 0)
             {
                 /* Copy Best data to Picture for next partition prediction. */
-                outBestCU->copyToPic((uint8_t)depth);
+                outBestCU->copyToPic(depth);
 
                 /* Copy Yuv data to picture Yuv */
                 if (m_param->rdLevel != 0)
@@ -873,24 +877,23 @@
         }
 #endif // if EARLY_EXIT
         outTempCU->setQPSubParts(qp, 0, depth);
-        uint8_t     nextDepth = (uint8_t)(depth + 1);
-        TComDataCU* subBestPartCU;
+        uint32_t    nextDepth = depth + 1;
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
-        for (uint32_t nextDepth_partIndex = 0; nextDepth_partIndex < 4; nextDepth_partIndex++)
+        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
         {
-            subBestPartCU = NULL;
-            subTempPartCU->initSubCU(outTempCU, nextDepth_partIndex, nextDepth, qp); // clear sub partition datas or init.
+            TComDataCU* subBestPartCU = NULL;
+            subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
 
             if (bInsidePicture ||
                 ((subTempPartCU->getCUPelX() < slice->m_sps->picWidthInLumaSamples) &&
                  (subTempPartCU->getCUPelY() < slice->m_sps->picHeightInLumaSamples)))
             {
-                if (0 == nextDepth_partIndex) // initialize RD with previous depth buffer
+                if (0 == partUnitIdx) // initialize RD with previous depth buffer
                     m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
                 else
                     m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
 
-                compressInterCU_rd0_4(subBestPartCU, subTempPartCU, outTempCU, nextDepth, bInsidePicture, nextDepth_partIndex, minDepth);
+                compressInterCU_rd0_4(subBestPartCU, subTempPartCU, outTempCU, nextDepth, bInsidePicture, partUnitIdx, minDepth);
 #if EARLY_EXIT
                 if (subBestPartCU->getPredictionMode(0) != MODE_INTRA)
                 {
@@ -900,22 +903,22 @@
                     else
                         tempavgCost = subBestPartCU->m_totalRDCost;
                     TComDataCU* rootCU = pic->getPicSym()->getCU(outTempCU->getAddr());
-                    uint64_t temp = rootCU->m_avgCost[depth + 1] * rootCU->m_count[depth + 1];
-                    rootCU->m_count[depth + 1] += 1;
-                    rootCU->m_avgCost[depth + 1] = (temp + tempavgCost) / rootCU->m_count[depth + 1];
+                    uint64_t temp = rootCU->m_avgCost[nextDepth] * rootCU->m_count[nextDepth];
+                    rootCU->m_count[nextDepth] += 1;
+                    rootCU->m_avgCost[nextDepth] = (temp + tempavgCost) / rootCU->m_count[nextDepth];
                 }
 #endif // if EARLY_EXIT
                 /* Adding costs from best SUbCUs */
-                outTempCU->copyPartFrom(subBestPartCU, nextDepth_partIndex, nextDepth, true); // Keep best part data to current temporary data.
+                outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth, true); // Keep best part data to current temporary data.
                 if (m_param->rdLevel != 0)
-                    m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * nextDepth_partIndex);
+                    m_bestRecoYuv[nextDepth]->copyToPartYuv(m_tmpRecoYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
                 else
-                    m_bestPredYuv[nextDepth]->copyToPartYuv(m_tmpPredYuv[depth], subBestPartCU->getTotalNumPart() * nextDepth_partIndex);
+                    m_bestPredYuv[nextDepth]->copyToPartYuv(m_tmpPredYuv[depth], subBestPartCU->getTotalNumPart() * partUnitIdx);
             }
             else
             {
-                subTempPartCU->copyToPic((uint8_t)nextDepth);
-                outTempCU->copyPartFrom(subTempPartCU, nextDepth_partIndex, nextDepth, false);
+                subTempPartCU->copyToPic(nextDepth);
+                outTempCU->copyPartFrom(subTempPartCU, partUnitIdx, nextDepth, false);
             }
         }
 
@@ -938,7 +941,7 @@
         else
             outTempCU->m_sa8dCost = m_rdCost.calcRdSADCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
 
-        if ((g_maxCUSize >> depth) == slice->m_pps->minCuDQPSize && slice->m_pps->bUseDQP)
+        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
         {
             bool hasResidual = false;
             for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
@@ -998,7 +1001,7 @@
     }
 
     /* Copy Best data to Picture for next partition prediction. */
-    outBestCU->copyToPic((uint8_t)depth);
+    outBestCU->copyToPic(depth);
 
     if (m_param->rdLevel == 0 && depth == 0)
         encodeResidue(outBestCU, outBestCU, 0, 0);
@@ -1028,7 +1031,7 @@
     x265_emms();
 }
 
-void Analysis::compressInterCU_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint8_t depth, bool bInsidePicture, PartSize parentSize)
+void Analysis::compressInterCU_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, PartSize parentSize)
 {
     //PPAScopeEvent(CompressCU + depth);
 
@@ -1100,7 +1103,7 @@
                 // 2Nx2N, NxN
                 if (!(log2CUSize == 3))
                 {
-                    if (depth == g_maxCUDepth - g_addCUDepth && doNotBlockPu)
+                    if (depth == g_maxCUDepth && doNotBlockPu)
                     {
                         checkInter_rd5_6(outBestCU, outTempCU, SIZE_NxN);
                         outTempCU->initEstData();
@@ -1213,7 +1216,7 @@
                 checkIntraInInter_rd5_6(outBestCU, outTempCU, SIZE_2Nx2N);
                 outTempCU->initEstData();
 
-                if (depth == g_maxCUDepth - g_addCUDepth)
+                if (depth == g_maxCUDepth)
                 {
                     if (log2CUSize > slice->m_sps->quadtreeTULog2MinSize)
                     {
@@ -1224,9 +1227,12 @@
             }
         }
 
-        m_entropyCoder->resetBits();
-        m_entropyCoder->codeSplitFlag(outBestCU, 0, depth);
-        outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
+        if (depth < g_maxCUDepth)
+        {
+            m_entropyCoder->resetBits();
+            m_entropyCoder->codeSplitFlag(outBestCU, 0, depth);
+            outBestCU->m_totalBits += m_entropyCoder->getNumberOfWrittenBits(); // split bits
+        }
         if (m_rdCost.m_psyRd)
             outBestCU->m_totalPsyCost = m_rdCost.calcPsyRdCost(outBestCU->m_totalDistortion, outBestCU->m_totalBits, outBestCU->m_psyEnergy);
         else
@@ -1246,13 +1252,12 @@
     }
 
     // further split
-    if (bSubBranch && depth < g_maxCUDepth - g_addCUDepth)
+    if (bSubBranch && depth < g_maxCUDepth)
     {
-        uint8_t     nextDepth     = depth + 1;
+        uint32_t    nextDepth     = depth + 1;
         TComDataCU* subBestPartCU = m_bestCU[nextDepth];
         TComDataCU* subTempPartCU = m_tempCU[nextDepth];
-        uint32_t partUnitIdx = 0;
-        for (; partUnitIdx < 4; partUnitIdx++)
+        for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++)
         {
             int qp = outTempCU->getQP(0);
             subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
@@ -1291,7 +1296,7 @@
         else
             outTempCU->m_totalRDCost = m_rdCost.calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
 
-        if ((g_maxCUSize >> depth) == slice->m_pps->minCuDQPSize && slice->m_pps->bUseDQP)
+        if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
         {
             bool hasResidual = false;
             for (uint32_t blkIdx = 0; blkIdx < outTempCU->getTotalNumPart(); blkIdx++)
@@ -1344,7 +1349,7 @@
     uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS];
     uint32_t maxNumMergeCand = outTempCU->m_slice->m_maxNumMergeCand;
 
-    uint8_t depth = outTempCU->getDepth(0);
+    uint32_t depth = outTempCU->getDepth(0);
     outTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, depth); // interprets depth relative to LCU level
     outTempCU->setCUTransquantBypassSubParts(!!m_param->bLossless, 0, depth);
     outTempCU->getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours, maxNumMergeCand);
@@ -1448,7 +1453,7 @@
     uint8_t interDirNeighbours[MRG_MAX_NUM_CANDS];
     uint32_t maxNumMergeCand = outTempCU->m_slice->m_maxNumMergeCand;
 
-    uint8_t depth = outTempCU->getDepth(0);
+    uint32_t depth = outTempCU->getDepth(0);
     outTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, depth); // interprets depth relative to LCU level
     outTempCU->setCUTransquantBypassSubParts(!!m_param->bLossless, 0, depth);
     outTempCU->getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours, maxNumMergeCand);
@@ -1550,7 +1555,7 @@
 
 void Analysis::checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYuv, PartSize partSize, bool bUseMRG)
 {
-    uint8_t depth = outTempCU->getDepth(0);
+    uint32_t depth = outTempCU->getDepth(0);
 
     outTempCU->setPartSizeSubParts(partSize, 0, depth);
     outTempCU->setPredModeSubParts(MODE_INTER, 0, depth);
@@ -1575,7 +1580,7 @@
 
 void Analysis::checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, bool bUseMRG)
 {
-    uint8_t depth = outTempCU->getDepth(0);
+    uint32_t depth = outTempCU->getDepth(0);
 
     outTempCU->setSkipFlagSubParts(false, 0, depth);
     outTempCU->setPartSizeSubParts(partSize, 0, depth);
@@ -1859,15 +1864,15 @@
         cu->m_totalRDCost = m_rdCost.calcRdCost(cu->m_totalDistortion, cu->m_totalBits);
 }
 
-void Analysis::encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint8_t depth)
+void Analysis::encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth)
 {
-    uint8_t nextDepth = (uint8_t)(depth + 1);
-    TComDataCU* subTempPartCU = m_tempCU[nextDepth];
     Frame* pic = cu->m_pic;
-    Slice* slice = cu->m_slice;
 
-    if (((depth < lcu->getDepth(absPartIdx)) && (depth < (g_maxCUDepth - g_addCUDepth))))
+    if (depth < lcu->getDepth(absPartIdx) && depth < g_maxCUDepth)
     {
+        Slice* slice = cu->m_slice;
+        uint32_t nextDepth = depth + 1;
+        TComDataCU* subTempPartCU = m_tempCU[nextDepth];
         uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
         for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, absPartIdx += qNumParts)
         {
@@ -2001,10 +2006,10 @@
     }
 
     // We need to split, so don't try these modes.
-    if (bInsidePicture)
+    if (bInsidePicture && depth < g_maxCUDepth)
         m_entropyCoder->codeSplitFlag(cu, absPartIdx, depth);
 
-    if ((g_maxCUSize >> depth) >= slice->m_pps->minCuDQPSize && slice->m_pps->bUseDQP)
+    if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
         m_bEncodeDQP = true;
 
     if (!bInsidePicture)
@@ -2025,7 +2030,7 @@
         return;
     }
 
-    if ((depth < cu->getDepth(absPartIdx)) && (depth < (g_maxCUDepth - g_addCUDepth)))
+    if (depth < cu->getDepth(absPartIdx) && depth < g_maxCUDepth)
     {
         uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> 2;
 
@@ -2145,8 +2150,9 @@
 void Analysis::checkDQP(TComDataCU* cu)
 {
     uint32_t depth = cu->getDepth(0);
+    Slice* slice = cu->m_slice;
 
-    if (cu->m_slice->m_pps->bUseDQP && (g_maxCUSize >> depth) >= cu->m_slice->m_pps->minCuDQPSize)
+    if (slice->m_pps->bUseDQP && depth <= slice->m_pps->maxCuDQPDepth)
     {
         if (!cu->getCbf(0, TEXT_LUMA, 0) && !cu->getCbf(0, TEXT_CHROMA_U, 0) && !cu->getCbf(0, TEXT_CHROMA_V, 0))
             cu->setQPSubParts(cu->getRefQP(0), 0, depth); // set QP to default QP
diff -r 9a0d24274357 -r 81469708804f source/encoder/analysis.h
--- a/source/encoder/analysis.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/encoder/analysis.h	Mon Aug 18 16:33:59 2014 +0900
@@ -74,14 +74,14 @@
 
     TComDataCU*  m_memPool;
 
-    TComDataCU*  m_interCU_2Nx2N[MAX_CU_DEPTH];
-    TComDataCU*  m_interCU_2NxN[MAX_CU_DEPTH];
-    TComDataCU*  m_interCU_Nx2N[MAX_CU_DEPTH];
-    TComDataCU*  m_intraInInterCU[MAX_CU_DEPTH];
-    TComDataCU*  m_mergeCU[MAX_CU_DEPTH];
-    TComDataCU*  m_bestMergeCU[MAX_CU_DEPTH];
-    TComDataCU*  m_bestCU[MAX_CU_DEPTH]; // Best CUs at each depth
-    TComDataCU*  m_tempCU[MAX_CU_DEPTH]; // Temporary CUs at each depth
+    TComDataCU*  m_interCU_2Nx2N[NUM_CU_DEPTH];
+    TComDataCU*  m_interCU_2NxN[NUM_CU_DEPTH];
+    TComDataCU*  m_interCU_Nx2N[NUM_CU_DEPTH];
+    TComDataCU*  m_intraInInterCU[NUM_CU_DEPTH];
+    TComDataCU*  m_mergeCU[NUM_CU_DEPTH];
+    TComDataCU*  m_bestMergeCU[NUM_CU_DEPTH];
+    TComDataCU*  m_bestCU[NUM_CU_DEPTH]; // Best CUs at each depth
+    TComDataCU*  m_tempCU[NUM_CU_DEPTH]; // Temporary CUs at each depth
 
     TComYuv**    m_bestPredYuv;          // Best Prediction Yuv for each depth
     ShortYuv**   m_bestResiYuv;          // Best Residual Yuv for each depth
@@ -101,19 +101,19 @@
 
     Analysis();
 
-    bool create(uint8_t totalDepth, uint32_t maxWidth);
+    bool create(uint32_t totalDepth, uint32_t maxWidth);
     void destroy();
     void compressCU(TComDataCU* cu);
     void encodeCU(TComDataCU* cu);
 
 protected:
 
-    void compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint8_t depth, bool bInsidePicture);
+    void compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture);
     void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize);
 
-    void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint8_t depth,
-                               bool bInsidePicture, uint32_t partitionIndex, uint8_t minDepth);
-    void compressInterCU_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint8_t depth, bool bInsidePicture,
+    void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth,
+                               bool bInsidePicture, uint32_t partitionIndex, uint32_t minDepth);
+    void compressInterCU_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture,
         PartSize parentSize = SIZE_NONE);
     void checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv);
     void checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode,
@@ -128,7 +128,7 @@
     void encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture);
     void checkBestMode(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth);
     void encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv);
-    void encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint8_t depth);
+    void encodeResidue(TComDataCU* lcu, TComDataCU* cu, uint32_t absPartIdx, uint32_t depth);
     void checkDQP(TComDataCU* cu);
     void copyYuv2Pic(Frame* outPic, uint32_t cuAddr, uint32_t absPartIdx, uint32_t depth);
     void copyYuv2Tmp(uint32_t uhPartUnitIdx, uint32_t depth);
diff -r 9a0d24274357 -r 81469708804f source/encoder/cturow.cpp
--- a/source/encoder/cturow.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/encoder/cturow.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -33,7 +33,7 @@
 void ThreadLocalData::init(Encoder& enc)
 {
     m_cuCoder.initSearch(enc);
-    m_cuCoder.create((uint8_t)g_maxCUDepth, g_maxCUSize);
+    m_cuCoder.create(g_maxCUDepth + 1, g_maxCUSize);
 }
 
 ThreadLocalData::~ThreadLocalData()
diff -r 9a0d24274357 -r 81469708804f source/encoder/cturow.h
--- a/source/encoder/cturow.h	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/encoder/cturow.h	Mon Aug 18 16:33:59 2014 +0900
@@ -61,7 +61,7 @@
 
     Entropy         m_entropyCoder;
     Entropy         m_bufferEntropyCoder;  /* store context for next row */
-    Entropy         m_rdEntropyCoders[MAX_FULL_DEPTH + 1][CI_NUM];
+    Entropy         m_rdEntropyCoders[NUM_FULL_DEPTH][CI_NUM];
 
     // to compute stats for 2 pass
     double          m_iCuCnt;
@@ -74,7 +74,7 @@
         m_entropyCoder.resetEntropy(slice);
 
         // Note: Reset status to avoid frame parallelism output mistake on different thread number
-        for (uint32_t depth = 0; depth < g_maxCUDepth + 1; depth++)
+        for (uint32_t depth = 0; depth <= g_maxFullDepth; depth++)
         {
             for (int ciIdx = 0; ciIdx < CI_NUM; ciIdx++)
             {
diff -r 9a0d24274357 -r 81469708804f source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/encoder/encoder.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -613,7 +613,7 @@
             continue;
 
         StatisticLog finalLog;
-        for (int depth = 0; depth < (int)g_maxCUDepth; depth++)
+        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
         {
             for (int i = 0; i < poolThreadCount; i++)
             {
@@ -628,7 +628,7 @@
                     finalLog.cuInterDistribution[depth][m] += enclog.cuInterDistribution[depth][m];
                 }
 
-                if (depth == (int)g_maxCUDepth - 1)
+                if (depth == g_maxCUDepth)
                     finalLog.cntIntraNxN += enclog.cntIntraNxN;
                 if (sliceType != I_SLICE)
                 {
@@ -729,14 +729,14 @@
                                cuIntraDistribution[1], cuIntraDistribution[2]);
                 if (sliceType != I_SLICE)
                 {
-                    if (depth == (int)g_maxCUDepth - 1)
+                    if (depth == g_maxCUDepth)
                         len += sprintf(stats + len, " %dx%d "X265_LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
                 }
 
                 len += sprintf(stats + len, ")");
                 if (sliceType == I_SLICE)
                 {
-                    if (depth == (int)g_maxCUDepth - 1)
+                    if (depth == g_maxCUDepth)
                         len += sprintf(stats + len, " %dx%d: "X265_LL "%%", cuSize / 2, cuSize / 2, cntIntraNxN);
                 }
             }
@@ -1098,8 +1098,8 @@
     sps->picWidthInLumaSamples = m_param->sourceWidth;
     sps->picHeightInLumaSamples = m_param->sourceHeight;
 
-    sps->log2MinCodingBlockSize = g_maxLog2CUSize - (g_maxCUDepth - g_addCUDepth);
-    sps->log2DiffMaxMinCodingBlockSize = g_maxCUDepth - g_addCUDepth;
+    sps->log2MinCodingBlockSize = g_maxLog2CUSize - g_maxCUDepth;
+    sps->log2DiffMaxMinCodingBlockSize = g_maxCUDepth;
 
     sps->quadtreeTULog2MaxSize = m_quadtreeTULog2MaxSize;
     sps->quadtreeTULog2MinSize = m_quadtreeTULog2MinSize;
@@ -1109,7 +1109,7 @@
     sps->bUseSAO = m_param->bEnableSAO;
 
     sps->bUseAMP = m_param->bEnableAMP;
-    sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth - g_addCUDepth : 0;
+    sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
 
     sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
     sps->numReorderPics = m_vps.numReorderPics;
@@ -1169,13 +1169,11 @@
     {
         pps->bUseDQP = true;
         pps->maxCuDQPDepth = m_maxCuDQPDepth;
-        pps->minCuDQPSize = g_maxCUSize >> pps->maxCuDQPDepth;
     }
     else
     {
         pps->bUseDQP = false;
         pps->maxCuDQPDepth = 0;
-        pps->minCuDQPSize = g_maxCUSize >> pps->maxCuDQPDepth;
     }
 
     pps->chromaCbQpOffset = m_param->cbQpOffset;
@@ -1391,13 +1389,11 @@
     m_conformanceWindow.leftOffset = 0;
 
     //======== set pad size if width is not multiple of the minimum CU size =========
-    uint32_t maxCUDepth = maxLog2CUSize - 2;
-    uint32_t minCUDepth = (p->maxCUSize >> (maxCUDepth - 1));
-    if ((p->sourceWidth % minCUDepth) != 0)
+    const uint32_t minCUSize = MIN_CU_SIZE;
+    if (p->sourceWidth & (minCUSize - 1))
     {
-        uint32_t padsize = 0;
-        uint32_t rem = p->sourceWidth % minCUDepth;
-        padsize = minCUDepth - rem;
+        uint32_t rem = p->sourceWidth & (minCUSize - 1);
+        uint32_t padsize = minCUSize - rem;
         p->sourceWidth += padsize;
 
         /* set the confirmation window offsets  */
@@ -1406,11 +1402,10 @@
     }
 
     //======== set pad size if height is not multiple of the minimum CU size =========
-    if ((p->sourceHeight % minCUDepth) != 0)
+    if (p->sourceHeight & (minCUSize - 1))
     {
-        uint32_t padsize = 0;
-        uint32_t rem = p->sourceHeight % minCUDepth;
-        padsize = minCUDepth - rem;
+        uint32_t rem = p->sourceHeight & (minCUSize - 1);
+        uint32_t padsize = minCUSize - rem;
         p->sourceHeight += padsize;
 
         /* set the confirmation window offsets  */
diff -r 9a0d24274357 -r 81469708804f source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/encoder/entropy.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -699,7 +699,7 @@
     PartSize partSize = cu->getPartitionSize(absPartIdx);
     uint32_t numPU = (partSize == SIZE_2Nx2N ? 1 : (partSize == SIZE_NxN ? 4 : 2));
     uint32_t depth = cu->getDepth(absPartIdx);
-    uint32_t puOffset = (g_puOffset[uint32_t(partSize)] << ((g_maxCUDepth - depth) << 1)) >> 4;
+    uint32_t puOffset = (g_puOffset[uint32_t(partSize)] << (g_maxFullDepth - depth) * 2) >> 4;
 
     for (uint32_t partIdx = 0, subPartIdx = absPartIdx; partIdx < numPU; partIdx++, subPartIdx += puOffset)
     {
@@ -747,7 +747,7 @@
     }
 
     uint32_t log2CUSize   = cu->getLog2CUSize(absPartIdx);
-    uint32_t lumaOffset   = absPartIdx << cu->m_pic->getLog2UnitSize() * 2;
+    uint32_t lumaOffset   = absPartIdx << LOG2_UNIT_SIZE * 2;
     uint32_t chromaOffset = lumaOffset >> (cu->getHorzChromaShift() + cu->getVertChromaShift());
     uint32_t absPartIdxStep = cu->m_pic->getNumPartInCU() >> (depth << 1);
     CoeffCodeState state;
@@ -1067,7 +1067,7 @@
 
     if (cu->isIntra(absPartIdx))
     {
-        if (depth == g_maxCUDepth - g_addCUDepth)
+        if (depth == g_maxCUDepth)
             encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
         return;
     }
@@ -1096,7 +1096,7 @@
     case SIZE_nRx2N:
         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
-        if (depth == g_maxCUDepth - g_addCUDepth && !(cu->getLog2CUSize(absPartIdx) == 3))
+        if (depth == g_maxCUDepth && !(cu->getLog2CUSize(absPartIdx) == 3))
             encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
         if (cu->m_slice->m_sps->maxAMPDepth > depth)
         {
@@ -1107,7 +1107,7 @@
         break;
 
     case SIZE_NxN:
-        if (depth == g_maxCUDepth - g_addCUDepth && !(cu->getLog2CUSize(absPartIdx) == 3))
+        if (depth == g_maxCUDepth && !(cu->getLog2CUSize(absPartIdx) == 3))
         {
             encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
             encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
@@ -1172,8 +1172,7 @@
 
 void Entropy::codeSplitFlag(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth)
 {
-    if (depth == g_maxCUDepth - g_addCUDepth)
-        return;
+    X265_CHECK(depth < g_maxCUDepth, "invalid depth\n");
 
     uint32_t ctx           = cu->getCtxSplitFlag(absPartIdx, depth);
     uint32_t currSplitFlag = (cu->getDepth(absPartIdx) > depth) ? 1 : 0;
diff -r 9a0d24274357 -r 81469708804f source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Aug 18 05:34:51 2014 +0000
+++ b/source/encoder/frameencoder.cpp	Mon Aug 18 16:33:59 2014 +0900
@@ -700,14 +700,14 @@
         if (m_param->rc.bStatWrite)
         {
             double scale = (double)(1 << (g_maxCUSize / 16));
-            for (uint32_t part = 0; part < g_maxCUDepth ; part++, scale /= 4)
+            for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++, scale /= 4)
             {
-                curRow.m_iCuCnt += scale * tld.m_cuCoder.m_log->qTreeIntraCnt[part];
-                curRow.m_pCuCnt += scale * tld.m_cuCoder.m_log->qTreeInterCnt[part];
-                curRow.m_skipCuCnt += scale * tld.m_cuCoder.m_log->qTreeSkipCnt[part];
+                curRow.m_iCuCnt += scale * tld.m_cuCoder.m_log->qTreeIntraCnt[depth];
+                curRow.m_pCuCnt += scale * tld.m_cuCoder.m_log->qTreeInterCnt[depth];
+                curRow.m_skipCuCnt += scale * tld.m_cuCoder.m_log->qTreeSkipCnt[depth];
 
                 //clear the row cu data from thread local object
-                tld.m_cuCoder.m_log->qTreeIntraCnt[part] = tld.m_cuCoder.m_log->qTreeInterCnt[part] = tld.m_cuCoder.m_log->qTreeSkipCnt[part] = 0;
+                tld.m_cuCoder.m_log->qTreeIntraCnt[depth] = tld.m_cuCoder.m_log->qTreeInterCnt[depth] = tld.m_cuCoder.m_log->qTreeSkipCnt[depth] = 0;
             }
         }