[x265] cleanup SIZE_NONE. empty CU has MODE_NONE.

Mon Nov 10 04:42:43 CET 2014

# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1415590794 -32400
#      Mon Nov 10 12:39:54 2014 +0900
# Node ID f31250e3eb5625275318bc69633e0fbc31ccdb3a
# Parent  1e04e178a349ff3a27ed0207cca7bdd9f0db4ff8
cleanup SIZE_NONE. empty CU has MODE_NONE.

diff -r 1e04e178a349 -r f31250e3eb56 source/common/cudata.cpp

--- a/source/common/cudata.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/common/cudata.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -229,13 +229,13 @@
 
     m_qp          = (char*)charBuf; charBuf += m_numPartitions;
     m_log2CUSize         = charBuf; charBuf += m_numPartitions;
-    m_partSize           = charBuf; charBuf += m_numPartitions;
     m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
     m_tqBypass           = charBuf; charBuf += m_numPartitions;
     m_refIdx[0]   = (char*)charBuf; charBuf += m_numPartitions;
     m_refIdx[1]   = (char*)charBuf; charBuf += m_numPartitions;
     m_cuDepth            = charBuf; charBuf += m_numPartitions;
     m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
+    m_partSize           = charBuf; charBuf += m_numPartitions;
     m_mergeFlag          = charBuf; charBuf += m_numPartitions;
     m_interDir           = charBuf; charBuf += m_numPartitions;
     m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
@@ -277,7 +277,6 @@
     /* sequential memsets */
     m_partSet((uint8_t*)m_qp, (uint8_t)qp);
     m_partSet(m_log2CUSize,   (uint8_t)g_maxLog2CUSize);
-    m_partSet(m_partSize,     (uint8_t)SIZE_NONE);
     m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
     m_partSet(m_tqBypass,     (uint8_t)frame.m_encData->m_param->bLossless);
     if (m_slice->m_sliceType != I_SLICE)
@@ -289,7 +288,7 @@
     X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
 
     /* initialize the remaining CU data in one memset */
-    memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
+    memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
 
     uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
     m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
@@ -316,7 +315,6 @@
     /* sequential memsets */
     m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
     m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
-    m_partSet(m_partSize,     (uint8_t)SIZE_NONE);
     m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
     m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
     m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
@@ -324,7 +322,7 @@
     m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
 
     /* initialize the remaining CU data in one memset */
-    memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
+    memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
 }
 
 /* Copy the results of a sub-part (split) CU to the parent CU */
@@ -336,13 +334,13 @@
 
     m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
     m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
-    m_subPartCopy(m_partSize + offset, subCU.m_partSize);
     m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
     m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
     m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
     m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
     m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
     m_subPartCopy(m_predMode + offset, subCU.m_predMode);
+    m_subPartCopy(m_partSize + offset, subCU.m_partSize);
     m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
     m_subPartCopy(m_interDir + offset, subCU.m_interDir);
     m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
@@ -423,13 +421,13 @@
 
     m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
     m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
-    m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
     m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
     m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
     m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
     m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
     m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
     m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
+    m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
     m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
     m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
     m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
@@ -472,12 +470,13 @@
     /* copy out all prediction info for this part */
     m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
     m_partCopy(m_log2CUSize,   ctu.m_log2CUSize + m_absIdxInCTU);
-    m_partCopy(m_partSize,     ctu.m_partSize + m_absIdxInCTU);
     m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
     m_partCopy(m_tqBypass,     ctu.m_tqBypass + m_absIdxInCTU);
     m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
     m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
     m_partCopy(m_cuDepth,      ctu.m_cuDepth + m_absIdxInCTU);
+    m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
+    m_partCopy(m_partSize,     ctu.m_partSize + m_absIdxInCTU);
     m_partCopy(m_mergeFlag,    ctu.m_mergeFlag + m_absIdxInCTU);
     m_partCopy(m_interDir,     ctu.m_interDir + m_absIdxInCTU);
     m_partCopy(m_mvpIdx[0],    ctu.m_mvpIdx[0] + m_absIdxInCTU);
@@ -490,7 +489,6 @@
     memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
 
     /* clear residual coding flags */
-    m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER));
     m_partSet(m_tuDepth, 0);
     m_partSet(m_transformSkip[0], 0);
     m_partSet(m_transformSkip[1], 0);
@@ -901,7 +899,7 @@
 void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
 {
     uint32_t log2CUSize = m_log2CUSize[absPartIdx];
-    uint32_t splitFlag = m_partSize[absPartIdx] == SIZE_NxN;
+    uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N;
 
     tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
     tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
@@ -1958,7 +1956,7 @@
     Frame *colPic = m_slice->m_refPicList[m_slice->isInterB() ? 1 - m_slice->m_colFromL0Flag : 0][m_slice->m_colRefIdx];
     CUData *colCU = colPic->m_encData->getPicCTU(cuAddr);
 
-    if (colCU->m_partSize[partUnitIdx] == SIZE_NONE)
+    if (colCU->m_predMode[partUnitIdx] == MODE_NONE)
         return false;
 
     uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
diff -r 1e04e178a349 -r f31250e3eb56 source/common/cudata.h
--- a/source/common/cudata.h	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/common/cudata.h	Mon Nov 10 12:39:54 2014 +0900
@@ -46,8 +46,7 @@
     SIZE_2NxnD, // asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2)
     SIZE_nLx2N, // asymmetric motion partition, ( N/2)x2N + (3N/2)x2N
     SIZE_nRx2N, // asymmetric motion partition, (3N/2)x2N + ( N/2)x2N
-    NUM_SIZES,
-    SIZE_NONE = 15
+    NUM_SIZES
 };
 
 enum PredMode
@@ -130,12 +129,12 @@
     /* Per-part data, stored contiguously */
     char*         m_qp;               // array of QP values
     uint8_t*      m_log2CUSize;       // array of cu log2Size TODO: seems redundant to depth
-    uint8_t*      m_partSize;         // array of partition sizes
     uint8_t*      m_lumaIntraDir;     // array of intra directions (luma)
     uint8_t*      m_tqBypass;         // array of CU lossless flags
     char*         m_refIdx[2];        // array of motion reference indices per list
     uint8_t*      m_cuDepth;          // array of depths
     uint8_t*      m_predMode;         // array of prediction modes
+    uint8_t*      m_partSize;         // array of partition sizes
     uint8_t*      m_mergeFlag;        // array of merge flags
     uint8_t*      m_interDir;         // array of inter directions
     uint8_t*      m_mvpIdx[2];        // array of motion vector predictor candidates or merge candidate indices [0]
diff -r 1e04e178a349 -r f31250e3eb56 source/common/deblock.cpp
--- a/source/common/deblock.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/common/deblock.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -70,7 +70,7 @@
  * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
 void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[])
 {
-    if (cu->m_partSize[absPartIdx] == SIZE_NONE)
+    if (cu->m_predMode[absPartIdx] == MODE_NONE)
         return;
 
     uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1);
diff -r 1e04e178a349 -r f31250e3eb56 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/analysis.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -935,7 +935,7 @@
                         uint32_t tuDepthRange[2];
                         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
-                        uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
+                        uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
                         residualTransformQuantIntra(*md.bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
                         getBestIntraModeChroma(*md.bestMode, cuGeom);
                         residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
@@ -1517,7 +1517,7 @@
         uint32_t tuDepthRange[2];
         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
-        uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
+        uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
         residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
         getBestIntraModeChroma(*bestMode, cuGeom);
         residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
diff -r 1e04e178a349 -r f31250e3eb56 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/entropy.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -649,7 +649,7 @@
 
     /* in each of these conditions, the subdiv flag is implied and not signaled,
      * so we have checks to make sure the implied value matches our intentions */
-    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] == SIZE_NxN && depth == cu.m_cuDepth[absPartIdx])
+    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx])
     {
         X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
     }
@@ -809,7 +809,7 @@
 
             codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
 
-            if ((cu.m_chromaFormat == X265_CSP_I444) && (cu.m_partSize[absPartIdx] == SIZE_NxN))
+            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
             {
                 uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
                 for (uint32_t i = 1; i <= 3; i++)
@@ -1221,8 +1221,7 @@
     uint32_t dir[4], j;
     uint32_t preds[4][3];
     int predIdx[4];
-    PartSize mode = (PartSize)cu.m_partSize[absPartIdx];
-    uint32_t partNum = isMultiple ? (mode == SIZE_NxN ? 4 : 1) : 1;
+    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
     uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
 
     for (j = 0; j < partNum; j++)
diff -r 1e04e178a349 -r f31250e3eb56 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/frameencoder.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -971,13 +971,13 @@
             log->cntIntra[depth]++;
             log->qTreeIntraCnt[depth]++;
 
-            if (ctu.m_partSize[absPartIdx] == SIZE_NONE)
+            if (ctu.m_predMode[absPartIdx] == MODE_NONE)
             {
                 log->totalCu--;
                 log->cntIntra[depth]--;
                 log->qTreeIntraCnt[depth]--;
             }
-            else if (ctu.m_partSize[absPartIdx] == SIZE_NxN)
+            else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
             {
                 /* TODO: log intra modes at absPartIdx +0 to +3 */
                 X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at improbable depth\n");
@@ -1000,7 +1000,7 @@
             log->totalCu++;
             log->cntTotalCu[depth]++;
 
-            if (ctu.m_partSize[absPartIdx] == SIZE_NONE)
+            if (ctu.m_predMode[absPartIdx] == MODE_NONE)
             {
                 log->totalCu--;
                 log->cntTotalCu[depth]--;
@@ -1026,7 +1026,7 @@
                 log->cntIntra[depth]++;
                 log->qTreeIntraCnt[depth]++;
 
-                if (ctu.m_partSize[absPartIdx] == SIZE_NxN)
+                if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
                 {
                     X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at improbable depth\n");
                     log->cntIntraNxN++;
diff -r 1e04e178a349 -r f31250e3eb56 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/search.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -387,7 +387,7 @@
 
         int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && (log2TrSize - 1) <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0];
         if (m_param->bEnableTSkipFast)
-            checkTransformSkip &= cu.m_partSize[absPartIdx] == SIZE_NxN;
+            checkTransformSkip &= cu.m_partSize[0] != SIZE_2Nx2N;
 
         Cost splitCost;
         uint32_t cbf = 0;
@@ -1210,7 +1210,7 @@
     cu.setPartSizeSubParts(SIZE_2Nx2N);
     cu.setPredModeSubParts(MODE_INTRA);
 
-    uint32_t initTrDepth = 0;
+    const uint32_t initTrDepth = 0;
     uint32_t log2TrSize = cu.m_log2CUSize[0] - initTrDepth;
     uint32_t tuSize = 1 << log2TrSize;
     const uint32_t absPartIdx = 0;
@@ -1425,7 +1425,7 @@
     const Yuv* fencYuv = intraMode.fencYuv;
 
     uint32_t depth        = cu.m_cuDepth[0];
-    uint32_t initTrDepth  = cu.m_partSize[0] == SIZE_2Nx2N ? 0 : 1;
+    uint32_t initTrDepth  = cu.m_partSize[0] != SIZE_2Nx2N;
     uint32_t numPU        = 1 << (2 * initTrDepth);
     uint32_t log2TrSize   = cu.m_log2CUSize[0] - initTrDepth;
     uint32_t tuSize       = 1 << log2TrSize;
@@ -1434,7 +1434,7 @@
     uint32_t absPartIdx   = 0;
     uint32_t totalDistortion = 0;
 
-    int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0] && cu.m_partSize[absPartIdx] == SIZE_NxN;
+    int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0] && cu.m_partSize[0] != SIZE_2Nx2N;
 
     // loop over partitions
     for (uint32_t pu = 0; pu < numPU; pu++, absPartIdx += qNumParts)
@@ -1684,7 +1684,7 @@
     Yuv& reconYuv = intraMode.reconYuv;
 
     uint32_t depth       = cu.m_cuDepth[0];
-    uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN && m_csp == X265_CSP_I444;
+    uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N && m_csp == X265_CSP_I444;
     uint32_t log2TrSize  = cu.m_log2CUSize[0] - initTrDepth;
     uint32_t absPartStep = (NUM_CU_PARTITIONS >> (depth << 1));
     uint32_t totalDistortion = 0;
@@ -1953,7 +1953,7 @@
         uint32_t mrgCost = MAX_UINT;
 
         /* find best cost merge candidate */
-        if (cu.m_partSize[m_puAbsPartIdx] != SIZE_2Nx2N)
+        if (cu.m_partSize[0] != SIZE_2Nx2N)
         {
             merge.absPartIdx = m_puAbsPartIdx;
             merge.width      = m_puWidth;
@@ -2584,7 +2584,7 @@
     uint32_t tuDepth = depth - cu.m_cuDepth[0];
 
     bool bCheckFull = log2TrSize <= depthRange[1];
-    if (cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && log2TrSize > depthRange[0])
+    if (cu.m_partSize[0] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && log2TrSize > depthRange[0])
         bCheckFull = false;
 
     if (bCheckFull)
@@ -2730,7 +2730,7 @@
     bool bCheckFull = log2TrSize <= depthRange[1];
     bool bSplitPresentFlag = bCheckSplit && bCheckFull;
 
-    if (cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && bCheckSplit)
+    if (cu.m_partSize[0] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx] && bCheckSplit)
         bCheckFull = false;
 
     X265_CHECK(bCheckFull || bCheckSplit, "check-full or check-split must be set\n");