[x265-commits] [x265] cleanup SIZE_NONE. empty CU has MODE_NONE.

Tue Nov 11 05:00:21 CET 2014

details:   http://hg.videolan.org/x265/rev/32513a4c3bd4
branches:  
changeset: 8816:32513a4c3bd4
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Mon Nov 10 12:39:54 2014 +0900
description:
cleanup SIZE_NONE. empty CU has MODE_NONE.

diffstat:

 source/common/cudata.cpp        |  20 +++++++++-----------
 source/common/cudata.h          |   5 ++---
 source/common/deblock.cpp       |   2 +-
 source/encoder/analysis.cpp     |   4 ++--
 source/encoder/entropy.cpp      |   7 +++----
 source/encoder/frameencoder.cpp |   8 ++++----
 source/encoder/search.cpp       |  16 ++++++++--------
 7 files changed, 29 insertions(+), 33 deletions(-)

diffs (truncated from 326 to 300 lines):

diff -r 1e04e178a349 -r 32513a4c3bd4 source/common/cudata.cpp

--- a/source/common/cudata.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/common/cudata.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -229,13 +229,13 @@ void CUData::initialize(const CUDataMemP
 
     m_qp          = (char*)charBuf; charBuf += m_numPartitions;
     m_log2CUSize         = charBuf; charBuf += m_numPartitions;
-    m_partSize           = charBuf; charBuf += m_numPartitions;
     m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
     m_tqBypass           = charBuf; charBuf += m_numPartitions;
     m_refIdx[0]   = (char*)charBuf; charBuf += m_numPartitions;
     m_refIdx[1]   = (char*)charBuf; charBuf += m_numPartitions;
     m_cuDepth            = charBuf; charBuf += m_numPartitions;
     m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
+    m_partSize           = charBuf; charBuf += m_numPartitions;
     m_mergeFlag          = charBuf; charBuf += m_numPartitions;
     m_interDir           = charBuf; charBuf += m_numPartitions;
     m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
@@ -277,7 +277,6 @@ void CUData::initCTU(const Frame& frame,
     /* sequential memsets */
     m_partSet((uint8_t*)m_qp, (uint8_t)qp);
     m_partSet(m_log2CUSize,   (uint8_t)g_maxLog2CUSize);
-    m_partSet(m_partSize,     (uint8_t)SIZE_NONE);
     m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
     m_partSet(m_tqBypass,     (uint8_t)frame.m_encData->m_param->bLossless);
     if (m_slice->m_sliceType != I_SLICE)
@@ -289,7 +288,7 @@ void CUData::initCTU(const Frame& frame,
     X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
 
     /* initialize the remaining CU data in one memset */
-    memset(m_cuDepth, 0, (BytesPerPartition - 7) * m_numPartitions);
+    memset(m_cuDepth, 0, (BytesPerPartition - 6) * m_numPartitions);
 
     uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
     m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
@@ -316,7 +315,6 @@ void CUData::initSubCU(const CUData& ctu
     /* sequential memsets */
     m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
     m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
-    m_partSet(m_partSize,     (uint8_t)SIZE_NONE);
     m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
     m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
     m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
@@ -324,7 +322,7 @@ void CUData::initSubCU(const CUData& ctu
     m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
 
     /* initialize the remaining CU data in one memset */
-    memset(m_predMode, 0, (BytesPerPartition - 8) * m_numPartitions);
+    memset(m_predMode, 0, (BytesPerPartition - 7) * m_numPartitions);
 }
 
 /* Copy the results of a sub-part (split) CU to the parent CU */
@@ -336,13 +334,13 @@ void CUData::copyPartFrom(const CUData& 
 
     m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
     m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
-    m_subPartCopy(m_partSize + offset, subCU.m_partSize);
     m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
     m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
     m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
     m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
     m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
     m_subPartCopy(m_predMode + offset, subCU.m_predMode);
+    m_subPartCopy(m_partSize + offset, subCU.m_partSize);
     m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
     m_subPartCopy(m_interDir + offset, subCU.m_interDir);
     m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
@@ -423,13 +421,13 @@ void CUData::copyToPic(uint32_t depth) c
 
     m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
     m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
-    m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
     m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
     m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
     m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
     m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
     m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
     m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
+    m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
     m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
     m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
     m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
@@ -472,12 +470,13 @@ void CUData::copyFromPic(const CUData& c
     /* copy out all prediction info for this part */
     m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
     m_partCopy(m_log2CUSize,   ctu.m_log2CUSize + m_absIdxInCTU);
-    m_partCopy(m_partSize,     ctu.m_partSize + m_absIdxInCTU);
     m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
     m_partCopy(m_tqBypass,     ctu.m_tqBypass + m_absIdxInCTU);
     m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
     m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
     m_partCopy(m_cuDepth,      ctu.m_cuDepth + m_absIdxInCTU);
+    m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
+    m_partCopy(m_partSize,     ctu.m_partSize + m_absIdxInCTU);
     m_partCopy(m_mergeFlag,    ctu.m_mergeFlag + m_absIdxInCTU);
     m_partCopy(m_interDir,     ctu.m_interDir + m_absIdxInCTU);
     m_partCopy(m_mvpIdx[0],    ctu.m_mvpIdx[0] + m_absIdxInCTU);
@@ -490,7 +489,6 @@ void CUData::copyFromPic(const CUData& c
     memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
 
     /* clear residual coding flags */
-    m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER));
     m_partSet(m_tuDepth, 0);
     m_partSet(m_transformSkip[0], 0);
     m_partSet(m_transformSkip[1], 0);
@@ -901,7 +899,7 @@ uint32_t CUData::getCtxSplitFlag(uint32_
 void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
 {
     uint32_t log2CUSize = m_log2CUSize[absPartIdx];
-    uint32_t splitFlag = m_partSize[absPartIdx] == SIZE_NxN;
+    uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N;
 
     tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
     tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
@@ -1958,7 +1956,7 @@ bool CUData::getColMVP(MV& outMV, int& o
     Frame *colPic = m_slice->m_refPicList[m_slice->isInterB() ? 1 - m_slice->m_colFromL0Flag : 0][m_slice->m_colRefIdx];
     CUData *colCU = colPic->m_encData->getPicCTU(cuAddr);
 
-    if (colCU->m_partSize[partUnitIdx] == SIZE_NONE)
+    if (colCU->m_predMode[partUnitIdx] == MODE_NONE)
         return false;
 
     uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
diff -r 1e04e178a349 -r 32513a4c3bd4 source/common/cudata.h
--- a/source/common/cudata.h	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/common/cudata.h	Mon Nov 10 12:39:54 2014 +0900
@@ -46,8 +46,7 @@ enum PartSize
     SIZE_2NxnD, // asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2)
     SIZE_nLx2N, // asymmetric motion partition, ( N/2)x2N + (3N/2)x2N
     SIZE_nRx2N, // asymmetric motion partition, (3N/2)x2N + ( N/2)x2N
-    NUM_SIZES,
-    SIZE_NONE = 15
+    NUM_SIZES
 };
 
 enum PredMode
@@ -130,12 +129,12 @@ public:
     /* Per-part data, stored contiguously */
     char*         m_qp;               // array of QP values
     uint8_t*      m_log2CUSize;       // array of cu log2Size TODO: seems redundant to depth
-    uint8_t*      m_partSize;         // array of partition sizes
     uint8_t*      m_lumaIntraDir;     // array of intra directions (luma)
     uint8_t*      m_tqBypass;         // array of CU lossless flags
     char*         m_refIdx[2];        // array of motion reference indices per list
     uint8_t*      m_cuDepth;          // array of depths
     uint8_t*      m_predMode;         // array of prediction modes
+    uint8_t*      m_partSize;         // array of partition sizes
     uint8_t*      m_mergeFlag;        // array of merge flags
     uint8_t*      m_interDir;         // array of inter directions
     uint8_t*      m_mvpIdx[2];        // array of motion vector predictor candidates or merge candidate indices [0]
diff -r 1e04e178a349 -r 32513a4c3bd4 source/common/deblock.cpp
--- a/source/common/deblock.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/common/deblock.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -70,7 +70,7 @@ static inline uint8_t bsCuEdge(const CUD
  * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
 void Deblock::deblockCU(const CUData* cu, uint32_t absPartIdx, uint32_t depth, const int32_t dir, uint8_t blockStrength[])
 {
-    if (cu->m_partSize[absPartIdx] == SIZE_NONE)
+    if (cu->m_predMode[absPartIdx] == MODE_NONE)
         return;
 
     uint32_t curNumParts = NUM_CU_PARTITIONS >> (depth << 1);
diff -r 1e04e178a349 -r 32513a4c3bd4 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/analysis.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -935,7 +935,7 @@ void Analysis::compressInterCU_rd0_4(con
                         uint32_t tuDepthRange[2];
                         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
-                        uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
+                        uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
                         residualTransformQuantIntra(*md.bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
                         getBestIntraModeChroma(*md.bestMode, cuGeom);
                         residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
@@ -1517,7 +1517,7 @@ void Analysis::encodeResidue(const CUDat
         uint32_t tuDepthRange[2];
         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
-        uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN;
+        uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N;
         residualTransformQuantIntra(*bestMode, cuGeom, initTrDepth, 0, tuDepthRange);
         getBestIntraModeChroma(*bestMode, cuGeom);
         residualQTIntraChroma(*bestMode, cuGeom, 0, 0);
diff -r 1e04e178a349 -r 32513a4c3bd4 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/entropy.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -649,7 +649,7 @@ void Entropy::encodeTransform(const CUDa
 
     /* in each of these conditions, the subdiv flag is implied and not signaled,
      * so we have checks to make sure the implied value matches our intentions */
-    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] == SIZE_NxN && depth == cu.m_cuDepth[absPartIdx])
+    if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && depth == cu.m_cuDepth[absPartIdx])
     {
         X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
     }
@@ -809,7 +809,7 @@ void Entropy::codePredInfo(const CUData&
 
             codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
 
-            if ((cu.m_chromaFormat == X265_CSP_I444) && (cu.m_partSize[absPartIdx] == SIZE_NxN))
+            if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
             {
                 uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
                 for (uint32_t i = 1; i <= 3; i++)
@@ -1221,8 +1221,7 @@ void Entropy::codeIntraDirLumaAng(const 
     uint32_t dir[4], j;
     uint32_t preds[4][3];
     int predIdx[4];
-    PartSize mode = (PartSize)cu.m_partSize[absPartIdx];
-    uint32_t partNum = isMultiple ? (mode == SIZE_NxN ? 4 : 1) : 1;
+    uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
     uint32_t partOffset = (NUM_CU_PARTITIONS >> (cu.m_cuDepth[absPartIdx] << 1)) >> 2;
 
     for (j = 0; j < partNum; j++)
diff -r 1e04e178a349 -r 32513a4c3bd4 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/frameencoder.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -971,13 +971,13 @@ void FrameEncoder::collectCTUStatistics(
             log->cntIntra[depth]++;
             log->qTreeIntraCnt[depth]++;
 
-            if (ctu.m_partSize[absPartIdx] == SIZE_NONE)
+            if (ctu.m_predMode[absPartIdx] == MODE_NONE)
             {
                 log->totalCu--;
                 log->cntIntra[depth]--;
                 log->qTreeIntraCnt[depth]--;
             }
-            else if (ctu.m_partSize[absPartIdx] == SIZE_NxN)
+            else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
             {
                 /* TODO: log intra modes at absPartIdx +0 to +3 */
                 X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at improbable depth\n");
@@ -1000,7 +1000,7 @@ void FrameEncoder::collectCTUStatistics(
             log->totalCu++;
             log->cntTotalCu[depth]++;
 
-            if (ctu.m_partSize[absPartIdx] == SIZE_NONE)
+            if (ctu.m_predMode[absPartIdx] == MODE_NONE)
             {
                 log->totalCu--;
                 log->cntTotalCu[depth]--;
@@ -1026,7 +1026,7 @@ void FrameEncoder::collectCTUStatistics(
                 log->cntIntra[depth]++;
                 log->qTreeIntraCnt[depth]++;
 
-                if (ctu.m_partSize[absPartIdx] == SIZE_NxN)
+                if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N)
                 {
                     X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at improbable depth\n");
                     log->cntIntraNxN++;
diff -r 1e04e178a349 -r 32513a4c3bd4 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Sun Nov 09 00:30:09 2014 -0600
+++ b/source/encoder/search.cpp	Mon Nov 10 12:39:54 2014 +0900
@@ -387,7 +387,7 @@ void Search::codeIntraLumaQT(Mode& mode,
 
         int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && (log2TrSize - 1) <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0];
         if (m_param->bEnableTSkipFast)
-            checkTransformSkip &= cu.m_partSize[absPartIdx] == SIZE_NxN;
+            checkTransformSkip &= cu.m_partSize[0] != SIZE_2Nx2N;
 
         Cost splitCost;
         uint32_t cbf = 0;
@@ -1210,7 +1210,7 @@ void Search::checkIntraInInter(Mode& int
     cu.setPartSizeSubParts(SIZE_2Nx2N);
     cu.setPredModeSubParts(MODE_INTRA);
 
-    uint32_t initTrDepth = 0;
+    const uint32_t initTrDepth = 0;
     uint32_t log2TrSize = cu.m_log2CUSize[0] - initTrDepth;
     uint32_t tuSize = 1 << log2TrSize;
     const uint32_t absPartIdx = 0;
@@ -1425,7 +1425,7 @@ uint32_t Search::estIntraPredQT(Mode &in
     const Yuv* fencYuv = intraMode.fencYuv;
 
     uint32_t depth        = cu.m_cuDepth[0];
-    uint32_t initTrDepth  = cu.m_partSize[0] == SIZE_2Nx2N ? 0 : 1;
+    uint32_t initTrDepth  = cu.m_partSize[0] != SIZE_2Nx2N;
     uint32_t numPU        = 1 << (2 * initTrDepth);
     uint32_t log2TrSize   = cu.m_log2CUSize[0] - initTrDepth;
     uint32_t tuSize       = 1 << log2TrSize;
@@ -1434,7 +1434,7 @@ uint32_t Search::estIntraPredQT(Mode &in
     uint32_t absPartIdx   = 0;
     uint32_t totalDistortion = 0;
 
-    int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0] && cu.m_partSize[absPartIdx] == SIZE_NxN;
+    int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0] && cu.m_partSize[0] != SIZE_2Nx2N;
 
     // loop over partitions
     for (uint32_t pu = 0; pu < numPU; pu++, absPartIdx += qNumParts)
@@ -1684,7 +1684,7 @@ uint32_t Search::estIntraPredChromaQT(Mo
     Yuv& reconYuv = intraMode.reconYuv;
 
     uint32_t depth       = cu.m_cuDepth[0];
-    uint32_t initTrDepth = cu.m_partSize[0] == SIZE_NxN && m_csp == X265_CSP_I444;
+    uint32_t initTrDepth = cu.m_partSize[0] != SIZE_2Nx2N && m_csp == X265_CSP_I444;
     uint32_t log2TrSize  = cu.m_log2CUSize[0] - initTrDepth;
     uint32_t absPartStep = (NUM_CU_PARTITIONS >> (depth << 1));
     uint32_t totalDistortion = 0;
@@ -1953,7 +1953,7 @@ bool Search::predInterSearch(Mode& inter