[x265] [PATCH] analysis: CU structure now holds CU-specific information,

ashok at multicorewareinc.com ashok at multicorewareinc.com
Mon Sep 1 10:26:25 CEST 2014


# HG changeset patch
# User Ashok Kumar Mishra<ashok at multicorewareinc.com>
# Date 1409211874 -19800
#      Thu Aug 28 13:14:34 2014 +0530
# Node ID 4d96eb40f4d6e5cd0883a0a61f20bf00c07ed8f0
# Parent  44b95661db56df0a98c7f4d6f023fd5e7456bd19
analysis: CU structure now holds CU-specific information,

Member fields include location inside CTU, boundary flags, offsets from CTU
origin. This will help replace the soon-to-be-gone initCU and initSubCU functions.

diff -r 44b95661db56 -r 4d96eb40f4d6 source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Sat Aug 30 10:24:09 2014 +0200
+++ b/source/Lib/TLibCommon/TComDataCU.h	Thu Aug 28 13:14:34 2014 +0530
@@ -80,8 +80,6 @@
     SGU_BR,
     NUM_SGU_BORDER
 };
-
-
 typedef struct
 {
     char*    qpMemBlock;
@@ -103,6 +101,16 @@
     pixel*   m_tqBypassYuvMemBlock;
 } DataCUMemPool;
 
+/* Per-CU data: block size, child and encode indices, offsets from the CTU origin, and flags */
+typedef struct CU
+{
+    uint32_t lgBlockSize; // Log2 of the coding block size.
+    uint32_t childIdx;    // Index of the first child CU
+    int32_t  encodeIdx;   // Encoding index of this CU in terms of 8x8 blocks.
+    uint32_t offset[2];   // Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin
+    uint8_t  flags;       // CU flags.
+} CU;
+
 // Partition count table, index represents partitioning mode.
 const uint8_t nbPartsTable[8] = { 1, 2, 2, 4, 2, 2, 2, 2 };
 
@@ -209,9 +217,9 @@
 
     DataCUMemPool m_DataCUMemPool;
     TComCUMvField m_cuMvFieldMemPool;
+    CU m_CULocalData[104];
 
 protected:
-
     /// add possible motion vector predictor candidates
     bool xAddMVPCand(MV& mvp, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir);
 
diff -r 44b95661db56 -r 4d96eb40f4d6 source/common/common.h
--- a/source/common/common.h	Sat Aug 30 10:24:09 2014 +0200
+++ b/source/common/common.h	Thu Aug 28 13:14:34 2014 +0530
@@ -290,9 +290,26 @@
         delete[] saoLcuParam[2];
     }
 };
+#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = (bitfield) & (~(flag)) | ((~((value) - 1)) & (flag))
+#define CU_GET_FLAG(bitfield, flag) (!!((bitfield) & (flag)))
+
+/* Coding block flags. */
+// The CB is intra predicted.
+#define CU_INTRA                   (1<<0)
+
+// The CB is not completely outside the frame.
+#define CU_PRESENT                 (1<<1)
+
+// CB split is mandatory if CU_PRESENT and CU_SPLIT are both set.
+#define CU_SPLIT_MANDATORY           (1<<2)
+
+// The CB is a leaf node of the CTB.
+#define CU_LEAF                    (1<<3)
+
+// The CB is currently split in four child CBs.
+#define CU_SPLIT                   (1<<4)
 
 }
-
 /* defined in common.cpp */
 int64_t x265_mdate(void);
 void x265_log(const x265_param *param, int level, const char *fmt, ...);
diff -r 44b95661db56 -r 4d96eb40f4d6 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Sat Aug 30 10:24:09 2014 +0200
+++ b/source/encoder/analysis.cpp	Thu Aug 28 13:14:34 2014 +0530
@@ -232,10 +232,76 @@
 #define EARLY_EXIT                  1
 #define TOPSKIP                     1
 
+// TODO: Replace this function with a table.
+int fenc_get_depth_scan_idx(int x, int y, int size)
+{
+    if (size == 1)
+        return 0;
+
+    int depth = 0;
+    int h = size >> 1;
+
+    if (x >= h)
+    {
+        x -= h;
+        depth += h * h;
+    }
+
+    if (y >= h)
+    {
+        y -= h;
+        depth += 2 * h * h;
+    }
+
+    return depth + fenc_get_depth_scan_idx(x, y, h);
+}
+
+void Analysis::loadCTUData(TComDataCU* parentCU)
+{
+    int8_t cuRange[2]= {MIN_LOG2_CU_SIZE, g_log2Size[m_param->maxCUSize]};
+
+    // Initialize the coding blocks inside the CTB
+    for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx >= cuRange[0]; rangeIdx--)
+    {
+        uint32_t lgBlockSize = rangeIdx;
+        int32_t blockSize = 1 << lgBlockSize;
+        uint32_t b8Width = 1 << (cuRange[1] - 3);
+        uint32_t sbWidth = 1 << (cuRange[1] - rangeIdx);
+        int32_t last_level_flag = rangeIdx == cuRange[0];
+        for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
+        {
+            for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
+            {
+                uint32_t depth_idx = fenc_get_depth_scan_idx(sb_x, sb_y, sbWidth);
+                uint32_t cuIdx = rangeCUIdx + depth_idx;
+                uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + (depth_idx << 2);
+                int32_t px = parentCU->getCUPelX() + sb_x * blockSize;
+                int32_t py = parentCU->getCUPelY() + sb_y * blockSize;
+                int32_t present_flag = px < parentCU->m_pic->m_origPicYuv->m_picWidth && py < parentCU->m_pic->m_origPicYuv->m_picHeight;
+                int32_t split_mandatory_flag = present_flag && !last_level_flag && (px + blockSize > parentCU->m_pic->m_origPicYuv->m_picWidth || py + blockSize > parentCU->m_pic->m_origPicYuv->m_picHeight);
+
+                CU *cu = parentCU->m_CULocalData + cuIdx;
+                cu->lgBlockSize = lgBlockSize;
+                cu->childIdx = child_idx;
+                cu->offset[0] = sb_x * blockSize;
+                cu->offset[1] = sb_y * blockSize;
+                cu->encodeIdx = fenc_get_depth_scan_idx(cu->offset[0] >> 3, cu->offset[1] >> 3, b8Width);
+                cu->flags = 0;
+
+                CU_SET_FLAG(cu->flags, CU_PRESENT, present_flag);
+                CU_SET_FLAG(cu->flags, CU_SPLIT_MANDATORY | CU_SPLIT, split_mandatory_flag);
+                CU_SET_FLAG(cu->flags, CU_LEAF, last_level_flag);
+            }
+        }
+        rangeCUIdx += sbWidth * sbWidth;
+    }
+}
+
 void Analysis::compressCU(TComDataCU* cu)
 {
     if (cu->m_slice->m_pps->bUseDQP)
         m_bEncodeDQP = true;
+    loadCTUData(cu);
 
     // initialize CU data
     m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
@@ -243,11 +309,9 @@
 
     // analysis of CU
     uint32_t numPartition = cu->getTotalNumPart();
-
     if (m_bestCU[0]->m_slice->m_sliceType == I_SLICE)
     {
-        compressIntraCU(m_bestCU[0], m_tempCU[0], 0, false);
-
+        compressIntraCU(m_bestCU[0], m_tempCU[0], 0, false, cu, cu->m_CULocalData);
         if (m_param->bLogCuStats || m_param->rc.bStatWrite)
         {
             uint32_t i = 0;
@@ -333,11 +397,9 @@
         }
     }
 }
-
-void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture)
+void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, TComDataCU* cuPicsym, CU *cu)
 {
     //PPAScopeEvent(CompressIntraCU + depth);
-
     Frame* pic = outBestCU->m_pic;
 
     if (depth == 0)
@@ -346,31 +408,19 @@
     else
         // copy partition YUV from depth 0 CTU cache
         m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
+    Slice* slice = outTempCU->m_slice;
+    // We need to split, so don't try these modes.
+    int cu_split_flag = !(cu->flags & CU_LEAF);
+    int cu_unsplit_flag = !(cu->flags & CU_SPLIT_MANDATORY);
+    int cu_intra_flag = cu_unsplit_flag;
 
-    uint32_t log2CUSize = outTempCU->getLog2CUSize(0);
-    Slice* slice = outTempCU->m_slice;
-    if (!bInsidePicture)
-    {
-        uint32_t cuSize = 1 << log2CUSize;
-        uint32_t lpelx = outBestCU->getCUPelX();
-        uint32_t tpely = outBestCU->getCUPelY();
-        uint32_t rpelx = lpelx + cuSize;
-        uint32_t bpely = tpely + cuSize;
-        bInsidePicture = (rpelx <= slice->m_sps->picWidthInLumaSamples &&
-                          bpely <= slice->m_sps->picHeightInLumaSamples);
-    }
-
-    // We need to split, so don't try these modes.
-    if (bInsidePicture)
+    if (cu_intra_flag)
     {
         m_quant.setQPforQuant(outTempCU);
-
-        checkIntra(outBestCU, outTempCU, SIZE_2Nx2N);
-
+        checkIntra(outBestCU, outTempCU, SIZE_2Nx2N, cu);
         if (depth == g_maxCUDepth)
         {
-            if (log2CUSize > slice->m_sps->quadtreeTULog2MinSize)
-                checkIntra(outBestCU, outTempCU, SIZE_NxN);
+                checkIntra(outBestCU, outTempCU, SIZE_NxN, cu);
         }
         else
         {
@@ -387,9 +437,8 @@
     // copy original YUV samples in lossless mode
     if (outBestCU->isLosslessCoded(0))
         fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
-
     // further split
-    if (depth < g_maxCUDepth)
+    if (cu_split_flag)
     {
         uint32_t    nextDepth     = depth + 1;
         TComDataCU* subBestPartCU = m_bestCU[nextDepth];
@@ -398,22 +447,18 @@
         {
             int qp = outTempCU->getQP(0);
             subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
-
-            if (bInsidePicture ||
-                ((subBestPartCU->getCUPelX() < slice->m_sps->picWidthInLumaSamples) &&
-                 (subBestPartCU->getCUPelY() < slice->m_sps->picHeightInLumaSamples)))
+            if (cu->flags & CU_PRESENT)
             {
                 subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
                 if (0 == partUnitIdx) //initialize RD with previous depth buffer
-                {
                     m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
-                }
                 else
-                {
                     m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
-                }
+                CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx + partUnitIdx;
+                if (!(child_cu->flags & CU_PRESENT))
+                    continue;
 
-                compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture);
+                compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture, cuPicsym, child_cu);
                 outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
                 copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
             }
@@ -423,8 +468,7 @@
                 outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
             }
         }
-
-        if (bInsidePicture)
+        if (cu->flags & CU_PRESENT)
         {
             m_entropyCoder->resetBits();
             m_entropyCoder->codeSplitFlag(outTempCU, 0, depth);
@@ -463,13 +507,11 @@
         m_rdEntropyCoders[nextDepth][CI_NEXT_BEST].store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
         checkBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
     }
+
+    // TODO: write the best CTU at the end of complete CTU analysis
     outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
-
-    if (!bInsidePicture) return;
-
     // Copy Yuv data to picture Yuv
     copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
-
     X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
     X265_CHECK(outBestCU->getPredictionMode(0) != MODE_NONE, "no best partition mode\n");
     if (m_rdCost.m_psyRd)
@@ -480,14 +522,12 @@
     {
         X265_CHECK(outBestCU->m_totalRDCost != MAX_INT64, "no best partition cost\n");
     }
+
 }
-
-void Analysis::checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize)
+void Analysis::checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU *cu)
 {
     //PPAScopeEvent(CheckRDCostIntra + depth);
-    uint32_t depth = outTempCU->getDepth(0);
-
-    outTempCU->setSkipFlagSubParts(false, 0, depth);
+    uint32_t depth = g_log2Size[m_param->maxCUSize] - cu->lgBlockSize;
     outTempCU->setPartSizeSubParts(partSize, 0, depth);
     outTempCU->setPredModeSubParts(MODE_INTRA, 0, depth);
     outTempCU->setCUTransquantBypassSubParts(!!m_param->bLossless, 0, depth);
diff -r 44b95661db56 -r 4d96eb40f4d6 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Sat Aug 30 10:24:09 2014 +0200
+++ b/source/encoder/analysis.h	Thu Aug 28 13:14:34 2014 +0530
@@ -100,16 +100,15 @@
     StatisticLog* m_log;
 
     Analysis();
-
     bool create(uint32_t totalDepth, uint32_t maxWidth);
     void destroy();
-
     void compressCU(TComDataCU* cu);
-
+    void loadCTUData(TComDataCU* cu);
 protected:
 
-    void compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture);
-    void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize);
+    /* Warning: The interface for these functions will undergo significant changes as a major refactor is in progress */
+    void compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, TComDataCU* cuPicsym, CU *cu);
+    void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU *cu);
 
     void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth,
                                bool bInsidePicture, uint32_t partitionIndex, uint32_t minDepth);
@@ -118,10 +117,8 @@
     void checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv);
     void checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode,
                                TComYuv*& outBestPredYuv, TComYuv*& rpcYuvReconBest);
-
     void checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYUV, PartSize partSize, bool bUseMRG = false);
     void checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, bool bUseMRG = false);
-
     void checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize);
     void checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize);
 


More information about the x265-devel mailing list