[x265] [PATCH] analysis: CU structure now holds CU-specific information,

Mon Sep 1 17:02:38 CEST 2014

On 09/01, ashok at multicorewareinc.com wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1409211874 -19800
> #      Thu Aug 28 13:14:34 2014 +0530
> # Node ID 0ed902cbbb61d349d51ce9fea77a0f15dfef5911
> # Parent  c5624effb73c74e63fd2e42d2a48ea4490074dce
> analysis: CU structure now holds CU-specific information,
> 
> Member fields include location inside CTU, boundary flags, offsets from CTU
> origin. This will help replace the soon-to-be-gone initCU and initSubCU functions.

Queued, thanks

> diff -r c5624effb73c -r 0ed902cbbb61 source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h	Mon Sep 01 14:13:37 2014 +0530
> +++ b/source/Lib/TLibCommon/TComDataCU.h	Thu Aug 28 13:14:34 2014 +0530
> @@ -81,8 +81,7 @@
>      NUM_SGU_BORDER
>  };
>  
> -
> -typedef struct
> +struct DataCUMemPool
>  {
>      char*    qpMemBlock;
>      uint8_t* depthMemBlock;
> @@ -101,7 +100,23 @@
>      uint8_t* mvpIdxMemBlock;
>      coeff_t* trCoeffMemBlock;
>      pixel*   m_tqBypassYuvMemBlock;
> -} DataCUMemPool;
> +};
> +
> +struct CU
> +{
> +    enum {
> +        INTRA           = 1<<0, // CU is intra predicted
> +        PRESENT         = 1<<1, // CU is not completely outside the frame
> +        SPLIT_MANDATORY = 1<<2, // CU split is mandatory if CU is inside frame and can be split
> +        LEAF            = 1<<3, // CU is a leaf node of the CTU
> +        SPLIT           = 1<<4, // CU is currently split in four child CUs.
> +    };
> +    uint32_t log2CUSize; // Log of the CU size.
> +    uint32_t childIdx;   // Index of the first child CU
> +    uint32_t encodeIdx;  // Encoding index of this CU in terms of 8x8 blocks.
> +    uint32_t offset[2];  // Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin
> +    uint32_t flags;      // CU flags.
> +};
>  
>  // Partition count table, index represents partitioning mode.
>  const uint8_t nbPartsTable[8] = { 1, 2, 2, 4, 2, 2, 2, 2 };
> @@ -210,8 +225,12 @@
>      DataCUMemPool m_DataCUMemPool;
>      TComCUMvField m_cuMvFieldMemPool;
>  
> +    // CU data. Index is the CU index. Neighbour CUs (top-left, top, top-right, left) are appended to the end,
> +    // required for prediction of current CU.
> +    // (1 + 4 + 16 + 64) + (1 + 8 + 1 + 8 + 1) = 104.
> +    CU m_CULocalData[104]; 
> +
>  protected:
> -
>      /// add possible motion vector predictor candidates
>      bool xAddMVPCand(MV& mvp, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir);
>  
> diff -r c5624effb73c -r 0ed902cbbb61 source/common/common.h
> --- a/source/common/common.h	Mon Sep 01 14:13:37 2014 +0530
> +++ b/source/common/common.h	Thu Aug 28 13:14:34 2014 +0530
> @@ -291,8 +291,9 @@
>      }
>  };
>  
> +#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = (bitfield) & (~(flag)) | ((~((value) - 1)) & (flag))
> +#define CU_GET_FLAG(bitfield, flag) (!!((bitfield) & (flag)))
>  }
> -
>  /* defined in common.cpp */
>  int64_t x265_mdate(void);
>  void x265_log(const x265_param *param, int level, const char *fmt, ...);
> diff -r c5624effb73c -r 0ed902cbbb61 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Mon Sep 01 14:13:37 2014 +0530
> +++ b/source/encoder/analysis.cpp	Thu Aug 28 13:14:34 2014 +0530
> @@ -232,10 +232,76 @@
>  #define EARLY_EXIT                  1
>  #define TOPSKIP                     1
>  
> +// TODO: Replace this function with a table.
> +int getDepthScanIdx(int x, int y, int size)
> +{
> +    if (size == 1)
> +        return 0;
> +
> +    int depth = 0;
> +    int h = size >> 1;
> +
> +    if (x >= h)
> +    {
> +        x -= h;
> +        depth += h * h;
> +    }
> +
> +    if (y >= h)
> +    {
> +        y -= h;
> +        depth += 2 * h * h;
> +    }
> +
> +    return depth + getDepthScanIdx(x, y, h);
> +}
> +
> +void Analysis::loadCTUData(TComDataCU* parentCU)
> +{
> +    int8_t cuRange[2]= {MIN_LOG2_CU_SIZE, g_log2Size[m_param->maxCUSize]};
> +
> +    // Initialize the coding blocks inside the CTB
> +    for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx >= cuRange[0]; rangeIdx--)
> +    {
> +        uint32_t log2CUSize = rangeIdx;
> +        int32_t  blockSize  = 1 << log2CUSize;
> +        uint32_t b8Width    = 1 << (cuRange[1] - 3);
> +        uint32_t sbWidth    = 1 << (cuRange[1] - rangeIdx);
> +        int32_t last_level_flag = rangeIdx == cuRange[0];
> +        for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
> +        {
> +            for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
> +            {
> +                uint32_t depth_idx = getDepthScanIdx(sb_x, sb_y, sbWidth);
> +                uint32_t cuIdx = rangeCUIdx + depth_idx;
> +                uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + (depth_idx << 2);
> +                int32_t px = parentCU->getCUPelX() + sb_x * blockSize;
> +                int32_t py = parentCU->getCUPelY() + sb_y * blockSize;
> +                int32_t present_flag = px < parentCU->m_pic->m_origPicYuv->m_picWidth && py < parentCU->m_pic->m_origPicYuv->m_picHeight;
> +                int32_t split_mandatory_flag = present_flag && !last_level_flag && (px + blockSize > parentCU->m_pic->m_origPicYuv->m_picWidth || py + blockSize > parentCU->m_pic->m_origPicYuv->m_picHeight);
> +
> +                CU *cu = parentCU->m_CULocalData + cuIdx;
> +                cu->log2CUSize = log2CUSize;
> +                cu->childIdx = child_idx;
> +                cu->offset[0] = sb_x * blockSize;
> +                cu->offset[1] = sb_y * blockSize;
> +                cu->encodeIdx = getDepthScanIdx(cu->offset[0] >> 3, cu->offset[1] >> 3, b8Width);
> +                cu->flags = 0;
> +
> +                CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag);
> +                CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT, split_mandatory_flag);
> +                CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag);
> +            }
> +        }
> +        rangeCUIdx += sbWidth * sbWidth;
> +    }
> +}
> +
>  void Analysis::compressCU(TComDataCU* cu)
>  {
>      if (cu->m_slice->m_pps->bUseDQP)
>          m_bEncodeDQP = true;
> +    loadCTUData(cu);
>  
>      // initialize CU data
>      m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
> @@ -243,11 +309,9 @@
>  
>      // analysis of CU
>      uint32_t numPartition = cu->getTotalNumPart();
> -
>      if (m_bestCU[0]->m_slice->m_sliceType == I_SLICE)
>      {
> -        compressIntraCU(m_bestCU[0], m_tempCU[0], 0, false);
> -
> +        compressIntraCU(m_bestCU[0], m_tempCU[0], 0, false, cu, cu->m_CULocalData);
>          if (m_param->bLogCuStats || m_param->rc.bStatWrite)
>          {
>              uint32_t i = 0;
> @@ -333,11 +397,9 @@
>          }
>      }
>  }
> -
> -void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture)
> +void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, TComDataCU* cuPicsym, CU *cu)
>  {
>      //PPAScopeEvent(CompressIntraCU + depth);
> -
>      Frame* pic = outBestCU->m_pic;
>  
>      if (depth == 0)
> @@ -346,31 +408,19 @@
>      else
>          // copy partition YUV from depth 0 CTU cache
>          m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
> +    Slice* slice = outTempCU->m_slice;
> +    // We need to split, so don't try these modes.
> +    int cu_split_flag = !(cu->flags & CU::LEAF);
> +    int cu_unsplit_flag = !(cu->flags & CU::SPLIT_MANDATORY);
> +    int cu_intra_flag = cu_unsplit_flag;
>  
> -    uint32_t log2CUSize = outTempCU->getLog2CUSize(0);
> -    Slice* slice = outTempCU->m_slice;
> -    if (!bInsidePicture)
> -    {
> -        uint32_t cuSize = 1 << log2CUSize;
> -        uint32_t lpelx = outBestCU->getCUPelX();
> -        uint32_t tpely = outBestCU->getCUPelY();
> -        uint32_t rpelx = lpelx + cuSize;
> -        uint32_t bpely = tpely + cuSize;
> -        bInsidePicture = (rpelx <= slice->m_sps->picWidthInLumaSamples &&
> -                          bpely <= slice->m_sps->picHeightInLumaSamples);
> -    }
> -
> -    // We need to split, so don't try these modes.
> -    if (bInsidePicture)
> +    if (cu_intra_flag)
>      {
>          m_quant.setQPforQuant(outTempCU);
> -
> -        checkIntra(outBestCU, outTempCU, SIZE_2Nx2N);
> -
> +        checkIntra(outBestCU, outTempCU, SIZE_2Nx2N, cu);
>          if (depth == g_maxCUDepth)
>          {
> -            if (log2CUSize > slice->m_sps->quadtreeTULog2MinSize)
> -                checkIntra(outBestCU, outTempCU, SIZE_NxN);
> +                checkIntra(outBestCU, outTempCU, SIZE_NxN, cu);
>          }
>          else
>          {
> @@ -387,9 +437,8 @@
>      // copy original YUV samples in lossless mode
>      if (outBestCU->isLosslessCoded(0))
>          fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
> -
>      // further split
> -    if (depth < g_maxCUDepth)
> +    if (cu_split_flag)
>      {
>          uint32_t    nextDepth     = depth + 1;
>          TComDataCU* subBestPartCU = m_bestCU[nextDepth];
> @@ -398,22 +447,18 @@
>          {
>              int qp = outTempCU->getQP(0);
>              subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
> -
> -            if (bInsidePicture ||
> -                ((subBestPartCU->getCUPelX() < slice->m_sps->picWidthInLumaSamples) &&
> -                 (subBestPartCU->getCUPelY() < slice->m_sps->picHeightInLumaSamples)))
> +            if (cu->flags & CU::PRESENT)
>              {
>                  subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
>                  if (0 == partUnitIdx) //initialize RD with previous depth buffer
> -                {
>                      m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> -                }
>                  else
> -                {
>                      m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
> -                }
> +                CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx + partUnitIdx;
> +                if (!(child_cu->flags & CU::PRESENT))
> +                    continue;
>  
> -                compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture);
> +                compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture, cuPicsym, child_cu);
>                  outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
>                  copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
>              }
> @@ -423,8 +468,7 @@
>                  outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
>              }
>          }
> -
> -        if (bInsidePicture)
> +        if (cu->flags & CU::PRESENT)
>          {
>              m_entropyCoder->resetBits();
>              m_entropyCoder->codeSplitFlag(outTempCU, 0, depth);
> @@ -463,13 +507,11 @@
>          m_rdEntropyCoders[nextDepth][CI_NEXT_BEST].store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
>          checkBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
>      }
> +
> +    //TO DO: write the best CTU at the end of complete CTU analysis
>      outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
> -
> -    if (!bInsidePicture) return;
> -
>      // Copy Yuv data to picture Yuv
>      copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
> -
>      X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
>      X265_CHECK(outBestCU->getPredictionMode(0) != MODE_NONE, "no best partition mode\n");
>      if (m_rdCost.m_psyRd)
> @@ -480,14 +522,12 @@
>      {
>          X265_CHECK(outBestCU->m_totalRDCost != MAX_INT64, "no best partition cost\n");
>      }
> +
>  }
> -
> -void Analysis::checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize)
> +void Analysis::checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU *cu)
>  {
>      //PPAScopeEvent(CheckRDCostIntra + depth);
> -    uint32_t depth = outTempCU->getDepth(0);
> -
> -    outTempCU->setSkipFlagSubParts(false, 0, depth);
> +    uint32_t depth = g_log2Size[m_param->maxCUSize] - cu->log2CUSize;
>      outTempCU->setPartSizeSubParts(partSize, 0, depth);
>      outTempCU->setPredModeSubParts(MODE_INTRA, 0, depth);
>      outTempCU->setCUTransquantBypassSubParts(!!m_param->bLossless, 0, depth);
> diff -r c5624effb73c -r 0ed902cbbb61 source/encoder/analysis.h
> --- a/source/encoder/analysis.h	Mon Sep 01 14:13:37 2014 +0530
> +++ b/source/encoder/analysis.h	Thu Aug 28 13:14:34 2014 +0530
> @@ -100,16 +100,15 @@
>      StatisticLog* m_log;
>  
>      Analysis();
> -
>      bool create(uint32_t totalDepth, uint32_t maxWidth);
>      void destroy();
> -
>      void compressCU(TComDataCU* cu);
> -
> +    void loadCTUData(TComDataCU* cu);
>  protected:
>  
> -    void compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture);
> -    void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize);
> +    /* Warning: The interface for these functions will undergo significant changes as a major refactor is in progress */
> +    void compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, TComDataCU* cuPicsym, CU *cu);
> +    void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU *cu);
>  
>      void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth,
>                                 bool bInsidePicture, uint32_t partitionIndex, uint32_t minDepth);
> @@ -118,10 +117,8 @@
>      void checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv);
>      void checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode,
>                                 TComYuv*& outBestPredYuv, TComYuv*& rpcYuvReconBest);
> -
>      void checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYUV, PartSize partSize, bool bUseMRG = false);
>      void checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, bool bUseMRG = false);
> -
>      void checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize);
>      void checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize);
>  
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho