[x265] [PATCH] analysis: CU structure now holds CU-specific information,
Steve Borho
steve at borho.org
Mon Sep 1 11:39:53 CEST 2014
On 09/01, ashok at multicorewareinc.com wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1409211874 -19800
> # Thu Aug 28 13:14:34 2014 +0530
> # Node ID 4d96eb40f4d6e5cd0883a0a61f20bf00c07ed8f0
> # Parent 44b95661db56df0a98c7f4d6f023fd5e7456bd19
> analysis: CU structure now holds CU-specific information,
>
> Member fields include location inside CTU, boundary flags, offsets from CTU
> origin. This will help replace the soon-to-be-gone initCU and initSubCU functions.
>
> diff -r 44b95661db56 -r 4d96eb40f4d6 source/Lib/TLibCommon/TComDataCU.h
> --- a/source/Lib/TLibCommon/TComDataCU.h Sat Aug 30 10:24:09 2014 +0200
> +++ b/source/Lib/TLibCommon/TComDataCU.h Thu Aug 28 13:14:34 2014 +0530
> @@ -80,8 +80,6 @@
> SGU_BR,
> NUM_SGU_BORDER
> };
> -
> -
unnecessary white-space change
> typedef struct
> {
> char* qpMemBlock;
> @@ -103,6 +101,16 @@
> pixel* m_tqBypassYuvMemBlock;
> } DataCUMemPool;
>
> +/* Coding Unit Flags */
> +typedef struct CU
> +{
> + uint32_t lgBlockSize; // Log of the coding block size.
log2Size or log2BlkSize?
> + uint32_t childIdx; // Index of the first child CU
> + int32_t encodeIdx; // Encoding index of this CU in terms of 8x8 blocks.
> + uint32_t offset[2]; // Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin
> + uint8_t flags; // CU flags.
> +} CU;
You might as well make flags a uint32_t; 32-bit AND/OR operations are
going to be faster than 8-bit ops, not to mention reads and writes
and structure size alignment.
> // Partition count table, index represents partitioning mode.
> const uint8_t nbPartsTable[8] = { 1, 2, 2, 4, 2, 2, 2, 2 };
>
> @@ -209,9 +217,9 @@
>
> DataCUMemPool m_DataCUMemPool;
> TComCUMvField m_cuMvFieldMemPool;
> + CU m_CULocalData[104];
>
104 deserves a comment or enum
> protected:
> -
> /// add possible motion vector predictor candidates
> bool xAddMVPCand(MV& mvp, int picList, int refIdx, uint32_t partUnitIdx, MVP_DIR dir);
>
> diff -r 44b95661db56 -r 4d96eb40f4d6 source/common/common.h
> --- a/source/common/common.h Sat Aug 30 10:24:09 2014 +0200
> +++ b/source/common/common.h Thu Aug 28 13:14:34 2014 +0530
> @@ -290,9 +290,26 @@
> delete[] saoLcuParam[2];
> }
> };
> +#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = (bitfield) & (~(flag)) | ((~((value) - 1)) & (flag))
> +#define CU_GET_FLAG(bitfield, flag) (!!((bitfield) & (flag)))
> +
> +/* Coding block flags. */
> +// The CB is intra predicted).
> +#define CU_INTRA (1<<0)
> +
> +// The CB is not completely outside the frame.
> +#define CU_PRESENT (1<<1)
> +
> +// CB split is mandatory if X265_CB_PRESENT and X265_CB_SPLIT.
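this comment is stale: it still refers to X265_CB_PRESENT and X265_CB_SPLIT, but the flags in this patch are named CU_PRESENT and CU_SPLIT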
> +#define CU_SPLIT_MANDATORY (1<<2)
> +
> +// The CB is a leaf node of the CTB.
> +#define CU_LEAF (1<<3)
> +
> +// The CB is currently split in four child CBs.
the mix of CU and CB is confusing
> +#define CU_SPLIT (1<<4)
these should probably be enums in CU:
struct CU
{
enum {
INTRA = 1<<0,
PRESENT = 1<<1,
SPLIT_MANDATORY = 1<<2,
LEAF = 1<<3,
SPLIT = 1<<4,
};
...
};
then we can use CU::INTRA, etc
> }
> -
> /* defined in common.cpp */
> int64_t x265_mdate(void);
> void x265_log(const x265_param *param, int level, const char *fmt, ...);
> diff -r 44b95661db56 -r 4d96eb40f4d6 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Sat Aug 30 10:24:09 2014 +0200
> +++ b/source/encoder/analysis.cpp Thu Aug 28 13:14:34 2014 +0530
> @@ -232,10 +232,76 @@
> #define EARLY_EXIT 1
> #define TOPSKIP 1
>
> +// TO DO: Remove this function with a table.
> +int fenc_get_depth_scan_idx(int x, int y, int size)
> +{
> + if (size == 1)
> + return 0;
> +
> + int depth = 0;
> + int h = size >> 1;
> +
> + if (x >= h)
> + {
> + x -= h;
> + depth += h * h;
> + }
> +
> + if (y >= h)
> + {
> + y -= h;
> + depth += 2 * h * h;
> + }
> +
> + return depth + fenc_get_depth_scan_idx(x, y, h);
> +}
> +
> +void Analysis::loadCTUData(TComDataCU* parentCU)
> +{
> + int8_t cuRange[2]= {MIN_LOG2_CU_SIZE, g_log2Size[m_param->maxCUSize]};
> +
> + // Initialize the coding blocks inside the CTB
> + for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx >= cuRange[0]; rangeIdx--)
> + {
> + uint32_t lgBlockSize = rangeIdx;
> + int32_t blockSize = 1 << lgBlockSize;
> + uint32_t b8Width = 1 << (cuRange[1] - 3);
> + uint32_t sbWidth = 1 << (cuRange[1] - rangeIdx);
> + int32_t last_level_flag = rangeIdx == cuRange[0];
> + for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++)
> + {
> + for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++)
> + {
> + uint32_t depth_idx = fenc_get_depth_scan_idx(sb_x, sb_y, sbWidth);
> + uint32_t cuIdx = rangeCUIdx + depth_idx;
> + uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + (depth_idx << 2);
> + int32_t px = parentCU->getCUPelX() + sb_x * blockSize;
> + int32_t py = parentCU->getCUPelY() + sb_y * blockSize;
> + int32_t present_flag = px < parentCU->m_pic->m_origPicYuv->m_picWidth && py < parentCU->m_pic->m_origPicYuv->m_picHeight;
> + int32_t split_mandatory_flag = present_flag && !last_level_flag && (px + blockSize > parentCU->m_pic->m_origPicYuv->m_picWidth || py + blockSize > parentCU->m_pic->m_origPicYuv->m_picHeight);
> +
> + CU *cu = parentCU->m_CULocalData + cuIdx;
> + cu->lgBlockSize = lgBlockSize;
> + cu->childIdx = child_idx;
> + cu->offset[0] = sb_x * blockSize;
> + cu->offset[1] = sb_y * blockSize;
> + cu->encodeIdx = fenc_get_depth_scan_idx(cu->offset[0] >> 3, cu->offset[1] >> 3, b8Width);
> + cu->flags = 0;
> +
> + CU_SET_FLAG(cu->flags, CU_PRESENT, present_flag);
> + CU_SET_FLAG(cu->flags, CU_SPLIT_MANDATORY | CU_SPLIT, split_mandatory_flag);
> + CU_SET_FLAG(cu->flags, CU_LEAF, last_level_flag);
> + }
> + }
> + rangeCUIdx += sbWidth * sbWidth;
> + }
> +}
> +
> void Analysis::compressCU(TComDataCU* cu)
> {
> if (cu->m_slice->m_pps->bUseDQP)
> m_bEncodeDQP = true;
> + loadCTUData(cu);
>
> // initialize CU data
> m_bestCU[0]->initCU(cu->m_pic, cu->getAddr());
> @@ -243,11 +309,9 @@
>
> // analysis of CU
> uint32_t numPartition = cu->getTotalNumPart();
> -
> if (m_bestCU[0]->m_slice->m_sliceType == I_SLICE)
> {
> - compressIntraCU(m_bestCU[0], m_tempCU[0], 0, false);
> -
> + compressIntraCU(m_bestCU[0], m_tempCU[0], 0, false, cu, cu->m_CULocalData);
> if (m_param->bLogCuStats || m_param->rc.bStatWrite)
> {
> uint32_t i = 0;
> @@ -333,11 +397,9 @@
> }
> }
> }
> -
> -void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture)
> +void Analysis::compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, TComDataCU* cuPicsym, CU *cu)
> {
> //PPAScopeEvent(CompressIntraCU + depth);
> -
> Frame* pic = outBestCU->m_pic;
>
> if (depth == 0)
> @@ -346,31 +408,19 @@
> else
> // copy partition YUV from depth 0 CTU cache
> m_origYuv[0]->copyPartToYuv(m_origYuv[depth], outBestCU->getZorderIdxInCU());
> + Slice* slice = outTempCU->m_slice;
> + // We need to split, so don't try these modes.
> + int cu_split_flag = !(cu->flags & CU_LEAF);
> + int cu_unsplit_flag = !(cu->flags & CU_SPLIT_MANDATORY);
> + int cu_intra_flag = cu_unsplit_flag;
>
> - uint32_t log2CUSize = outTempCU->getLog2CUSize(0);
> - Slice* slice = outTempCU->m_slice;
> - if (!bInsidePicture)
> - {
> - uint32_t cuSize = 1 << log2CUSize;
> - uint32_t lpelx = outBestCU->getCUPelX();
> - uint32_t tpely = outBestCU->getCUPelY();
> - uint32_t rpelx = lpelx + cuSize;
> - uint32_t bpely = tpely + cuSize;
> - bInsidePicture = (rpelx <= slice->m_sps->picWidthInLumaSamples &&
> - bpely <= slice->m_sps->picHeightInLumaSamples);
> - }
> -
> - // We need to split, so don't try these modes.
> - if (bInsidePicture)
> + if (cu_intra_flag)
> {
> m_quant.setQPforQuant(outTempCU);
> -
> - checkIntra(outBestCU, outTempCU, SIZE_2Nx2N);
> -
> + checkIntra(outBestCU, outTempCU, SIZE_2Nx2N, cu);
> if (depth == g_maxCUDepth)
> {
> - if (log2CUSize > slice->m_sps->quadtreeTULog2MinSize)
> - checkIntra(outBestCU, outTempCU, SIZE_NxN);
> + checkIntra(outBestCU, outTempCU, SIZE_NxN, cu);
> }
> else
> {
> @@ -387,9 +437,8 @@
> // copy original YUV samples in lossless mode
> if (outBestCU->isLosslessCoded(0))
> fillOrigYUVBuffer(outBestCU, m_origYuv[depth]);
> -
> // further split
> - if (depth < g_maxCUDepth)
> + if (cu_split_flag)
> {
> uint32_t nextDepth = depth + 1;
> TComDataCU* subBestPartCU = m_bestCU[nextDepth];
> @@ -398,22 +447,18 @@
> {
> int qp = outTempCU->getQP(0);
> subBestPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
> -
> - if (bInsidePicture ||
> - ((subBestPartCU->getCUPelX() < slice->m_sps->picWidthInLumaSamples) &&
> - (subBestPartCU->getCUPelY() < slice->m_sps->picHeightInLumaSamples)))
> + if (cu->flags & CU_PRESENT)
> {
> subTempPartCU->initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init.
> if (0 == partUnitIdx) //initialize RD with previous depth buffer
> - {
> m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]);
> - }
> else
> - {
> m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
> - }
> + CU *child_cu = cuPicsym->m_CULocalData + cu->childIdx + partUnitIdx;
> + if (!(child_cu->flags & CU_PRESENT))
> + continue;
>
> - compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture);
> + compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture, cuPicsym, child_cu);
> outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
> copyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
> }
> @@ -423,8 +468,7 @@
> outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth);
> }
> }
> -
> - if (bInsidePicture)
> + if (cu->flags & CU_PRESENT)
> {
> m_entropyCoder->resetBits();
> m_entropyCoder->codeSplitFlag(outTempCU, 0, depth);
> @@ -463,13 +507,11 @@
> m_rdEntropyCoders[nextDepth][CI_NEXT_BEST].store(m_rdEntropyCoders[depth][CI_TEMP_BEST]);
> checkBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
> }
> +
> + //TO DO: write the best CTU at the end of complete CTU analysis
> outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
> -
> - if (!bInsidePicture) return;
> -
> // Copy Yuv data to picture Yuv
> copyYuv2Pic(pic, outBestCU->getAddr(), outBestCU->getZorderIdxInCU(), depth);
> -
> X265_CHECK(outBestCU->getPartitionSize(0) != SIZE_NONE, "no best partition size\n");
> X265_CHECK(outBestCU->getPredictionMode(0) != MODE_NONE, "no best partition mode\n");
> if (m_rdCost.m_psyRd)
> @@ -480,14 +522,12 @@
> {
> X265_CHECK(outBestCU->m_totalRDCost != MAX_INT64, "no best partition cost\n");
> }
> +
> }
> -
> -void Analysis::checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize)
> +void Analysis::checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU *cu)
> {
> //PPAScopeEvent(CheckRDCostIntra + depth);
> - uint32_t depth = outTempCU->getDepth(0);
> -
> - outTempCU->setSkipFlagSubParts(false, 0, depth);
> + uint32_t depth = g_log2Size[m_param->maxCUSize] - cu->lgBlockSize;
> outTempCU->setPartSizeSubParts(partSize, 0, depth);
> outTempCU->setPredModeSubParts(MODE_INTRA, 0, depth);
> outTempCU->setCUTransquantBypassSubParts(!!m_param->bLossless, 0, depth);
> diff -r 44b95661db56 -r 4d96eb40f4d6 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Sat Aug 30 10:24:09 2014 +0200
> +++ b/source/encoder/analysis.h Thu Aug 28 13:14:34 2014 +0530
> @@ -100,16 +100,15 @@
> StatisticLog* m_log;
>
> Analysis();
> -
> bool create(uint32_t totalDepth, uint32_t maxWidth);
> void destroy();
> -
> void compressCU(TComDataCU* cu);
> -
> + void loadCTUData(TComDataCU* cu);
> protected:
>
> - void compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture);
> - void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize);
> + /* Warning: The interface for these functions will undergo significant changes as a major refactor is under progress */
> + void compressIntraCU(TComDataCU*& outBestCU, TComDataCU*& outTempCU, uint32_t depth, bool bInsidePicture, TComDataCU* cuPicsym, CU *cu);
> + void checkIntra(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, CU *cu);
>
> void compressInterCU_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComDataCU* cu, uint32_t depth,
> bool bInsidePicture, uint32_t partitionIndex, uint32_t minDepth);
> @@ -118,10 +117,8 @@
> void checkMerge2Nx2N_rd0_4(TComDataCU*& outBestCU, TComDataCU*& outTempCU, TComYuv*& bestPredYuv, TComYuv*& tmpPredYuv);
> void checkMerge2Nx2N_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, bool *earlyDetectionSkipMode,
> TComYuv*& outBestPredYuv, TComYuv*& rpcYuvReconBest);
> -
> void checkInter_rd0_4(TComDataCU* outTempCU, TComYuv* outPredYUV, PartSize partSize, bool bUseMRG = false);
> void checkInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize, bool bUseMRG = false);
> -
> void checkIntraInInter_rd0_4(TComDataCU* cu, PartSize partSize);
> void checkIntraInInter_rd5_6(TComDataCU*& outBestCU, TComDataCU*& outTempCU, PartSize partSize);
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel mailing list