[x265] [PATCH] analysis: use AVC CU analysis-info for HEVC mode analysis
Pradeep Ramachandran
pradeep at multicorewareinc.com
Mon Nov 20 06:31:19 CET 2017
On Fri, Nov 17, 2017 at 7:23 PM, <praveen at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Praveen Tiwari <praveen at multicorewareinc.com>
> # Date 1510926794 -19800
> # Fri Nov 17 19:23:14 2017 +0530
> # Node ID 6b248ccb14169d2b0d5b84d50d94a153bd8f3b4f
> # Parent 9723e8812e63ce51e38ede41f7d5edf73cad0849
> analysis: use AVC CU analysis-info for HEVC mode analysis
>
Pushed to default. Thanks!
>
> This patch implements the functionality for analysis-reuse-level 7:
> here we want
> to use AVC analysis-info for HEVC mode decision and use the depth from
> offload
> for AVC sizes
>
> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.cpp
> --- a/source/common/cudata.cpp Fri Nov 17 14:16:31 2017 +0530
> +++ b/source/common/cudata.cpp Fri Nov 17 19:23:14 2017 +0530
> @@ -201,6 +201,8 @@
> m_cuDepth = charBuf; charBuf += m_numPartitions;
> m_predMode = charBuf; charBuf += m_numPartitions; /*
> the order up to here is important in initCTU() and initSubCU() */
> m_partSize = charBuf; charBuf += m_numPartitions;
> + m_skipFlag[0] = charBuf; charBuf += m_numPartitions;
> + m_skipFlag[1] = charBuf; charBuf += m_numPartitions;
> m_mergeFlag = charBuf; charBuf += m_numPartitions;
> m_interDir = charBuf; charBuf += m_numPartitions;
> m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
> @@ -239,6 +241,8 @@
> m_cuDepth = charBuf; charBuf += m_numPartitions;
> m_predMode = charBuf; charBuf += m_numPartitions; /*
> the order up to here is important in initCTU() and initSubCU() */
> m_partSize = charBuf; charBuf += m_numPartitions;
> + m_skipFlag[0] = charBuf; charBuf += m_numPartitions;
> + m_skipFlag[1] = charBuf; charBuf += m_numPartitions;
> m_mergeFlag = charBuf; charBuf += m_numPartitions;
> m_interDir = charBuf; charBuf += m_numPartitions;
> m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.h
> --- a/source/common/cudata.h Fri Nov 17 14:16:31 2017 +0530
> +++ b/source/common/cudata.h Fri Nov 17 19:23:14 2017 +0530
> @@ -199,13 +199,14 @@
> uint8_t* m_predMode; // array of prediction modes
> uint8_t* m_partSize; // array of partition sizes
> uint8_t* m_mergeFlag; // array of merge flags
> + uint8_t* m_skipFlag[2];
> uint8_t* m_interDir; // array of inter directions
> uint8_t* m_mvpIdx[2]; // array of motion vector predictor
> candidates or merge candidate indices [0]
> uint8_t* m_tuDepth; // array of transform indices
> uint8_t* m_transformSkip[3]; // array of transform skipping
> flags per plane
> uint8_t* m_cbf[3]; // array of coded block flags (CBF)
> per plane
> uint8_t* m_chromaIntraDir; // array of intra directions
> (chroma)
> - enum { BytesPerPartition = 21 }; // combined sizeof() of all
> per-part data
> + enum { BytesPerPartition = 23 }; // combined sizeof() of all
> per-part data
>
> sse_t* m_distortion;
> coeff_t* m_trCoeff[3]; // transformed coefficient buffer
> per plane
> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/framedata.h
> --- a/source/common/framedata.h Fri Nov 17 14:16:31 2017 +0530
> +++ b/source/common/framedata.h Fri Nov 17 19:23:14 2017 +0530
> @@ -195,6 +195,7 @@
> uint8_t* mvpIdx[2];
> int8_t* refIdx[2];
> MV* mv[2];
> + int64_t* sadCost;
> };
>
> struct analysis2PassFrameData
> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Fri Nov 17 14:16:31 2017 +0530
> +++ b/source/encoder/analysis.cpp Fri Nov 17 19:23:14 2017 +0530
> @@ -75,6 +75,10 @@
> m_reuseInterDataCTU = NULL;
> m_reuseRef = NULL;
> m_bHD = false;
> + m_modeFlag[0] = false;
> + m_modeFlag[1] = false;
> + m_checkMergeAndSkipOnly[0] = false;
> + m_checkMergeAndSkipOnly[1] = false;
> m_evaluateInter = 0;
> }
>
> @@ -247,6 +251,9 @@
> memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU],
> sizeof(uint8_t) * numPartition);
> memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU],
> sizeof(uint8_t) * numPartition);
> memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU],
> sizeof(uint8_t) * numPartition);
> + for (int list = 0; list < m_slice->isInterB() + 1; list++)
> + memcpy(ctu.m_skipFlag[list], &m_frame->m_analysisData.modeFlag[list][posCTU],
> sizeof(uint8_t) * numPartition);
> +
> if ((m_slice->m_sliceType == P_SLICE ||
> m_param->bIntraInBFrames) && !m_param->bMVType)
> {
> analysis_intra_data* intraDataCTU =
> (analysis_intra_data*)m_frame->m_analysisData.intraData;
> @@ -1162,7 +1169,11 @@
> PicYuv& reconPic = *m_frame->m_reconPic;
> SplitData splitCUData;
>
> - if ((m_param->bMVType && cuGeom.numPartitions > 16) ||
> !m_param->bMVType)
> + bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions >
> 16);
> + bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 &&
> (m_modeFlag[0] || m_modeFlag[1]));
> + bool bNooffloading = !m_param->bMVType;
> +
> + if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
> {
> md.bestMode = NULL;
> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> @@ -1296,7 +1307,7 @@
> }
>
> /* Step 1. Evaluate Merge/Skip candidates for likely early-outs,
> if skip mode was not set above */
> - if (mightNotSplit && depth >= minDepth && !md.bestMode &&
> !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */
> + if ((mightNotSplit && depth >= minDepth && !md.bestMode &&
> !bCtuInfoCheck) || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
> /* TODO: Re-evaluate if analysis load/save still works */
> {
> /* Compute Merge Cost */
> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> @@ -1307,7 +1318,7 @@
> && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO:
> sa8d threshold per depth
> }
>
> - if (md.bestMode && m_param->bEnableRecursionSkip &&
> !bCtuInfoCheck)
> + if (md.bestMode && m_param->bEnableRecursionSkip &&
> !bCtuInfoCheck && !(m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
> {
> skipRecursion = md.bestMode->cu.isSkipped(0);
> if (mightSplit && depth >= minDepth && !skipRecursion)
> @@ -1319,6 +1330,9 @@
> }
> }
>
> + if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <= 16)
> + skipRecursion = true;
> +
> /* Step 2. Evaluate each of the 4 split sub-blocks in series */
> if (mightSplit && !skipRecursion)
> {
> @@ -1374,6 +1388,10 @@
> splitPred->sa8dCost = m_rdCost.calcRdSADCost((
> uint32_t)splitPred->distortion, splitPred->sa8dBits);
> }
>
> + /* If analysis mode is simple do not Evaluate other modes */
> + if ((m_param->bMVType && cuGeom.numPartitions <= 16) &&
> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
> + mightNotSplit = !(m_checkMergeAndSkipOnly[0] ||
> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
> +
> /* Split CUs
> * 0 1
> * 2 3 */
> @@ -1838,7 +1856,12 @@
> }
>
> SplitData splitCUData;
> - if ((m_param->bMVType && cuGeom.numPartitions > 16) ||
> !m_param->bMVType)
> +
> + bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions >
> 16);
> + bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 &&
> (m_modeFlag[0] || m_modeFlag[1]));
> + bool bNooffloading = !m_param->bMVType;
> +
> + if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
> {
> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
> @@ -1977,7 +2000,7 @@
> }
>
> /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
> - if (mightNotSplit && !md.bestMode && !bCtuInfoCheck)
> + if (mightNotSplit && !md.bestMode && !bCtuInfoCheck ||
> (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
> {
> md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> @@ -1993,6 +2016,9 @@
> skipRecursion = md.bestMode &&
> !md.bestMode->cu.getQtRootCbf(0);
> }
>
> + if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <= 16)
> + skipRecursion = true;
> +
> // estimate split cost
> /* Step 2. Evaluate each of the 4 split sub-blocks in series */
> if (mightSplit && !skipRecursion)
> @@ -2045,6 +2071,10 @@
> checkDQPForSplitPred(*splitPred, cuGeom);
> }
>
> + /* If analysis mode is simple do not Evaluate other modes */
> + if ((m_param->bMVType && cuGeom.numPartitions <= 16) &&
> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
> + mightNotSplit = !(m_checkMergeAndSkipOnly[0] ||
> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
> +
> /* Split CUs
> * 0 1
> * 2 3 */
> @@ -2479,6 +2509,22 @@
> checkDQPForSplitPred(*md.bestMode, cuGeom);
> }
>
> + if (m_param->bMVType && m_param->analysisReuseLevel == 7)
> + {
> + for (int list = 0; list < m_slice->isInterB() + 1; list++)
> + {
> + m_modeFlag[list] = true;
> + if (parentCTU.m_skipFlag[list][cuGeom.absPartIdx] == 1
> && cuGeom.numPartitions <= 16)
> + m_checkMergeAndSkipOnly[list] = true;
> + }
> + m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
> cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
> + for (int list = 0; list < m_slice->isInterB() + 1; list++)
> + {
> + m_modeFlag[list] = false;
> + m_checkMergeAndSkipOnly[list] = false;
> + }
> + }
> +
> if (m_param->interRefine > 1 || (m_param->interRefine &&
> parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP &&
> !mode.cu.isSkipped(0)))
> {
> m_evaluateInter = 1;
> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Nov 17 14:16:31 2017 +0530
> +++ b/source/encoder/analysis.h Fri Nov 17 19:23:14 2017 +0530
> @@ -110,6 +110,9 @@
> bool m_bChromaSa8d;
> bool m_bHD;
>
> + bool m_modeFlag[2];
> + bool m_checkMergeAndSkipOnly[2];
> +
> Analysis();
>
> bool create(ThreadLocalData* tld);
> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Fri Nov 17 14:16:31 2017 +0530
> +++ b/source/encoder/encoder.cpp Fri Nov 17 19:23:14 2017 +0530
> @@ -48,6 +48,12 @@
> const char g_sliceTypeToChar[] = {'B', 'P', 'I'};
> }
>
> +/* Threshold for motion vector, based on experimental result.
> + * TODO: come up with an algorithm for adaptive threshold */
> +
> +#define MVTHRESHOLD 10
> +#define PU_2Nx2N 1
> +
> static const char* defaultAnalysisFileName = "x265_analysis.dat";
>
> using namespace X265_NS;
> @@ -565,6 +571,14 @@
> (interData)->mvpIdx[k][cuPos + cuOffset] =
> (srcInterData)->mvpIdx[k][(mbIndex * 16) + cuOffset];
> (interData)->refIdx[k][cuPos + cuOffset] =
> (srcInterData)->refIdx[k][(mbIndex * 16) + cuOffset];
> memcpy(&(interData)->mv[k][cuPos +
> cuOffset], &(srcInterData)->mv[k][(mbIndex * 16) + cuOffset], sizeof(MV));
> + if (m_param->analysisReuseLevel == 7)
> + {
> + int mv_x = ((analysis_inter_data
> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) + cuOffset].x;
> + int mv_y = ((analysis_inter_data
> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) + cuOffset].y;
> + double mv = sqrt(mv_x*mv_x + mv_y*mv_y);
> + if (numPU == PU_2Nx2N &&
> ((srcInterData)->depth[cuPos + cuOffset] == (m_param->maxCUSize >> 5)) &&
> mv <= MVTHRESHOLD)
> + memset(&curFrame->m_analysisData.modeFlag[k][cuPos
> + cuOffset], 1, bytes);
> + }
> }
> }
> }
> @@ -624,6 +638,7 @@
> int bytes = curFrame->m_analysisData.numPartitions
> >> ((interData)->depth[d] * 2);
> memset(&(currInterData)->depth[count],
> (interData)->depth[d], bytes);
> memset(&(currInterData)->modes[count],
> (interData)->modes[d], bytes);
> + memcpy(&(currInterData)->sadCost[count],
> &((analysis_inter_data*)analysis_data->interData)->sadCost[d], bytes);
> if (m_param->analysisReuseLevel > 4)
> {
> memset(&(currInterData)->partSize[count],
> (interData)->partSize[d], bytes);
> @@ -639,6 +654,14 @@
> (currInterData)->mvpIdx[i][count +
> pu] = (interData)->mvpIdx[i][d];
> (currInterData)->refIdx[i][count +
> pu] = (interData)->refIdx[i][d];
> memcpy(&(currInterData)->mv[i][count
> + pu], &(interData)->mv[i][d], sizeof(MV));
> + if (m_param->analysisReuseLevel == 7)
> + {
> + int mv_x = ((analysis_inter_data
> *)curFrame->m_analysisData.interData)->mv[i][count + pu].x;
> + int mv_y = ((analysis_inter_data
> *)curFrame->m_analysisData.interData)->mv[i][count + pu].y;
> + double mv = sqrt(mv_x*mv_x +
> mv_y*mv_y);
> + if (numPU == PU_2Nx2N &&
> m_param->num4x4Partitions <= 16 && mv <= MVTHRESHOLD)
> + memset(&curFrame->m_analysisData.modeFlag[i][count
> + pu], 1, bytes);
> + }
> }
> }
> }
> @@ -3116,12 +3139,14 @@
> if (m_param->analysisReuseLevel >= 7)
> {
> X265_FREE(((analysis_inter_data*)analysis->interData)->
> interDir);
> + X265_FREE(((analysis_inter_data*)analysis->interData)->
> sadCost);
> int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
> for (int dir = 0; dir < numDir; dir++)
> {
> X265_FREE(((analysis_inter_
> data*)analysis->interData)->mvpIdx[dir]);
> X265_FREE(((analysis_inter_
> data*)analysis->interData)->refIdx[dir]);
> X265_FREE(((analysis_inter_
> data*)analysis->interData)->mv[dir]);
> + X265_FREE(analysis->modeFlag[dir]);
> }
> }
> else
> diff -r 9723e8812e63 -r 6b248ccb1416 source/x265.h
> --- a/source/x265.h Fri Nov 17 14:16:31 2017 +0530
> +++ b/source/x265.h Fri Nov 17 19:23:14 2017 +0530
> @@ -123,6 +123,7 @@
> void* intraData;
> uint32_t numCuInHeight;
> x265_lookahead_data lookahead;
> + uint8_t* modeFlag[2];
> } x265_analysis_data;
>
> /* cu statistics */
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171120/62774cc7/attachment-0001.html>
More information about the x265-devel
mailing list