[x265] [PATCH] analysis: use AVC CU analysis-info for HEVC mode analysis
Ashok Kumar Mishra
ashok at multicorewareinc.com
Mon Nov 20 09:02:02 CET 2017
On Mon, Nov 20, 2017 at 12:53 PM, Ashok Kumar Mishra <
ashok at multicorewareinc.com> wrote:
>
>
> On Mon, Nov 20, 2017 at 11:01 AM, Pradeep Ramachandran <
> pradeep at multicorewareinc.com> wrote:
>
>>
>> On Fri, Nov 17, 2017 at 7:23 PM, <praveen at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Praveen Tiwari <praveen at multicorewareinc.com>
>>> # Date 1510926794 -19800
>>> # Fri Nov 17 19:23:14 2017 +0530
>>> # Node ID 6b248ccb14169d2b0d5b84d50d94a153bd8f3b4f
>>> # Parent 9723e8812e63ce51e38ede41f7d5edf73cad0849
>>> analysis: use AVC CU analysis-info for HEVC mode analysis
>>>
>>
>> Pushed to default. Thanks!
>>
>>
>>>
>>> This patch implements the functionality for analysis-reuse-level 7;
>>> here we want
>>> to use AVC analysis-info for HEVC mode decision and use the depth from
>>> offload
>>> for AVC sizes.
>>>
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.cpp
>>> --- a/source/common/cudata.cpp Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/common/cudata.cpp Fri Nov 17 19:23:14 2017 +0530
>>> @@ -201,6 +201,8 @@
>>> m_cuDepth = charBuf; charBuf += m_numPartitions;
>>> m_predMode = charBuf; charBuf += m_numPartitions; /*
>>> the order up to here is important in initCTU() and initSubCU() */
>>> m_partSize = charBuf; charBuf += m_numPartitions;
>>> + m_skipFlag[0] = charBuf; charBuf += m_numPartitions;
>>> + m_skipFlag[1] = charBuf; charBuf += m_numPartitions;
>>> m_mergeFlag = charBuf; charBuf += m_numPartitions;
>>> m_interDir = charBuf; charBuf += m_numPartitions;
>>> m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
>>> @@ -239,6 +241,8 @@
>>> m_cuDepth = charBuf; charBuf += m_numPartitions;
>>> m_predMode = charBuf; charBuf += m_numPartitions; /*
>>> the order up to here is important in initCTU() and initSubCU() */
>>> m_partSize = charBuf; charBuf += m_numPartitions;
>>> + m_skipFlag[0] = charBuf; charBuf += m_numPartitions;
>>> + m_skipFlag[1] = charBuf; charBuf += m_numPartitions;
>>> m_mergeFlag = charBuf; charBuf += m_numPartitions;
>>> m_interDir = charBuf; charBuf += m_numPartitions;
>>> m_mvpIdx[0] = charBuf; charBuf += m_numPartitions;
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.h
>>> --- a/source/common/cudata.h Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/common/cudata.h Fri Nov 17 19:23:14 2017 +0530
>>> @@ -199,13 +199,14 @@
>>> uint8_t* m_predMode; // array of prediction modes
>>> uint8_t* m_partSize; // array of partition sizes
>>> uint8_t* m_mergeFlag; // array of merge flags
>>> + uint8_t* m_skipFlag[2];
>>> uint8_t* m_interDir; // array of inter directions
>>> uint8_t* m_mvpIdx[2]; // array of motion vector
>>> predictor candidates or merge candidate indices [0]
>>> uint8_t* m_tuDepth; // array of transform indices
>>> uint8_t* m_transformSkip[3]; // array of transform skipping
>>> flags per plane
>>> uint8_t* m_cbf[3]; // array of coded block flags
>>> (CBF) per plane
>>> uint8_t* m_chromaIntraDir; // array of intra directions
>>> (chroma)
>>> - enum { BytesPerPartition = 21 }; // combined sizeof() of all
>>> per-part data
>>> + enum { BytesPerPartition = 23 }; // combined sizeof() of all
>>> per-part data
>>>
>>> sse_t* m_distortion;
>>> coeff_t* m_trCoeff[3]; // transformed coefficient buffer
>>> per plane
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/framedata.h
>>> --- a/source/common/framedata.h Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/common/framedata.h Fri Nov 17 19:23:14 2017 +0530
>>> @@ -195,6 +195,7 @@
>>> uint8_t* mvpIdx[2];
>>> int8_t* refIdx[2];
>>> MV* mv[2];
>>> + int64_t* sadCost;
>>> };
>>>
>>> struct analysis2PassFrameData
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.cpp
>>> --- a/source/encoder/analysis.cpp Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/encoder/analysis.cpp Fri Nov 17 19:23:14 2017 +0530
>>> @@ -75,6 +75,10 @@
>>> m_reuseInterDataCTU = NULL;
>>> m_reuseRef = NULL;
>>> m_bHD = false;
>>> + m_modeFlag[0] = false;
>>> + m_modeFlag[1] = false;
>>> + m_checkMergeAndSkipOnly[0] = false;
>>> + m_checkMergeAndSkipOnly[1] = false;
>>> m_evaluateInter = 0;
>>> }
>>>
>>> @@ -247,6 +251,9 @@
>>> memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU],
>>> sizeof(uint8_t) * numPartition);
>>> memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU],
>>> sizeof(uint8_t) * numPartition);
>>> memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU],
>>> sizeof(uint8_t) * numPartition);
>>> + for (int list = 0; list < m_slice->isInterB() + 1; list++)
>>> + memcpy(ctu.m_skipFlag[list],
>>> &m_frame->m_analysisData.modeFlag[list][posCTU], sizeof(uint8_t) *
>>> numPartition);
>>> +
>>> if ((m_slice->m_sliceType == P_SLICE ||
>>> m_param->bIntraInBFrames) && !m_param->bMVType)
>>> {
>>> analysis_intra_data* intraDataCTU =
>>> (analysis_intra_data*)m_frame->m_analysisData.intraData;
>>> @@ -1162,7 +1169,11 @@
>>> PicYuv& reconPic = *m_frame->m_reconPic;
>>> SplitData splitCUData;
>>>
>>> - if ((m_param->bMVType && cuGeom.numPartitions > 16) ||
>>> !m_param->bMVType)
>>> + bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions
>>> > 16);
>>> + bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 &&
>>> (m_modeFlag[0] || m_modeFlag[1]));
>>> + bool bNooffloading = !m_param->bMVType;
>>> +
>>> + if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
>>> {
>>> md.bestMode = NULL;
>>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>> @@ -1296,7 +1307,7 @@
>>> }
>>>
>>> /* Step 1. Evaluate Merge/Skip candidates for likely
>>> early-outs, if skip mode was not set above */
>>> - if (mightNotSplit && depth >= minDepth && !md.bestMode &&
>>> !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */
>>> + if ((mightNotSplit && depth >= minDepth && !md.bestMode &&
>>> !bCtuInfoCheck) || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>>> /* TODO: Re-evaluate if analysis load/save still works */
>>> {
>>> /* Compute Merge Cost */
>>> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>>> @@ -1307,7 +1318,7 @@
>>> && md.bestMode && md.bestMode->cu.isSkipped(0); //
>>> TODO: sa8d threshold per depth
>>> }
>>>
>>> - if (md.bestMode && m_param->bEnableRecursionSkip &&
>>> !bCtuInfoCheck)
>>> + if (md.bestMode && m_param->bEnableRecursionSkip &&
>>> !bCtuInfoCheck && !(m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>>> {
>>> skipRecursion = md.bestMode->cu.isSkipped(0);
>>> if (mightSplit && depth >= minDepth && !skipRecursion)
>>> @@ -1319,6 +1330,9 @@
>>> }
>>> }
>>>
>>> + if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <=
>>> 16)
>>> + skipRecursion = true;
>>> +
>>> /* Step 2. Evaluate each of the 4 split sub-blocks in series */
>>> if (mightSplit && !skipRecursion)
>>> {
>>> @@ -1374,6 +1388,10 @@
>>> splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion,
>>> splitPred->sa8dBits);
>>> }
>>>
>>> + /* If analysis mode is simple do not Evaluate other modes */
>>> + if ((m_param->bMVType && cuGeom.numPartitions <= 16) &&
>>> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
>>> + mightNotSplit = !(m_checkMergeAndSkipOnly[0] ||
>>> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
>>> +
>>> /* Split CUs
>>> * 0 1
>>> * 2 3 */
>>> @@ -1838,7 +1856,12 @@
>>> }
>>>
It is better to write a separate function for when bMVType is enabled. When
> numPartitions > 16, call compressInterCU_rd5_6(); otherwise, write code
> specific to
> your requirement. It will be much cleaner and more readable, and the base
> code will not be disturbed.
>
> The same applies to compressInterCU_rd0_4().
>
> SplitData splitCUData;
>>> - if ((m_param->bMVType && cuGeom.numPartitions > 16) ||
>>> !m_param->bMVType)
>>> +
>>> + bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions
>>> > 16);
>>> + bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 &&
>>> (m_modeFlag[0] || m_modeFlag[1]));
>>> + bool bNooffloading = !m_param->bMVType;
>>> +
>>> + if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
>>> {
>>> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>>> @@ -1977,7 +2000,7 @@
>>> }
>>>
>>> /* Step 1. Evaluate Merge/Skip candidates for likely early-outs
>>> */
>>> - if (mightNotSplit && !md.bestMode && !bCtuInfoCheck)
>>> + if (mightNotSplit && !md.bestMode && !bCtuInfoCheck ||
>>> (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>>> {
>>> md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
>>> md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>>> @@ -1993,6 +2016,9 @@
>>> skipRecursion = md.bestMode &&
>>> !md.bestMode->cu.getQtRootCbf(0);
>>> }
>>>
>>> + if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <=
>>> 16)
>>> + skipRecursion = true;
>>> +
>>> // estimate split cost
>>> /* Step 2. Evaluate each of the 4 split sub-blocks in series */
>>> if (mightSplit && !skipRecursion)
>>> @@ -2045,6 +2071,10 @@
>>> checkDQPForSplitPred(*splitPred, cuGeom);
>>> }
>>>
>>> + /* If analysis mode is simple do not Evaluate other modes */
>>> + if ((m_param->bMVType && cuGeom.numPartitions <= 16) &&
>>> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
>>> + mightNotSplit = !(m_checkMergeAndSkipOnly[0] ||
>>> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
>>> +
>>> /* Split CUs
>>> * 0 1
>>> * 2 3 */
>>> @@ -2479,6 +2509,22 @@
>>> checkDQPForSplitPred(*md.bestMode, cuGeom);
>>> }
>>>
>>> + if (m_param->bMVType && m_param->analysisReuseLevel == 7)
>>> + {
>>> + for (int list = 0; list < m_slice->isInterB() + 1; list++)
>>> + {
>>> + m_modeFlag[list] = true;
>>> + if (parentCTU.m_skipFlag[list][cuGeom.absPartIdx] == 1
>>> && cuGeom.numPartitions <= 16)
>>> + m_checkMergeAndSkipOnly[list] = true;
>>> + }
>>> + m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
>>> cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
>>> + for (int list = 0; list < m_slice->isInterB() + 1; list++)
>>> + {
>>> + m_modeFlag[list] = false;
>>> + m_checkMergeAndSkipOnly[list] = false;
>>> + }
>>> + }
>>> +
>>> if (m_param->interRefine > 1 || (m_param->interRefine &&
>>> parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP &&
>>> !mode.cu.isSkipped(0)))
>>> {
>>> m_evaluateInter = 1;
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.h
>>> --- a/source/encoder/analysis.h Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/encoder/analysis.h Fri Nov 17 19:23:14 2017 +0530
>>> @@ -110,6 +110,9 @@
>>> bool m_bChromaSa8d;
>>> bool m_bHD;
>>>
>>> + bool m_modeFlag[2];
>>> + bool m_checkMergeAndSkipOnly[2];
>>> +
>>> Analysis();
>>>
>>> bool create(ThreadLocalData* tld);
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/encoder.cpp
>>> --- a/source/encoder/encoder.cpp Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/encoder/encoder.cpp Fri Nov 17 19:23:14 2017 +0530
>>> @@ -48,6 +48,12 @@
>>> const char g_sliceTypeToChar[] = {'B', 'P', 'I'};
>>> }
>>>
>>> +/* Threshold for motion vection, based on expermental result.
>>> + * TODO: come up an algorithm for adoptive threshold */
>>> +
>>> +#define MVTHRESHOLD 10
>>> +#define PU_2Nx2N 1
>>>
>> MVTHRESHOLD is not used anywhere, so please remove it.
We already have an enum, PartSize, for the different PU sizes. Make use of
it.
> +
>>> static const char* defaultAnalysisFileName = "x265_analysis.dat";
>>>
>>> using namespace X265_NS;
>>> @@ -565,6 +571,14 @@
>>> (interData)->mvpIdx[k][cuPos + cuOffset] =
>>> (srcInterData)->mvpIdx[k][(mbIndex * 16) + cuOffset];
>>> (interData)->refIdx[k][cuPos + cuOffset] =
>>> (srcInterData)->refIdx[k][(mbIndex * 16) + cuOffset];
>>> memcpy(&(interData)->mv[k][cuPos +
>>> cuOffset], &(srcInterData)->mv[k][(mbIndex * 16) + cuOffset],
>>> sizeof(MV));
>>> + if (m_param->analysisReuseLevel == 7)
>>> + {
>>> + int mv_x = ((analysis_inter_data
>>> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) +
>>> cuOffset].x;
>>> + int mv_y = ((analysis_inter_data
>>> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) +
>>> cuOffset].y;
>>> + double mv = sqrt(mv_x*mv_x + mv_y*mv_y);
>>> + if (numPU == PU_2Nx2N &&
>>> ((srcInterData)->depth[cuPos + cuOffset] == (m_param->maxCUSize >> 5)) &&
>>> mv <= MVTHRESHOLD)
>>> + memset(&curFrame->m_analysisData.modeFlag[k][cuPos
>>> + cuOffset], 1, bytes);
>>> + }
>>> }
>>> }
>>> }
>>> @@ -624,6 +638,7 @@
>>> int bytes = curFrame->m_analysisData.numPartitions
>>> >> ((interData)->depth[d] * 2);
>>> memset(&(currInterData)->depth[count],
>>> (interData)->depth[d], bytes);
>>> memset(&(currInterData)->modes[count],
>>> (interData)->modes[d], bytes);
>>> + memcpy(&(currInterData)->sadCost[count],
>>> &((analysis_inter_data*)analysis_data->interData)->sadCost[d], bytes);
>>> if (m_param->analysisReuseLevel > 4)
>>> {
>>> memset(&(currInterData)->partSize[count],
>>> (interData)->partSize[d], bytes);
>>> @@ -639,6 +654,14 @@
>>> (currInterData)->mvpIdx[i][count +
>>> pu] = (interData)->mvpIdx[i][d];
>>> (currInterData)->refIdx[i][count +
>>> pu] = (interData)->refIdx[i][d];
>>> memcpy(&(currInterData)->mv[i][count
>>> + pu], &(interData)->mv[i][d], sizeof(MV));
>>> + if (m_param->analysisReuseLevel ==
>>> 7)
>>> + {
>>> + int mv_x =
>>> ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count
>>> + pu].x;
>>> + int mv_y =
>>> ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count
>>> + pu].y;
>>>
>>
We already have a local copy:
analysis_inter_data * interData = (analysis_inter_data
*)analysis_data->interData;
Use it.
+ double mv = sqrt(mv_x*mv_x +
>>> mv_y*mv_y);
>>> + if (numPU == PU_2Nx2N &&
>>> m_param->num4x4Partitions <= 16 && mv <= MVTHRESHOLD)
>>> +
>>> memset(&curFrame->m_analysisData.modeFlag[i][count + pu], 1, bytes);
>>> + }
>>> }
>>> }
>>> }
>>> @@ -3116,12 +3139,14 @@
>>> if (m_param->analysisReuseLevel >= 7)
>>> {
>>> X265_FREE(((analysis_inter_da
>>> ta*)analysis->interData)->interDir);
>>> + X265_FREE(((analysis_inter_dat
>>> a*)analysis->interData)->sadCost);
>>> int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
>>> for (int dir = 0; dir < numDir; dir++)
>>> {
>>> X265_FREE(((analysis_inter_da
>>> ta*)analysis->interData)->mvpIdx[dir]);
>>> X265_FREE(((analysis_inter_da
>>> ta*)analysis->interData)->refIdx[dir]);
>>> X265_FREE(((analysis_inter_da
>>> ta*)analysis->interData)->mv[dir]);
>>> + X265_FREE(analysis->modeFlag[dir]);
>>> }
>>> }
>>> else
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/x265.h
>>> --- a/source/x265.h Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/x265.h Fri Nov 17 19:23:14 2017 +0530
>>> @@ -123,6 +123,7 @@
>>> void* intraData;
>>> uint32_t numCuInHeight;
>>> x265_lookahead_data lookahead;
>>> + uint8_t* modeFlag[2];
>>> } x265_analysis_data;
>>>
>>> /* cu statistics */
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171120/774782a7/attachment-0001.html>
More information about the x265-devel
mailing list