[x265] [PATCH] analysis: use AVC CU analysis-info for HEVC mode analysis

Mon Nov 20 09:02:02 CET 2017

On Mon, Nov 20, 2017 at 12:53 PM, Ashok Kumar Mishra <
ashok at multicorewareinc.com> wrote:

>
>
> On Mon, Nov 20, 2017 at 11:01 AM, Pradeep Ramachandran <
> pradeep at multicorewareinc.com> wrote:
>
>>
>> On Fri, Nov 17, 2017 at 7:23 PM, <praveen at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Praveen Tiwari <praveen at multicorewareinc.com>
>>> # Date 1510926794 -19800
>>> #      Fri Nov 17 19:23:14 2017 +0530
>>> # Node ID 6b248ccb14169d2b0d5b84d50d94a153bd8f3b4f
>>> # Parent  9723e8812e63ce51e38ede41f7d5edf73cad0849
>>> analysis: use AVC CU analysis-info for HEVC mode analysis
>>>
>>
>> Pushed to default. Thanks!
>>
>>
>>>
>>> This patch implements the functionality for analysis-reuse-level 7.
>>> Here we want to use the AVC analysis info for the HEVC mode decision,
>>> and to use the depth from the offload for AVC sizes.
>>>
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.cpp
>>> --- a/source/common/cudata.cpp  Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/common/cudata.cpp  Fri Nov 17 19:23:14 2017 +0530
>>> @@ -201,6 +201,8 @@
>>>          m_cuDepth            = charBuf; charBuf += m_numPartitions;
>>>          m_predMode           = charBuf; charBuf += m_numPartitions; /*
>>> the order up to here is important in initCTU() and initSubCU() */
>>>          m_partSize           = charBuf; charBuf += m_numPartitions;
>>> +        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
>>> +        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
>>>          m_mergeFlag          = charBuf; charBuf += m_numPartitions;
>>>          m_interDir           = charBuf; charBuf += m_numPartitions;
>>>          m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
>>> @@ -239,6 +241,8 @@
>>>          m_cuDepth            = charBuf; charBuf += m_numPartitions;
>>>          m_predMode           = charBuf; charBuf += m_numPartitions; /*
>>> the order up to here is important in initCTU() and initSubCU() */
>>>          m_partSize           = charBuf; charBuf += m_numPartitions;
>>> +        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
>>> +        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
>>>          m_mergeFlag          = charBuf; charBuf += m_numPartitions;
>>>          m_interDir           = charBuf; charBuf += m_numPartitions;
>>>          m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.h
>>> --- a/source/common/cudata.h    Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/common/cudata.h    Fri Nov 17 19:23:14 2017 +0530
>>> @@ -199,13 +199,14 @@
>>>      uint8_t*      m_predMode;         // array of prediction modes
>>>      uint8_t*      m_partSize;         // array of partition sizes
>>>      uint8_t*      m_mergeFlag;        // array of merge flags
>>> +    uint8_t*      m_skipFlag[2];
>>>      uint8_t*      m_interDir;         // array of inter directions
>>>      uint8_t*      m_mvpIdx[2];        // array of motion vector
>>> predictor candidates or merge candidate indices [0]
>>>      uint8_t*      m_tuDepth;          // array of transform indices
>>>      uint8_t*      m_transformSkip[3]; // array of transform skipping
>>> flags per plane
>>>      uint8_t*      m_cbf[3];           // array of coded block flags
>>> (CBF) per plane
>>>      uint8_t*      m_chromaIntraDir;   // array of intra directions
>>> (chroma)
>>> -    enum { BytesPerPartition = 21 };  // combined sizeof() of all
>>> per-part data
>>> +    enum { BytesPerPartition = 23 };  // combined sizeof() of all
>>> per-part data
>>>
>>>      sse_t*        m_distortion;
>>>      coeff_t*      m_trCoeff[3];       // transformed coefficient buffer
>>> per plane
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/framedata.h
>>> --- a/source/common/framedata.h Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/common/framedata.h Fri Nov 17 19:23:14 2017 +0530
>>> @@ -195,6 +195,7 @@
>>>      uint8_t*    mvpIdx[2];
>>>      int8_t*     refIdx[2];
>>>      MV*         mv[2];
>>> +   int64_t*     sadCost;
>>>  };
>>>
>>>  struct analysis2PassFrameData
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.cpp
>>> --- a/source/encoder/analysis.cpp       Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/encoder/analysis.cpp       Fri Nov 17 19:23:14 2017 +0530
>>> @@ -75,6 +75,10 @@
>>>      m_reuseInterDataCTU = NULL;
>>>      m_reuseRef = NULL;
>>>      m_bHD = false;
>>> +    m_modeFlag[0] = false;
>>> +    m_modeFlag[1] = false;
>>> +    m_checkMergeAndSkipOnly[0] = false;
>>> +    m_checkMergeAndSkipOnly[1] = false;
>>>      m_evaluateInter = 0;
>>>  }
>>>
>>> @@ -247,6 +251,9 @@
>>>              memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU],
>>> sizeof(uint8_t) * numPartition);
>>>              memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU],
>>> sizeof(uint8_t) * numPartition);
>>>              memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU],
>>> sizeof(uint8_t) * numPartition);
>>> +            for (int list = 0; list < m_slice->isInterB() + 1; list++)
>>> +                memcpy(ctu.m_skipFlag[list],
>>> &m_frame->m_analysisData.modeFlag[list][posCTU], sizeof(uint8_t) *
>>> numPartition);
>>> +
>>>              if ((m_slice->m_sliceType == P_SLICE ||
>>> m_param->bIntraInBFrames) && !m_param->bMVType)
>>>              {
>>>                  analysis_intra_data* intraDataCTU =
>>> (analysis_intra_data*)m_frame->m_analysisData.intraData;
>>> @@ -1162,7 +1169,11 @@
>>>      PicYuv& reconPic = *m_frame->m_reconPic;
>>>      SplitData splitCUData;
>>>
>>> -    if ((m_param->bMVType && cuGeom.numPartitions > 16) ||
>>> !m_param->bMVType)
>>> +    bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions
>>> > 16);
>>> +    bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 &&
>>> (m_modeFlag[0] || m_modeFlag[1]));
>>> +    bool bNooffloading = !m_param->bMVType;
>>> +
>>> +    if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
>>>      {
>>>          md.bestMode = NULL;
>>>          bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>> @@ -1296,7 +1307,7 @@
>>>          }
>>>
>>>          /* Step 1. Evaluate Merge/Skip candidates for likely
>>> early-outs, if skip mode was not set above */
>>> -        if (mightNotSplit && depth >= minDepth && !md.bestMode &&
>>> !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */
>>> +        if ((mightNotSplit && depth >= minDepth && !md.bestMode &&
>>> !bCtuInfoCheck) || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>>> /* TODO: Re-evaluate if analysis load/save still works */
>>>          {
>>>              /* Compute Merge Cost */
>>>              md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>>> @@ -1307,7 +1318,7 @@
>>>                  && md.bestMode && md.bestMode->cu.isSkipped(0); //
>>> TODO: sa8d threshold per depth
>>>          }
>>>
>>> -        if (md.bestMode && m_param->bEnableRecursionSkip &&
>>> !bCtuInfoCheck)
>>> +        if (md.bestMode && m_param->bEnableRecursionSkip &&
>>> !bCtuInfoCheck && !(m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>>>          {
>>>              skipRecursion = md.bestMode->cu.isSkipped(0);
>>>              if (mightSplit && depth >= minDepth && !skipRecursion)
>>> @@ -1319,6 +1330,9 @@
>>>              }
>>>          }
>>>
>>> +        if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <=
>>> 16)
>>> +            skipRecursion = true;
>>> +
>>>          /* Step 2. Evaluate each of the 4 split sub-blocks in series */
>>>          if (mightSplit && !skipRecursion)
>>>          {
>>> @@ -1374,6 +1388,10 @@
>>>                  splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion,
>>> splitPred->sa8dBits);
>>>          }
>>>
>>> +        /* If analysis mode is simple do not Evaluate other modes */
>>> +        if ((m_param->bMVType && cuGeom.numPartitions <= 16) &&
>>> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
>>> +            mightNotSplit = !(m_checkMergeAndSkipOnly[0] ||
>>> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
>>> +
>>>          /* Split CUs
>>>           *   0  1
>>>           *   2  3 */
>>> @@ -1838,7 +1856,12 @@
>>>      }
>>>
> It is better to write a separate function for the case where bMVType is
> enabled. When numPartitions > 16, call compressInterCU_rd5_6(); otherwise,
> write code specific to your requirement. That will be much cleaner and more
> readable, and the base code will not be disturbed.
>
> The same applies to compressInterCU_rd0_4().
>
>      SplitData splitCUData;
>>> -    if ((m_param->bMVType && cuGeom.numPartitions > 16) ||
>>> !m_param->bMVType)
>>> +
>>> +    bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions
>>> > 16);
>>> +    bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 &&
>>> (m_modeFlag[0] || m_modeFlag[1]));
>>> +    bool bNooffloading = !m_param->bMVType;
>>> +
>>> +    if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
>>>      {
>>>          bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>>          bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>>> @@ -1977,7 +2000,7 @@
>>>          }
>>>
>>>          /* Step 1. Evaluate Merge/Skip candidates for likely early-outs
>>> */
>>> -        if (mightNotSplit && !md.bestMode && !bCtuInfoCheck)
>>> +        if (mightNotSplit && !md.bestMode && !bCtuInfoCheck ||
>>> (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>>>          {
>>>              md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
>>>              md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>>> @@ -1993,6 +2016,9 @@
>>>                  skipRecursion = md.bestMode &&
>>> !md.bestMode->cu.getQtRootCbf(0);
>>>          }
>>>
>>> +        if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <=
>>> 16)
>>> +            skipRecursion = true;
>>> +
>>>          // estimate split cost
>>>          /* Step 2. Evaluate each of the 4 split sub-blocks in series */
>>>          if (mightSplit && !skipRecursion)
>>> @@ -2045,6 +2071,10 @@
>>>              checkDQPForSplitPred(*splitPred, cuGeom);
>>>          }
>>>
>>> +        /* If analysis mode is simple do not Evaluate other modes */
>>> +        if ((m_param->bMVType && cuGeom.numPartitions <= 16) &&
>>> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
>>> +            mightNotSplit = !(m_checkMergeAndSkipOnly[0] ||
>>> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
>>> +
>>>          /* Split CUs
>>>           *   0  1
>>>           *   2  3 */
>>> @@ -2479,6 +2509,22 @@
>>>                  checkDQPForSplitPred(*md.bestMode, cuGeom);
>>>          }
>>>
>>> +        if (m_param->bMVType && m_param->analysisReuseLevel == 7)
>>> +        {
>>> +            for (int list = 0; list < m_slice->isInterB() + 1; list++)
>>> +            {
>>> +                m_modeFlag[list] = true;
>>> +                if (parentCTU.m_skipFlag[list][cuGeom.absPartIdx] == 1
>>> && cuGeom.numPartitions <= 16)
>>> +                    m_checkMergeAndSkipOnly[list] = true;
>>> +            }
>>> +            m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
>>> cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
>>> +            for (int list = 0; list < m_slice->isInterB() + 1; list++)
>>> +            {
>>> +                m_modeFlag[list] = false;
>>> +                m_checkMergeAndSkipOnly[list] = false;
>>> +            }
>>> +        }
>>> +
>>>          if (m_param->interRefine > 1 || (m_param->interRefine &&
>>> parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP  &&
>>> !mode.cu.isSkipped(0)))
>>>          {
>>>              m_evaluateInter = 1;
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.h
>>> --- a/source/encoder/analysis.h Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/encoder/analysis.h Fri Nov 17 19:23:14 2017 +0530
>>> @@ -110,6 +110,9 @@
>>>      bool      m_bChromaSa8d;
>>>      bool      m_bHD;
>>>
>>> +    bool      m_modeFlag[2];
>>> +    bool      m_checkMergeAndSkipOnly[2];
>>> +
>>>      Analysis();
>>>
>>>      bool create(ThreadLocalData* tld);
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/encoder.cpp
>>> --- a/source/encoder/encoder.cpp        Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/encoder/encoder.cpp        Fri Nov 17 19:23:14 2017 +0530
>>> @@ -48,6 +48,12 @@
>>>  const char g_sliceTypeToChar[] = {'B', 'P', 'I'};
>>>  }
>>>
>>> +/* Threshold for motion vector, based on experimental results.
>>> + * TODO: come up with an algorithm for an adaptive threshold */
>>> +
>>> +#define MVTHRESHOLD 10
>>> +#define PU_2Nx2N 1
>>>
>> MVTHRESHOLD is not used anywhere, so please remove it.
We already have an enum, PartSize, for the different PU sizes. Make use of
it instead of adding a new PU_2Nx2N define.

> +
>>>  static const char* defaultAnalysisFileName = "x265_analysis.dat";
>>>
>>>  using namespace X265_NS;
>>> @@ -565,6 +571,14 @@
>>>                              (interData)->mvpIdx[k][cuPos + cuOffset] =
>>> (srcInterData)->mvpIdx[k][(mbIndex * 16) + cuOffset];
>>>                              (interData)->refIdx[k][cuPos + cuOffset] =
>>> (srcInterData)->refIdx[k][(mbIndex * 16) + cuOffset];
>>>                              memcpy(&(interData)->mv[k][cuPos +
>>> cuOffset], &(srcInterData)->mv[k][(mbIndex * 16) + cuOffset],
>>> sizeof(MV));
>>> +                            if (m_param->analysisReuseLevel == 7)
>>> +                            {
>>> +                                int mv_x = ((analysis_inter_data
>>> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) +
>>> cuOffset].x;
>>> +                                int mv_y = ((analysis_inter_data
>>> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) +
>>> cuOffset].y;
>>> +                                double mv = sqrt(mv_x*mv_x + mv_y*mv_y);
>>> +                                if (numPU == PU_2Nx2N &&
>>> ((srcInterData)->depth[cuPos + cuOffset] == (m_param->maxCUSize >> 5)) &&
>>> mv <= MVTHRESHOLD)
>>> +                                    memset(&curFrame->m_analysisData.modeFlag[k][cuPos
>>> + cuOffset], 1, bytes);
>>> +                            }
>>>                          }
>>>                      }
>>>                  }
>>> @@ -624,6 +638,7 @@
>>>                      int bytes = curFrame->m_analysisData.numPartitions
>>> >> ((interData)->depth[d] * 2);
>>>                      memset(&(currInterData)->depth[count],
>>> (interData)->depth[d], bytes);
>>>                      memset(&(currInterData)->modes[count],
>>> (interData)->modes[d], bytes);
>>> +                    memcpy(&(currInterData)->sadCost[count],
>>> &((analysis_inter_data*)analysis_data->interData)->sadCost[d], bytes);
>>>                      if (m_param->analysisReuseLevel > 4)
>>>                      {
>>>                          memset(&(currInterData)->partSize[count],
>>> (interData)->partSize[d], bytes);
>>> @@ -639,6 +654,14 @@
>>>                                      (currInterData)->mvpIdx[i][count +
>>> pu] = (interData)->mvpIdx[i][d];
>>>                                      (currInterData)->refIdx[i][count +
>>> pu] = (interData)->refIdx[i][d];
>>>                                      memcpy(&(currInterData)->mv[i][count
>>> + pu], &(interData)->mv[i][d], sizeof(MV));
>>> +                                    if (m_param->analysisReuseLevel ==
>>> 7)
>>> +                                    {
>>> +                                        int mv_x =
>>> ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count
>>> + pu].x;
>>> +                                        int mv_y =
>>> ((analysis_inter_data *)curFrame->m_analysisData.interData)->mv[i][count
>>> + pu].y;
>>>
>>
We already have a local copy:
analysis_inter_data* interData = (analysis_inter_data*)analysis_data->interData;
Use it.

+                                        double mv = sqrt(mv_x*mv_x +
>>> mv_y*mv_y);
>>> +                                        if (numPU == PU_2Nx2N &&
>>> m_param->num4x4Partitions <= 16 && mv <= MVTHRESHOLD)
>>> +
>>> memset(&curFrame->m_analysisData.modeFlag[i][count + pu], 1, bytes);
>>> +                                    }
>>>                                  }
>>>                              }
>>>                          }
>>> @@ -3116,12 +3139,14 @@
>>>              if (m_param->analysisReuseLevel >= 7)
>>>              {
>>>                  X265_FREE(((analysis_inter_da
>>> ta*)analysis->interData)->interDir);
>>> +                X265_FREE(((analysis_inter_dat
>>> a*)analysis->interData)->sadCost);
>>>                  int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
>>>                  for (int dir = 0; dir < numDir; dir++)
>>>                  {
>>>                      X265_FREE(((analysis_inter_da
>>> ta*)analysis->interData)->mvpIdx[dir]);
>>>                      X265_FREE(((analysis_inter_da
>>> ta*)analysis->interData)->refIdx[dir]);
>>>                      X265_FREE(((analysis_inter_da
>>> ta*)analysis->interData)->mv[dir]);
>>> +                    X265_FREE(analysis->modeFlag[dir]);
>>>                  }
>>>              }
>>>              else
>>> diff -r 9723e8812e63 -r 6b248ccb1416 source/x265.h
>>> --- a/source/x265.h     Fri Nov 17 14:16:31 2017 +0530
>>> +++ b/source/x265.h     Fri Nov 17 19:23:14 2017 +0530
>>> @@ -123,6 +123,7 @@
>>>      void*            intraData;
>>>      uint32_t         numCuInHeight;
>>>      x265_lookahead_data lookahead;
>>> +    uint8_t*         modeFlag[2];
>>>  } x265_analysis_data;
>>>
>>>  /* cu statistics */
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171120/774782a7/attachment-0001.html>