[x265] [PATCH] analysis: use AVC CU analysis-info for HEVC mode analysis

Ashok Kumar Mishra ashok at multicorewareinc.com
Mon Nov 20 08:23:11 CET 2017


On Mon, Nov 20, 2017 at 11:01 AM, Pradeep Ramachandran <
pradeep at multicorewareinc.com> wrote:

>
> On Fri, Nov 17, 2017 at 7:23 PM, <praveen at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Praveen Tiwari <praveen at multicorewareinc.com>
>> # Date 1510926794 -19800
>> #      Fri Nov 17 19:23:14 2017 +0530
>> # Node ID 6b248ccb14169d2b0d5b84d50d94a153bd8f3b4f
>> # Parent  9723e8812e63ce51e38ede41f7d5edf73cad0849
>> analysis: use AVC CU analysis-info for HEVC mode analysis
>>
>
> Pushed to default. Thanks!
>
>
>>
>> This patch implements the functionality for analysis-reuse-level 7.
>> Here we want to use the AVC analysis info for the HEVC mode decision,
>> and to use the depth from the offload for AVC sizes.
>>
>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.cpp
>> --- a/source/common/cudata.cpp  Fri Nov 17 14:16:31 2017 +0530
>> +++ b/source/common/cudata.cpp  Fri Nov 17 19:23:14 2017 +0530
>> @@ -201,6 +201,8 @@
>>          m_cuDepth            = charBuf; charBuf += m_numPartitions;
>>          m_predMode           = charBuf; charBuf += m_numPartitions; /*
>> the order up to here is important in initCTU() and initSubCU() */
>>          m_partSize           = charBuf; charBuf += m_numPartitions;
>> +        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
>> +        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
>>          m_mergeFlag          = charBuf; charBuf += m_numPartitions;
>>          m_interDir           = charBuf; charBuf += m_numPartitions;
>>          m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
>> @@ -239,6 +241,8 @@
>>          m_cuDepth            = charBuf; charBuf += m_numPartitions;
>>          m_predMode           = charBuf; charBuf += m_numPartitions; /*
>> the order up to here is important in initCTU() and initSubCU() */
>>          m_partSize           = charBuf; charBuf += m_numPartitions;
>> +        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
>> +        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
>>          m_mergeFlag          = charBuf; charBuf += m_numPartitions;
>>          m_interDir           = charBuf; charBuf += m_numPartitions;
>>          m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/cudata.h
>> --- a/source/common/cudata.h    Fri Nov 17 14:16:31 2017 +0530
>> +++ b/source/common/cudata.h    Fri Nov 17 19:23:14 2017 +0530
>> @@ -199,13 +199,14 @@
>>      uint8_t*      m_predMode;         // array of prediction modes
>>      uint8_t*      m_partSize;         // array of partition sizes
>>      uint8_t*      m_mergeFlag;        // array of merge flags
>> +    uint8_t*      m_skipFlag[2];
>>      uint8_t*      m_interDir;         // array of inter directions
>>      uint8_t*      m_mvpIdx[2];        // array of motion vector
>> predictor candidates or merge candidate indices [0]
>>      uint8_t*      m_tuDepth;          // array of transform indices
>>      uint8_t*      m_transformSkip[3]; // array of transform skipping
>> flags per plane
>>      uint8_t*      m_cbf[3];           // array of coded block flags
>> (CBF) per plane
>>      uint8_t*      m_chromaIntraDir;   // array of intra directions
>> (chroma)
>> -    enum { BytesPerPartition = 21 };  // combined sizeof() of all
>> per-part data
>> +    enum { BytesPerPartition = 23 };  // combined sizeof() of all
>> per-part data
>>
>>      sse_t*        m_distortion;
>>      coeff_t*      m_trCoeff[3];       // transformed coefficient buffer
>> per plane
>> diff -r 9723e8812e63 -r 6b248ccb1416 source/common/framedata.h
>> --- a/source/common/framedata.h Fri Nov 17 14:16:31 2017 +0530
>> +++ b/source/common/framedata.h Fri Nov 17 19:23:14 2017 +0530
>> @@ -195,6 +195,7 @@
>>      uint8_t*    mvpIdx[2];
>>      int8_t*     refIdx[2];
>>      MV*         mv[2];
>> +   int64_t*     sadCost;
>>  };
>>
>>  struct analysis2PassFrameData
>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.cpp
>> --- a/source/encoder/analysis.cpp       Fri Nov 17 14:16:31 2017 +0530
>> +++ b/source/encoder/analysis.cpp       Fri Nov 17 19:23:14 2017 +0530
>> @@ -75,6 +75,10 @@
>>      m_reuseInterDataCTU = NULL;
>>      m_reuseRef = NULL;
>>      m_bHD = false;
>> +    m_modeFlag[0] = false;
>> +    m_modeFlag[1] = false;
>> +    m_checkMergeAndSkipOnly[0] = false;
>> +    m_checkMergeAndSkipOnly[1] = false;
>>      m_evaluateInter = 0;
>>  }
>>
>> @@ -247,6 +251,9 @@
>>              memcpy(ctu.m_cuDepth, &interDataCTU->depth[posCTU],
>> sizeof(uint8_t) * numPartition);
>>              memcpy(ctu.m_predMode, &interDataCTU->modes[posCTU],
>> sizeof(uint8_t) * numPartition);
>>              memcpy(ctu.m_partSize, &interDataCTU->partSize[posCTU],
>> sizeof(uint8_t) * numPartition);
>> +            for (int list = 0; list < m_slice->isInterB() + 1; list++)
>> +                memcpy(ctu.m_skipFlag[list],
>> &m_frame->m_analysisData.modeFlag[list][posCTU], sizeof(uint8_t) *
>> numPartition);
>> +
>>              if ((m_slice->m_sliceType == P_SLICE ||
>> m_param->bIntraInBFrames) && !m_param->bMVType)
>>              {
>>                  analysis_intra_data* intraDataCTU =
>> (analysis_intra_data*)m_frame->m_analysisData.intraData;
>> @@ -1162,7 +1169,11 @@
>>      PicYuv& reconPic = *m_frame->m_reconPic;
>>      SplitData splitCUData;
>>
>> -    if ((m_param->bMVType && cuGeom.numPartitions > 16) ||
>> !m_param->bMVType)
>> +    bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions
>> > 16);
>> +    bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 &&
>> (m_modeFlag[0] || m_modeFlag[1]));
>> +    bool bNooffloading = !m_param->bMVType;
>> +
>> +    if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
>>      {
>>          md.bestMode = NULL;
>>          bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>> @@ -1296,7 +1307,7 @@
>>          }
>>
>>          /* Step 1. Evaluate Merge/Skip candidates for likely early-outs,
>> if skip mode was not set above */
>> -        if (mightNotSplit && depth >= minDepth && !md.bestMode &&
>> !bCtuInfoCheck) /* TODO: Re-evaluate if analysis load/save still works */
>> +        if ((mightNotSplit && depth >= minDepth && !md.bestMode &&
>> !bCtuInfoCheck) || (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>> /* TODO: Re-evaluate if analysis load/save still works */
>>          {
>>              /* Compute Merge Cost */
>>              md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>> @@ -1307,7 +1318,7 @@
>>                  && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO:
>> sa8d threshold per depth
>>          }
>>
>> -        if (md.bestMode && m_param->bEnableRecursionSkip &&
>> !bCtuInfoCheck)
>> +        if (md.bestMode && m_param->bEnableRecursionSkip &&
>> !bCtuInfoCheck && !(m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>>          {
>>              skipRecursion = md.bestMode->cu.isSkipped(0);
>>              if (mightSplit && depth >= minDepth && !skipRecursion)
>> @@ -1319,6 +1330,9 @@
>>              }
>>          }
>>
>> +        if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <=
>> 16)
>> +            skipRecursion = true;
>> +
>>          /* Step 2. Evaluate each of the 4 split sub-blocks in series */
>>          if (mightSplit && !skipRecursion)
>>          {
>> @@ -1374,6 +1388,10 @@
>>                  splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion,
>> splitPred->sa8dBits);
>>          }
>>
>> +        /* If analysis mode is simple do not Evaluate other modes */
>> +        if ((m_param->bMVType && cuGeom.numPartitions <= 16) &&
>> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
>> +            mightNotSplit = !(m_checkMergeAndSkipOnly[0] ||
>> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
>> +
>>          /* Split CUs
>>           *   0  1
>>           *   2  3 */
>> @@ -1838,7 +1856,12 @@
>>      }
>>
It is better to write a separate function for the case when bMVType is
enabled. When numPartitions > 16, call compressInterCU_rd5_6(); otherwise,
write code specific to your requirement. That will be much cleaner and more
readable, and the base code will not be disturbed.

The same applies to compressInterCU_rd0_4().

     SplitData splitCUData;
>> -    if ((m_param->bMVType && cuGeom.numPartitions > 16) ||
>> !m_param->bMVType)
>> +
>> +    bool bHEVCBlockAnalysis = (m_param->bMVType && cuGeom.numPartitions
>> > 16);
>> +    bool bRefineAVCAnalysis = (m_param->analysisReuseLevel == 7 &&
>> (m_modeFlag[0] || m_modeFlag[1]));
>> +    bool bNooffloading = !m_param->bMVType;
>> +
>> +    if (bHEVCBlockAnalysis || bRefineAVCAnalysis || bNooffloading)
>>      {
>>          bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
>>          bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>> @@ -1977,7 +2000,7 @@
>>          }
>>
>>          /* Step 1. Evaluate Merge/Skip candidates for likely early-outs
>> */
>> -        if (mightNotSplit && !md.bestMode && !bCtuInfoCheck)
>> +        if (mightNotSplit && !md.bestMode && !bCtuInfoCheck ||
>> (m_param->bMVType && (m_modeFlag[0] || m_modeFlag[1])))
>>          {
>>              md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
>>              md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>> @@ -1993,6 +2016,9 @@
>>                  skipRecursion = md.bestMode &&
>> !md.bestMode->cu.getQtRootCbf(0);
>>          }
>>
>> +        if (m_param->bMVType && md.bestMode && cuGeom.numPartitions <=
>> 16)
>> +            skipRecursion = true;
>> +
>>          // estimate split cost
>>          /* Step 2. Evaluate each of the 4 split sub-blocks in series */
>>          if (mightSplit && !skipRecursion)
>> @@ -2045,6 +2071,10 @@
>>              checkDQPForSplitPred(*splitPred, cuGeom);
>>          }
>>
>> +        /* If analysis mode is simple do not Evaluate other modes */
>> +        if ((m_param->bMVType && cuGeom.numPartitions <= 16) &&
>> (m_slice->m_sliceType == P_SLICE || m_slice->m_sliceType == B_SLICE))
>> +            mightNotSplit = !(m_checkMergeAndSkipOnly[0] ||
>> (m_checkMergeAndSkipOnly[0] && m_checkMergeAndSkipOnly[1]));
>> +
>>          /* Split CUs
>>           *   0  1
>>           *   2  3 */
>> @@ -2479,6 +2509,22 @@
>>                  checkDQPForSplitPred(*md.bestMode, cuGeom);
>>          }
>>
>> +        if (m_param->bMVType && m_param->analysisReuseLevel == 7)
>> +        {
>> +            for (int list = 0; list < m_slice->isInterB() + 1; list++)
>> +            {
>> +                m_modeFlag[list] = true;
>> +                if (parentCTU.m_skipFlag[list][cuGeom.absPartIdx] == 1
>> && cuGeom.numPartitions <= 16)
>> +                    m_checkMergeAndSkipOnly[list] = true;
>> +            }
>> +            m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
>> cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
>> +            for (int list = 0; list < m_slice->isInterB() + 1; list++)
>> +            {
>> +                m_modeFlag[list] = false;
>> +                m_checkMergeAndSkipOnly[list] = false;
>> +            }
>> +        }
>> +
>>          if (m_param->interRefine > 1 || (m_param->interRefine &&
>> parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP  &&
>> !mode.cu.isSkipped(0)))
>>          {
>>              m_evaluateInter = 1;
>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/analysis.h
>> --- a/source/encoder/analysis.h Fri Nov 17 14:16:31 2017 +0530
>> +++ b/source/encoder/analysis.h Fri Nov 17 19:23:14 2017 +0530
>> @@ -110,6 +110,9 @@
>>      bool      m_bChromaSa8d;
>>      bool      m_bHD;
>>
>> +    bool      m_modeFlag[2];
>> +    bool      m_checkMergeAndSkipOnly[2];
>> +
>>      Analysis();
>>
>>      bool create(ThreadLocalData* tld);
>> diff -r 9723e8812e63 -r 6b248ccb1416 source/encoder/encoder.cpp
>> --- a/source/encoder/encoder.cpp        Fri Nov 17 14:16:31 2017 +0530
>> +++ b/source/encoder/encoder.cpp        Fri Nov 17 19:23:14 2017 +0530
>> @@ -48,6 +48,12 @@
>>  const char g_sliceTypeToChar[] = {'B', 'P', 'I'};
>>  }
>>
>> +/* Threshold for the motion vector, based on experimental results.
>> + * TODO: come up with an algorithm for an adaptive threshold */
>> +
>> +#define MVTHRESHOLD 10
>> +#define PU_2Nx2N 1
>> +
>>  static const char* defaultAnalysisFileName = "x265_analysis.dat";
>>
>>  using namespace X265_NS;
>> @@ -565,6 +571,14 @@
>>                              (interData)->mvpIdx[k][cuPos + cuOffset] =
>> (srcInterData)->mvpIdx[k][(mbIndex * 16) + cuOffset];
>>                              (interData)->refIdx[k][cuPos + cuOffset] =
>> (srcInterData)->refIdx[k][(mbIndex * 16) + cuOffset];
>>                              memcpy(&(interData)->mv[k][cuPos +
>> cuOffset], &(srcInterData)->mv[k][(mbIndex * 16) + cuOffset],
>> sizeof(MV));
>> +                            if (m_param->analysisReuseLevel == 7)
>> +                            {
>> +                                int mv_x = ((analysis_inter_data
>> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) +
>> cuOffset].x;
>> +                                int mv_y = ((analysis_inter_data
>> *)curFrame->m_analysisData.interData)->mv[k][(mbIndex * 16) +
>> cuOffset].y;
>> +                                double mv = sqrt(mv_x*mv_x + mv_y*mv_y);
>> +                                if (numPU == PU_2Nx2N &&
>> ((srcInterData)->depth[cuPos + cuOffset] == (m_param->maxCUSize >> 5)) &&
>> mv <= MVTHRESHOLD)
>> +                                    memset(&curFrame->m_analysisData.modeFlag[k][cuPos
>> + cuOffset], 1, bytes);
>> +                            }
>>                          }
>>                      }
>>                  }
>> @@ -624,6 +638,7 @@
>>                      int bytes = curFrame->m_analysisData.numPartitions
>> >> ((interData)->depth[d] * 2);
>>                      memset(&(currInterData)->depth[count],
>> (interData)->depth[d], bytes);
>>                      memset(&(currInterData)->modes[count],
>> (interData)->modes[d], bytes);
>> +                    memcpy(&(currInterData)->sadCost[count],
>> &((analysis_inter_data*)analysis_data->interData)->sadCost[d], bytes);
>>                      if (m_param->analysisReuseLevel > 4)
>>                      {
>>                          memset(&(currInterData)->partSize[count],
>> (interData)->partSize[d], bytes);
>> @@ -639,6 +654,14 @@
>>                                      (currInterData)->mvpIdx[i][count +
>> pu] = (interData)->mvpIdx[i][d];
>>                                      (currInterData)->refIdx[i][count +
>> pu] = (interData)->refIdx[i][d];
>>                                      memcpy(&(currInterData)->mv[i][count
>> + pu], &(interData)->mv[i][d], sizeof(MV));
>> +                                    if (m_param->analysisReuseLevel == 7)
>> +                                    {
>> +                                        int mv_x = ((analysis_inter_data
>> *)curFrame->m_analysisData.interData)->mv[i][count + pu].x;
>> +                                        int mv_y = ((analysis_inter_data
>> *)curFrame->m_analysisData.interData)->mv[i][count + pu].y;
>> +                                        double mv = sqrt(mv_x*mv_x +
>> mv_y*mv_y);
>> +                                        if (numPU == PU_2Nx2N &&
>> m_param->num4x4Partitions <= 16 && mv <= MVTHRESHOLD)
>> +
>> memset(&curFrame->m_analysisData.modeFlag[i][count + pu], 1, bytes);
>> +                                    }
>>                                  }
>>                              }
>>                          }
>> @@ -3116,12 +3139,14 @@
>>              if (m_param->analysisReuseLevel >= 7)
>>              {
>>                  X265_FREE(((analysis_inter_da
>> ta*)analysis->interData)->interDir);
>> +                X265_FREE(((analysis_inter_dat
>> a*)analysis->interData)->sadCost);
>>                  int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
>>                  for (int dir = 0; dir < numDir; dir++)
>>                  {
>>                      X265_FREE(((analysis_inter_da
>> ta*)analysis->interData)->mvpIdx[dir]);
>>                      X265_FREE(((analysis_inter_da
>> ta*)analysis->interData)->refIdx[dir]);
>>                      X265_FREE(((analysis_inter_da
>> ta*)analysis->interData)->mv[dir]);
>> +                    X265_FREE(analysis->modeFlag[dir]);
>>                  }
>>              }
>>              else
>> diff -r 9723e8812e63 -r 6b248ccb1416 source/x265.h
>> --- a/source/x265.h     Fri Nov 17 14:16:31 2017 +0530
>> +++ b/source/x265.h     Fri Nov 17 19:23:14 2017 +0530
>> @@ -123,6 +123,7 @@
>>      void*            intraData;
>>      uint32_t         numCuInHeight;
>>      x265_lookahead_data lookahead;
>> +    uint8_t*         modeFlag[2];
>>  } x265_analysis_data;
>>
>>  /* cu statistics */
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20171120/696f4729/attachment-0001.html>


More information about the x265-devel mailing list