[x265] [PATCH] [REVIEW PATCH]sa8dCost threshold to reduce number of modes in intra analysis

Steve Borho steve at borho.org
Tue Jan 20 19:08:04 CET 2015


On 01/20, ashok at multicorewareinc.com wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> # Date 1421415068 -19800
> #      Fri Jan 16 19:01:08 2015 +0530
> # Node ID 74210e69fef93522d4b1243afab034d0544fa09f
> # Parent  6b72bb520a91fe1b354f2e93a3bd31281eb132c0
> [REVIEW PATCH]sa8dCost threshold to reduce number of modes in intra analysis

The commit message should mention that we get about a 10% speedup for a
very small compression loss via this threshold-based early-out check.

> diff -r 6b72bb520a91 -r 74210e69fef9 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Mon Jan 19 09:43:36 2015 -0800
> +++ b/source/encoder/analysis.cpp	Fri Jan 16 19:01:08 2015 +0530
> @@ -1187,13 +1187,13 @@
>                  (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
>              {
>                  md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
> -                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
> +                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, md.bestMode->sa8dCost * 1);

I think there are some predictions which are not calculating sa8dCost at
RD 5 and 6, particularly merge. You should add an X265_CHECK here for
non-zero sa8dCost and see if you can catch such a case.

>                  checkBestMode(md.pred[PRED_INTRA], depth);
>  
>                  if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
>                  {
>                      md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
> -                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL);
> +                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, md.bestMode->sa8dCost);
>                      checkBestMode(md.pred[PRED_INTRA_NxN], depth);
>                  }
>              }
> @@ -1549,6 +1549,12 @@
>      }
>      if (predInterSearch(interMode, cuGeom, bMergeOnly, true))
>      {
> +        const Yuv& fencYuv = *interMode.fencYuv;
> +        Yuv& predYuv = interMode.predYuv;
> +        int part = partitionFromLog2Size(cuGeom.log2CUSize);
> +        interMode.distortion = primitives.cu[part].sa8d(fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
> +        interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
> +
>          /* predInterSearch sets interMode.sa8dBits, but this is ignored */
>          encodeResAndCalcRdInterCU(interMode, cuGeom);
>  
> diff -r 6b72bb520a91 -r 74210e69fef9 source/encoder/search.cpp
> --- a/source/encoder/search.cpp	Mon Jan 19 09:43:36 2015 -0800
> +++ b/source/encoder/search.cpp	Fri Jan 16 19:01:08 2015 +0530
> @@ -1131,7 +1131,7 @@
>      }
>  }
>  
> -void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes)
> +void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint64_t sa8dThreshold)
>  {
>      CUData& cu = intraMode.cu;
>  
> @@ -1142,7 +1142,13 @@
>      cu.getIntraTUQtDepthRange(tuDepthRange, 0);
>  
>      intraMode.initCosts();
> -    intraMode.distortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes);
> +    intraMode.distortion  = estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes, sa8dThreshold);
> +
> +    if (intraMode.distortion == MAX_INT)
> +    {
> +        intraMode.rdCost = MAX_INT64;
> +        return;
> +    }
>      intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom);
>  
>      m_entropyCoder.resetBits();
> @@ -1390,7 +1396,7 @@
>      updateModeCost(intraMode);
>  }
>  
> -uint32_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes)
> +uint32_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes, uint64_t sa8dThreshold)
>  {
>      CUData& cu = intraMode.cu;
>      Yuv* reconYuv = &intraMode.reconYuv;
> @@ -1532,6 +1538,7 @@
>                  candCostList[i] = MAX_INT64;
>  
>              uint64_t paddedBcost = bcost + (bcost >> 3); // 1.12%
> +            paddedBcost = X265_MIN(paddedBcost, sa8dThreshold);
>              for (int mode = 0; mode < 35; mode++)
>                  if (modeCosts[mode] < paddedBcost || (mpms & ((uint64_t)1 << mode)))
>                      updateCandList(mode, modeCosts[mode], maxCandCount, rdModeList, candCostList);
> @@ -1550,8 +1557,11 @@
>                      codeIntraLumaTSkip(intraMode, cuGeom, initTuDepth, absPartIdx, icosts);
>                  else
>                      codeIntraLumaQT(intraMode, cuGeom, initTuDepth, absPartIdx, false, icosts, depthRange);
> -                COPY2_IF_LT(bcost, icosts.rdcost, bmode, rdModeList[i]);
> +                COPY3_IF_LT(bcost, icosts.rdcost, bmode, rdModeList[i], intraMode.sa8dCost, candCostList[i]);
>              }
> +
> +            if (bcost == MAX_INT64)
> +                return MAX_INT;
>          }
>  
>          /* remeasure best mode, allowing TU splits */

After this point, the function will measure the RD cost of the top N
modes and then measure RD cost using full TU recursion.  That final TU
recursion RD cost calculation could be skipped in a similar fashion if
we passed an RD cost threshold based on the rdCost of the current best
mode. Also, it might be possible to skip it if we know the TU depth is
limited and the re-encode would produce identical results.

This is something to try after this patch is working correctly.

> diff -r 6b72bb520a91 -r 74210e69fef9 source/encoder/search.h
> --- a/source/encoder/search.h	Mon Jan 19 09:43:36 2015 -0800
> +++ b/source/encoder/search.h	Fri Jan 16 19:01:08 2015 +0530
> @@ -163,7 +163,7 @@
>      void     invalidateContexts(int fromDepth);
>  
>      // full RD search of intra modes. if sharedModes is not NULL, it directly uses them
> -    void     checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes);
> +    void     checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint64_t sa8dThreshold = MAX_INT64);
>  
>      // select best intra mode using only sa8d costs, cannot measure NxN intra
>      void     checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
> @@ -204,7 +204,7 @@
>      void     saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth);
>  
>      // RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
> -    uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
> +    uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes, uint64_t sa8dThreshold);
>  
>      // RDO select best chroma mode from luma; result is fully encode chroma. chroma distortion is returned
>      uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list