[x265] [PATCH] Edge Aware Quad Tree Establishment

Tue Jan 7 11:57:30 CET 2020

On Tue, Jan 7, 2020 at 8:16 AM <srikanth.kurapati at multicorewareinc.com>
wrote:

> # HG changeset patch
> # User Srikanth Kurapati
> # Date 1577432829 -19800
> #      Fri Dec 27 13:17:09 2019 +0530
> # Node ID 3d60a9a728b37f14cbb9cb2332a1aebf87e66334
> # Parent  19f6ed1659197aaa4bd78b69eb58139e879230d9
> Edge Aware Quad Tree Establishment.

This patch does the following:
> 1. Terminates recursion using edge information.
> 2. Adds modes for "--rskip". Modes 0,1 for current usage and 2 for edge
> based rskip.
> 3. Adds option "edge-threshold" to decide recursion skip using CU edge
> density.
> 4. Re uses edge information when already available in encoder.
>
> diff -r 19f6ed165919 -r 3d60a9a728b3 doc/reST/cli.rst
> --- a/doc/reST/cli.rst  Mon Dec 30 11:58:44 2019 +0530
> +++ b/doc/reST/cli.rst  Fri Dec 27 13:17:09 2019 +0530
> @@ -842,15 +842,20 @@
>         Measure 2Nx2N merge candidates first; if no residual is found,
>         additional modes at that depth are not analysed. Default disabled
>
> -.. option:: --rskip, --no-rskip
> -
> -       This option determines early exit from CU depth recursion. When a
> skip CU is
> -       found, additional heuristics (depending on rd-level) are used to
> decide whether
> -       to terminate recursion. In rdlevels 5 and 6, comparison with
> inter2Nx2N is used,
> -       while at rdlevels 4 and neighbour costs are used to skip recursion.
> -       Provides minimal quality degradation at good performance gains
> when enabled.
> -
> -       Default: enabled, disabled for :option:`--tune grain`
> +.. option:: --rskip <0|1|2>
> +
> +       This option determines early exit from CU depth recursion when
> enabled. When a skip CU is
> +       found, additional heuristics (depending on rd-level and rskip
> mode) are used to decide whether
> +       to terminate recursion. In rdlevels 5 and 6, comparison with
> inter2Nx2N is used,
> +       while at rdlevels 4 and below, neighbour costs are used to skip
> recursion in mode 1, and CU edge density in mode 2.
> +       Provides minimal quality degradation at good performance gains
> when enabled. R-skip mode 0 means disabled.
>
[KS] R-skip mode 0 means disabled is implied. If necessary mentioning
rephrase
There is no mention that this patch introduces rkip 2 only to rd0_4

> +       Default: 1, disabled when :option:`--tune grain` is used.
> +
> +.. option:: --edge-threshold <0..100>
> +
> +       Denotes the minimum percentage of edge density in the CU, below
> which the recursion is skipped.
> +       Default: 5, requires :option:`--rskip mode 2` to be enabled.
>
[KS] It is the minimum edge density the CU is expected to possess.
Usage of density and variance is not consistent across the patch

>  .. option:: --splitrd-skip, --no-splitrd-skip
>
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/CMakeLists.txt
> --- a/source/CMakeLists.txt     Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/CMakeLists.txt     Fri Dec 27 13:17:09 2019 +0530
> @@ -29,7 +29,7 @@
>  option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
>  mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>  # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 184)
> +set(X265_BUILD 185)
>  configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
>                 "${PROJECT_BINARY_DIR}/x265.def")
>  configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/common.h
> --- a/source/common/common.h    Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/common/common.h    Fri Dec 27 13:17:09 2019 +0530
> @@ -129,6 +129,7 @@
>  typedef uint64_t sum2_t;
>  typedef uint64_t pixel4;
>  typedef int64_t  ssum2_t;
> +#define SHIFT_TO_BITPLANE 9
>  #define HISTOGRAM_BINS 1024
>  #define SHIFT 1
>  #else
> @@ -137,6 +138,7 @@
>  typedef uint32_t sum2_t;
>  typedef uint32_t pixel4;
>  typedef int32_t  ssum2_t; // Signed sum
> +#define SHIFT_TO_BITPLANE 7
>  #define HISTOGRAM_BINS 256
>  #define SHIFT 0
>  #endif // if HIGH_BIT_DEPTH
> @@ -272,6 +274,9 @@
>  #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)
>  #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)
>
> +#define RDCOST_BASED_RSKIP 1
> +#define EDGE_BASED_RSKIP 2
> +
>  #define COEF_REMAIN_BIN_REDUCTION   3 // indicates the level at which the
> VLC
>                                        // transitions from Golomb-Rice to
> TU+EG(k)
>
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/frame.cpp
> --- a/source/common/frame.cpp   Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/common/frame.cpp   Fri Dec 27 13:17:09 2019 +0530
> @@ -61,6 +61,7 @@
>      m_edgePic = NULL;
>      m_gaussianPic = NULL;
>      m_thetaPic = NULL;
> +    m_edgeBitPlane = NULL;
>  }
>
>  bool Frame::create(x265_param *param, float* quantOffsets)
> @@ -115,6 +116,18 @@
>          m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight +
> (m_lumaMarginY * 2)));
>      }
>
> +    if (param->bEnableRecursionSkip == EDGE_BASED_RSKIP)
> +    {
> +        uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize -
> 1) / param->maxCUSize;
> +        uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize
> - 1) / param->maxCUSize;
> +        uint32_t lumaMarginX = param->maxCUSize + 32;
> +        uint32_t lumaMarginY = param->maxCUSize + 16;
> +        uint32_t stride = (numCuInWidth * param->maxCUSize) +
> (lumaMarginX << 1);
> +        uint32_t maxHeight = numCuInHeight * param->maxCUSize;
> +        m_bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2));
> +        CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, m_bitPlaneSize);
> +    }
> +
>      if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) &&
> m_lowres.create(param, m_fencPic, param->rc.qgSize))
>      {
>          X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
> initialized");
> @@ -267,4 +280,9 @@
>          X265_FREE(m_gaussianPic);
>          X265_FREE(m_thetaPic);
>      }
> +
> +    if (m_param->bEnableRecursionSkip == EDGE_BASED_RSKIP)
> +    {
> +        X265_FREE(m_edgeBitPlane);
> +    }
>  }
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/frame.h
> --- a/source/common/frame.h     Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/common/frame.h     Fri Dec 27 13:17:09 2019 +0530
> @@ -137,6 +137,8 @@
>      pixel*                 m_gaussianPic;
>      pixel*                 m_thetaPic;
>
> +    pixel*                 m_edgeBitPlane;
> +    uint32_t               m_bitPlaneSize;
>      Frame();
>
>      bool create(x265_param *param, float* quantOffsets);
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/param.cpp
> --- a/source/common/param.cpp   Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/common/param.cpp   Fri Dec 27 13:17:09 2019 +0530
> @@ -198,6 +198,7 @@
>      param->bEnableWeightedBiPred = 0;
>      param->bEnableEarlySkip = 1;
>      param->bEnableRecursionSkip = 1;
> +    param->edgeThreshold = 5.0;
>      param->bEnableAMP = 0;
>      param->bEnableRectInter = 0;
>      param->rdLevel = 3;
> @@ -694,7 +695,8 @@
>      OPT("ref") p->maxNumReferences = atoi(value);
>      OPT("fast-intra") p->bEnableFastIntra = atobool(value);
>      OPT("early-skip") p->bEnableEarlySkip = atobool(value);
> -    OPT("rskip") p->bEnableRecursionSkip = atobool(value);
> +    OPT("rskip") p->bEnableRecursionSkip = atoi(value);
> +    OPT("edge-threshold") p->edgeThreshold = atoi(value)/100.0;
>
[KS] I don't understand the purpose of receiving it as percent and dividing
by 100 internally. What is the goal?
And across the patch percent and double is used interchangeably providing
less clarity.

     OPT("me")p->searchMethod = parseName(value, x265_motion_est_names,
> bError);
>      OPT("subme") p->subpelRefine = atoi(value);
>      OPT("merange") p->searchRange = atoi(value);
> @@ -911,7 +913,7 @@
>      OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value);
>      OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value);
>      OPT("early-skip") p->bEnableEarlySkip = atobool(value);
> -    OPT("rskip") p->bEnableRecursionSkip = atobool(value);
> +    OPT("rskip") p->bEnableRecursionSkip = atoi(value);
>      OPT("rdpenalty") p->rdPenalty = atoi(value);
>      OPT("tskip") p->bEnableTransformSkip = atobool(value);
>      OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value);
> @@ -1213,6 +1215,7 @@
>              }
>          }
>          OPT("hist-threshold") p->edgeTransitionThreshold = atof(value);
> +        OPT("edge-threshold") p->edgeThreshold = atoi(value)/100.0;
>          OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
>          OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
>          OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine =
> atobool(value);
> @@ -1579,9 +1582,13 @@
>      CHECK(param->rdLevel < 1 || param->rdLevel > 6,
>            "RD Level is out of range");
>      CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2,
> -        "RDOQ Level is out of range");
> +          "RDOQ Level is out of range");
>      CHECK(param->dynamicRd < 0 || param->dynamicRd >
> x265_ADAPT_RD_STRENGTH,
> -        "Dynamic RD strength must be between 0 and 4");
> +          "Dynamic RD strength must be between 0 and 4");
> +    CHECK(param->bEnableRecursionSkip > 2 || param->bEnableRecursionSkip
> < 0,
> +          "Invalid Recursion skip mode. Valid modes 0,1,2");
> +    CHECK(param->edgeThreshold < 0 || param->edgeThreshold > 100,
> +          "Percentage Edge threshold for a CU should be integer between 0
> to 100");
>
[KS] You are saying threshold is a percent but you divide by 100 internally
and make it double. How is this check valid now?
[KS] Where are you checking rkip mode 2 with edge threshold combination?

     CHECK(param->bframes && param->bframes >= param->lookaheadDepth &&
> !param->rc.bStatRead,
>            "Lookahead depth must be greater than the max consecutive
> bframe count");
>      CHECK(param->bframes < 0,
> @@ -1887,7 +1894,11 @@
>      TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf");
>      TOOLOPT(param->bEnableRdRefine, "rd-refine");
>      TOOLOPT(param->bEnableEarlySkip, "early-skip");
> -    TOOLOPT(param->bEnableRecursionSkip, "rskip");
> +    TOOLVAL(param->bEnableRecursionSkip, "rskip mode=%d");
> +    if (param->bEnableRecursionSkip == EDGE_BASED_RSKIP)
> +    {
> +        TOOLVAL(param->edgeThreshold, "rskip-threshold=%.2lf");
> +    }
>      TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip");
>      TOOLVAL(param->noiseReductionIntra, "nr-intra=%d");
>      TOOLVAL(param->noiseReductionInter, "nr-inter=%d");
> @@ -2046,6 +2057,10 @@
>      s += sprintf(s, " selective-sao=%d", p->selectiveSAO);
>      BOOL(p->bEnableEarlySkip, "early-skip");
>      BOOL(p->bEnableRecursionSkip, "rskip");
> +    if (p->bEnableRecursionSkip == EDGE_BASED_RSKIP)
> +    {
> +        s += sprintf(s, " edge-threshold=%lf", p->edgeThreshold);
> +    }
>      BOOL(p->bEnableFastIntra, "fast-intra");
>      BOOL(p->bEnableTSkipFast, "tskip-fast");
>      BOOL(p->bCULossless, "cu-lossless");
> @@ -2350,6 +2365,7 @@
>      dst->rdLevel = src->rdLevel;
>      dst->bEnableEarlySkip = src->bEnableEarlySkip;
>      dst->bEnableRecursionSkip = src->bEnableRecursionSkip;
> +    dst->edgeThreshold = src->edgeThreshold;
>      dst->bEnableFastIntra = src->bEnableFastIntra;
>      dst->bEnableTSkipFast = src->bEnableTSkipFast;
>      dst->bCULossless = src->bCULossless;
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/pixel.cpp
> --- a/source/common/pixel.cpp   Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/common/pixel.cpp   Fri Dec 27 13:17:09 2019 +0530
> @@ -876,6 +876,18 @@
>      }
>  }
>
> +static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride,
> pixel* dst, intptr_t dstStride, int width, int height, int shift)
> +{
> +    for (int r = 0; r < height; r++)
> +    {
> +        for (int c = 0; c < width; c++)
> +            dst[c] = (pixel)((src[c] >> shift));

+        dst += dstStride;
> +        src += srcStride;
> +    }
> +}
> +
>
[KS] Why do we not have an asm version of this primitive?

>  static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride,
> pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t
> mask)
>  {
>      for (int r = 0; r < height; r++)
> @@ -1316,6 +1328,7 @@
>      p.planecopy_cp = planecopy_cp_c;
>      p.planecopy_sp = planecopy_sp_c;
>      p.planecopy_sp_shl = planecopy_sp_shl_c;
> +    p.planecopy_pp_shr = planecopy_pp_shr_c;
>  #if HIGH_BIT_DEPTH
>      p.planeClipAndMax = planeClipAndMax_c;
>  #endif
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/common/primitives.h
> --- a/source/common/primitives.h        Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/common/primitives.h        Fri Dec 27 13:17:09 2019 +0530
> @@ -204,6 +204,7 @@
>  typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2,
> const int endX);
>  typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride,
> pixel* dst, intptr_t dstStride, int width, int height, int shift);
>  typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride,
> pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t
> mask);
> +typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride,
> pixel* dst, intptr_t dstStride, int width, int height, int shift);
>  typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int
> width, int height, uint64_t *outsum, const pixel minPix, const pixel
> maxPix);
>
>  typedef void (*cutree_propagate_cost) (int* dst, const uint16_t*
> propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const
> int32_t* invQscales, const double* fpsFactor, int len);
> @@ -358,6 +359,7 @@
>      planecopy_cp_t        planecopy_cp;
>      planecopy_sp_t        planecopy_sp;
>      planecopy_sp_t        planecopy_sp_shl;
> +    planecopy_pp_t        planecopy_pp_shr;
>      planeClipAndMax_t     planeClipAndMax;
>
>      weightp_sp_t          weight_sp;
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/encoder/analysis.cpp       Fri Dec 27 13:17:09 2019 +0530
> @@ -1313,14 +1313,22 @@
>          if (md.bestMode && m_param->bEnableRecursionSkip &&
> !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO &&
> m_param->analysisReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])))
>          {
>              skipRecursion = md.bestMode->cu.isSkipped(0);
> -            if (mightSplit && depth >= minDepth && !skipRecursion)
> +            if (mightSplit && !skipRecursion)
>              {
> -                if (depth)
> -                    skipRecursion = recursionDepthCheck(parentCTU,
> cuGeom, *md.bestMode);
> -                if (m_bHD && !skipRecursion && m_param->rdLevel == 2 &&
> md.fencYuv.m_size != MAX_CU_SIZE)
> -                    skipRecursion = complexityCheckCU(*md.bestMode);
> +                if (depth >= minDepth && m_param->bEnableRecursionSkip ==
> RDCOST_BASED_RSKIP)
> +                {
> +                    if (depth)
> +                        skipRecursion = recursionDepthCheck(parentCTU,
> cuGeom, *md.bestMode);
> +                    if (m_bHD && !skipRecursion && m_param->rdLevel == 2
> && md.fencYuv.m_size != MAX_CU_SIZE)
> +                        skipRecursion = complexityCheckCU(*md.bestMode);
> +                }
> +                else if (m_param->bEnableRecursionSkip ==
> EDGE_BASED_RSKIP)
> +                {
>
[KS] Regarding my question on not considering topSkip's decision, you said
there is FPS loss. Can you elaborate the theory behind it?

> +                    skipRecursion = edgeRecursionSkip(parentCTU, depth);
> +                }
>              }
>          }
> +
>          if (m_param->bAnalysisType == AVC_INFO && md.bestMode &&
> cuGeom.numPartitions <= 16 && m_param->analysisReuseLevel == 7)
>              skipRecursion = true;
>          /* Step 2. Evaluate each of the 4 split sub-blocks in series */
> @@ -3543,6 +3551,33 @@
>      return false;
>  }
>
> +bool Analysis::edgeRecursionSkip(const CUData& ctu, int depth)
> +{
> +    int cuLen = m_param->maxCUSize >> depth;
> +    if (cuLen >= RSKIP2_MIN_CUSIZE)
> +    {
> +        uint8_t blockType = g_log2Size[cuLen] - 2;
>
[KS] Can you tell me the purpose of cuLen computation and its subsequent
usage when we have cuGeom's log2size?

> +        int shift = g_log2Size[cuLen] * 2;
> +        intptr_t stride = m_frame->m_fencPic->m_stride;
> +        pixel* edgePic = m_frame->m_edgeBitPlane +
> m_frame->m_fencPic->m_lumaMarginY * m_frame->m_fencPic->m_stride +
> m_frame->m_fencPic->m_lumaMarginX;
> +        intptr_t blockOffsetLuma = ctu.m_cuPelX + ctu.m_cuPelY * stride;
> +        uint64_t sum_ss = primitives.cu[blockType].var(edgePic +
> blockOffsetLuma, stride);
> +        uint32_t sum = (uint32_t)sum_ss;
> +        uint32_t ss = (uint32_t)(sum_ss >> 32);
> +        double pixelCount = pow(2.0, shift);
>
[KS] When pixel count is pow of 2, what is the reason for using pow instead
of shift?

> +        double cuEdgeVariance = (ss - (sum * sum / pixelCount)) /
> pixelCount;
>
[KS] The difference between complexityCheckCU and this module's core
functionality is sum of abs diff and sum of squared diff. Although squared
diff is more accurate(as you mentioned), are you seeing a very big
difference in accuracy of variance across these 2 functions? Have we
measured this? I am skeptical about having new module doing same
functionality

> +
> +        if (cuEdgeVariance > m_param->edgeThreshold)
> +            return false;
> +        else
> +            return true;
> +    }
> +    else
> +    {
> +        return true;
> +    }
>
[KS] If the size < 32x32 you force recursion skipping? Why? What is the
quality impact you are seeing?
Can you share some results?

> +}
> +
>
[KS] If CU size < 32x32 why do you even call this function? I see this as
an overhead

>  uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom&
> cuGeom)
>  {
>      uint32_t cuVariance = 0;
> @@ -3566,7 +3601,6 @@
>              cnt++;
>          }
>      }
> -
>      return cuVariance / cnt;
>  }
>
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/encoder/analysis.h Fri Dec 27 13:17:09 2019 +0530
> @@ -36,6 +36,8 @@
>  #include "entropy.h"
>  #include "search.h"
>
> +#define RSKIP2_MIN_CUSIZE 32 /* CU size until which the cu edge variance
> will be computed. */
> +
>
[KS] When you can use of the relevant global parameters, can you tell me
why this additional macro is necessary?

>  namespace X265_NS {
>  // private namespace
>
> @@ -52,7 +54,7 @@
>          splitRefs = 0;
>          mvCost[0] = 0; // L0
>          mvCost[1] = 0; // L1
> -        sa8dCost    = 0;
> +        sa8dCost  = 0;
>      }
>  };
>
> @@ -120,7 +122,6 @@
>
>      Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom,
> const Entropy& initialContext);
>      int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU);
> -
>  protected:
>      /* Analysis data for save/load mode, writes/reads data based on
> absPartIdx */
>      x265_analysis_inter_data*  m_reuseInterDataCTU;
> @@ -192,6 +193,7 @@
>      uint32_t topSkipMinDepth(const CUData& parentCTU, const CUGeom&
> cuGeom);
>      bool recursionDepthCheck(const CUData& parentCTU, const CUGeom&
> cuGeom, const Mode& bestMode);
>      bool complexityCheckCU(const Mode& bestMode);
> +    bool edgeRecursionSkip(const CUData& parentCTU, int depth);
>
>      /* generate residual and recon pixels for an entire CTU recursively
> (RD0) */
>      void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/encoder/encoder.cpp        Fri Dec 27 13:17:09 2019 +0530
> @@ -1351,9 +1351,9 @@
>      int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1;
>      memset(m_edgePic, 0, bufSize * numBytes);
>
> -    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height,
> pic->width, false))
> -    {
> -        x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");
> +    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height,
> pic->width, false, 1))
> +    {
> +        x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!");
>          return false;
>      }
>
> @@ -1668,6 +1668,13 @@
>                          }
>                      }
>                  }
> +                if (m_param->bEnableRecursionSkip == EDGE_BASED_RSKIP &&
> m_param->bHistBasedSceneCut)
> +                {
> +                    pixel* src = m_edgePic;
> +                    pixel* edgePic = inFrame->m_edgeBitPlane +
> inFrame->m_fencPic->m_lumaMarginY * inFrame->m_fencPic->m_stride +
> inFrame->m_fencPic->m_lumaMarginX;
> +                    primitives.planecopy_pp_shr(src,
> inFrame->m_fencPic->m_picWidth, edgePic, inFrame->m_fencPic->m_stride,
> +                        inFrame->m_fencPic->m_picWidth,
> inFrame->m_fencPic->m_picHeight, 0);
> +                }
>              }
>              else
>              {
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/encoder/frameencoder.cpp   Fri Dec 27 13:17:09 2019 +0530
> @@ -130,7 +130,7 @@
>          {
>              rowSum += sliceGroupSizeAccu;
>              m_sliceBaseRow[++sidx] = i;
> -        }
> +        }
>      }
>      X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
>      m_sliceBaseRow[0] = 0;
> @@ -268,6 +268,20 @@
>      curFrame->m_encData->m_jobProvider = this;
>      curFrame->m_encData->m_slice->m_mref = m_mref;
>
> +    if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode !=
> X265_AQ_EDGE && m_param->bEnableRecursionSkip == EDGE_BASED_RSKIP)
> +    {
> +        int height = curFrame->m_fencPic->m_picHeight;
> +        int width = curFrame->m_fencPic->m_picWidth;
> +        intptr_t stride = curFrame->m_fencPic->m_stride;
> +        pixel* edgePic = curFrame->m_edgeBitPlane +
> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
> curFrame->m_fencPic->m_lumaMarginX;
> +
> +        if (!computeEdge(edgePic, curFrame->m_fencPic->m_picOrg[0], NULL,
> stride, height, width, false, 1))
> +        {
> +            x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge
> !");
> +            return false;
> +        }
> +    }
> +
>      if (!m_cuGeoms)
>      {
>          if (!initializeGeoms())
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp      Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/encoder/slicetype.cpp      Fri Dec 27 13:17:09 2019 +0530
> @@ -87,7 +87,7 @@
>
>  namespace X265_NS {
>
> -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta,
> intptr_t stride, int height, int width, bool bcalcTheta)
> +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta,
> intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel)
>  {
>      intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo =
> 0, colThree = 0;
>      intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0,
> bottomRight = 0;
> @@ -141,7 +141,7 @@
>                         theta = 180 + theta;
>                      edgeTheta[middle] = (pixel)theta;
>                  }
> -                edgePic[middle] = (pixel)(gradientMagnitude >=
> edgeThreshold ? edgeThreshold : blackPixel);
> +                edgePic[middle] = (pixel)(gradientMagnitude >=
> EDGE_THRESHOLD ? whitePixel : blackPixel);
>

> [KS] Does the output of edgePic differ when AQ_EDGE is enabled and
otherwise? Does this make the variance computation in analysis
inconsistent?
You mentioned about the accuracy of AQ_EDGE due to denoise but would like
to confirm if output of edgeRecursionSkip module will vary with/without
AQ_EDGE?

>              }
>          }
>          return true;
> @@ -519,6 +519,14 @@
>                  if (param->rc.aqMode == X265_AQ_EDGE)
>                      edgeFilter(curFrame, param);
>
> +                if (param->rc.aqMode == X265_AQ_EDGE &&
> !param->bHistBasedSceneCut && param->bEnableRecursionSkip ==
> EDGE_BASED_RSKIP)
> +                {
> +                    pixel* src = curFrame->m_edgePic +
> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
> curFrame->m_fencPic->m_lumaMarginX;
> +                    pixel* dst = curFrame->m_edgeBitPlane +
> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
> curFrame->m_fencPic->m_lumaMarginX;
> +                    primitives.planecopy_pp_shr(src,
> curFrame->m_fencPic->m_stride, dst,
> +                        curFrame->m_fencPic->m_stride,
> curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight,
> SHIFT_TO_BITPLANE);
> +                }
> +
>                  if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE ||
> param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode ==
> X265_AQ_EDGE)
>                  {
>                      double bit_depth_correction = 1.f / (1 << (2 *
> (X265_DEPTH - 8)));
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/encoder/slicetype.h
> --- a/source/encoder/slicetype.h        Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/encoder/slicetype.h        Fri Dec 27 13:17:09 2019 +0530
> @@ -44,9 +44,9 @@
>  #define EDGE_INCLINATION 45
>
>  #if HIGH_BIT_DEPTH
> -#define edgeThreshold 1023.0
> +#define EDGE_THRESHOLD 1023.0
>  #else
> -#define edgeThreshold 255.0
> +#define EDGE_THRESHOLD 255.0
>  #endif
>  #define PI 3.14159265
>
> @@ -101,7 +101,7 @@
>  protected:
>
>      uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t
> blockY, int csp, uint32_t qgSize);
> -    uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t
> blockX, uint32_t blockY, uint32_t qgSize);
> +    uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t
> blockX, uint32_t blockY, uint32_t qgSize);
>      uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY,
> uint32_t qgSize);
>      uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
>      bool     allocWeightedRef(Lowres& fenc);
> @@ -265,7 +265,6 @@
>      CostEstimateGroup& operator=(const CostEstimateGroup&);
>  };
>
> -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta,
> intptr_t stride, int height, int width, bool bcalcTheta);
> -
> +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta,
> intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel =
> EDGE_THRESHOLD);
>  }
>  #endif // ifndef X265_SLICETYPE_H
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/test/regression-tests.txt
> --- a/source/test/regression-tests.txt  Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/test/regression-tests.txt  Fri Dec 27 13:17:09 2019 +0530
> @@ -161,6 +161,8 @@
>  Island_960x540_24.yuv,--no-cutree --aq-mode 0 --bitrate 6000
> --scenecut-aware-qp
>  sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut
> --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000
> --vbv-bufsize 15000 --vbv-maxrate 12000
>  sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut
> --hist-threshold 0.02
> +crowd_run_1080p50.yuv, --rskip 2 --edge-threshold 5 --hist-scenecut
> --hist-threshold 0.1 --aq-mode 4
> +crowd_run_1080p50.yuv, --preset slow --rskip 2 --edge-threshold 5
> --hist-scenecut --hist-threshold 0.1 --aq-mode 4
>
[KS] What about other hist-scenecut & aq-mode combinations? We have
different flows right?
[KS] I don't understand why we should restrict this to rd level 0 to 4 when
there are so many x265 users doing offline encodes with slower/veryslow
preset.
Even though rd level 5 or 6 does not consider additional heuristics to
perform rskip like rd levels 0 to 4, it still skips recursion based on
residual from inter2Nx2N.
Hence I am interested to know why this algorithm doesn't support rd 5/6

>
>  # Main12 intraCost overflow bug test
>  720p50_parkrun_ter.y4m,--preset medium
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/x265.h
> --- a/source/x265.h     Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/x265.h     Fri Dec 27 13:17:09 2019 +0530
> @@ -1859,6 +1859,9 @@
>
>      /* Enable HME search ranges for L0, L1 and L2 respectively. */
>      int       hmeRange[3];
> +
> +    /* Edge variance threshold for quad tree establishment. */
> +    double       edgeThreshold;
>
[KS] Given the range it holds,why not float?

>  } x265_param;
>
>  /* x265_param_alloc:
> diff -r 19f6ed165919 -r 3d60a9a728b3 source/x265cli.h
> --- a/source/x265cli.h  Mon Dec 30 11:58:44 2019 +0530
> +++ b/source/x265cli.h  Fri Dec 27 13:17:09 2019 +0530
> @@ -105,8 +105,8 @@
>      { "amp",                  no_argument, NULL, 0 },
>      { "no-early-skip",        no_argument, NULL, 0 },
>      { "early-skip",           no_argument, NULL, 0 },
> -    { "no-rskip",             no_argument, NULL, 0 },
> -    { "rskip",                no_argument, NULL, 0 },
> +    { "rskip",                required_argument, NULL, 0 },
> +    { "edge-threshold",       required_argument, NULL, 0 },
>      { "no-fast-cbf",          no_argument, NULL, 0 },
>      { "fast-cbf",             no_argument, NULL, 0 },
>      { "no-tskip",             no_argument, NULL, 0 },
> @@ -451,7 +451,8 @@
>      H0("   --[no-]ssim-rd                Enable ssim rate distortion
> optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd));
>      H0("   --[no-]rd-refine              Enable QP based RD refinement
> for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine));
>      H0("   --[no-]early-skip             Enable early SKIP detection.
> Default %s\n", OPT(param->bEnableEarlySkip));
> -    H0("   --[no-]rskip                  Enable early exit from
> recursion. Default %s\n", OPT(param->bEnableRecursionSkip));
> +    H0("   --rskip <mode>                Enable or disable early exit
> from recursion. Mode 0: Disabled. Mode 1: exit using rdcost. Mode 2: exit
> using edge density. Default %s\n", OPT(param->bEnableRecursionSkip));
>
[KS] This is no longer on/off CLI to enable/disable. Keep the description
concise

> +    H1("   --edge-threshold              Threshold in terms of percentage
> for edge density in CUs to terminate the recursion depth. Applicable only
> for rskip mode 2. Default %s\n", OPT(param->edgeThreshold));
>      H1("   --[no-]tskip-fast             Enable fast intra transform
> skipping. Default %s\n", OPT(param->bEnableTSkipFast));
>      H1("   --[no-]splitrd-skip           Enable skipping split RD
> analysis when sum of split CU rdCost larger than one split CU rdCost for
> Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip));
>      H1("   --nr-intra <integer>          An integer value in range of 0
> to 2000, which denotes strength of noise reduction in intra CUs. Default
> 0\n");
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>

-- 
Regards,
Kavitha
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200107/33c64436/attachment-0001.html>