[x265] [PATCH] new aq implementation

Tue Apr 30 13:06:54 CEST 2019

Where is the --qp-adaptation-range parameter used?

On Sat, Feb 9, 2019 at 4:44 AM Pooja Venkatesan <pooja at multicorewareinc.com>
wrote:

> # HG changeset patch
> # User Ashok Kumar Mishra <ashok at multicorewareinc.com>
> # Date 1545902034 -19800
> #      Thu Dec 27 14:43:54 2018 +0530
> # Node ID 3cd0b5ed0b91bcb3d5d6cfa1395cb502fc6d01ca
> # Parent  129416ec047966f7d7e7898fbe16110444b9a183
> new aq implementation
>
> It scales the quantization step size according to the spatial activity of one
> coding unit relative to frame average spatial activity. This AQ method utilizes
> the minimum variance of sub-unit in each coding unit to represent the coding
> unit's spatial complexity.
>
> diff -r 129416ec0479 -r 3cd0b5ed0b91 doc/reST/cli.rst
> --- a/doc/reST/cli.rst Fri Dec 28 09:03:26 2018 +0530
> +++ b/doc/reST/cli.rst Thu Dec 27 14:43:54 2018 +0530
> @@ -1646,6 +1646,21 @@
>   Default 1.0.
>   **Range of values:** 0.0 to 3.0
>
> +.. option:: --hevc-aq
> +
> + Enable adaptive quantization
> + It scales the quantization step size according to the spatial activity of one
> + coding unit relative to frame average spatial activity. This AQ method utilizes
> + the minimum variance of sub-unit in each coding unit to represent the coding
> + unit’s spatial complexity.
> +
> +.. option:: --qp-adaptation-range
> +
> + Delta-QP range by QP adaptation based on a psycho-visual model.
> +
> + Default 1.0.
> + **Range of values:** 1.0 to 6.0
> +
>  .. option:: --aq-motion, --no-aq-motion
>
>   Adjust the AQ offsets based on the relative motion of each block with
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/common/lowres.cpp
> --- a/source/common/lowres.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/common/lowres.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -2,6 +2,7 @@
>   * Copyright (C) 2013-2017 MulticoreWare, Inc
>   *
>   * Authors: Gopu Govindaswamy <gopu at multicorewareinc.com>
> + *          Ashok Kumar Mishra <ashok at multicorewareinc.com>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -27,10 +28,31 @@
>
>  using namespace X265_NS;
>
> +bool PicQPAdaptationLayer::create(uint32_t width, uint32_t height,
> uint32_t partWidth, uint32_t partHeight, uint32_t numAQPartInWidthExt,
> uint32_t numAQPartInHeightExt)
> +{
> +    aqPartWidth = partWidth;
> +    aqPartHeight = partHeight;
> +    numAQPartInWidth = (width + partWidth - 1) / partWidth;
> +    numAQPartInHeight = (height + partHeight - 1) / partHeight;
> +
> +    CHECKED_MALLOC_ZERO(dActivity, double, numAQPartInWidthExt *
> numAQPartInHeightExt);
> +    CHECKED_MALLOC_ZERO(dQpOffset, double, numAQPartInWidthExt *
> numAQPartInHeightExt);
> +    CHECKED_MALLOC_ZERO(dCuTreeOffset, double, numAQPartInWidthExt *
> numAQPartInHeightExt);
> +
> +    if (bQpSize)
> +        CHECKED_MALLOC_ZERO(dCuTreeOffset8x8, double, numAQPartInWidthExt
> * numAQPartInHeightExt);
> +
> +    return true;
> +fail:
> +    return false;
> +}
> +
>  bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
>  {
>      isLowres = true;
>      bframes = param->bframes;
> +    widthFullRes = origPic->m_picWidth;
> +    heightFullRes = origPic->m_picHeight;
>      width = origPic->m_picWidth / 2;
>      lines = origPic->m_picHeight / 2;
>      lumaStride = width + 2 * origPic->m_lumaMarginX;
> @@ -49,7 +71,7 @@
>
>      size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY);
>      size_t padoffset = lumaStride * origPic->m_lumaMarginY +
> origPic->m_lumaMarginX;
> -    if (!!param->rc.aqMode)
> +    if (!!param->rc.aqMode || !!param->rc.hevcAq)
>      {
>          CHECKED_MALLOC_ZERO(qpAqOffset, double, cuCountFullRes);
>          CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes);
> @@ -57,10 +79,50 @@
>          if (qgSize == 8)
>              CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount);
>      }
> +
>      if (origPic->m_param->bAQMotion)
>          CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes);
>      if (origPic->m_param->bDynamicRefine)
>          CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes);
> +
> +    if (!!param->rc.hevcAq)
> +    {
> +        m_maxCUSize = param->maxCUSize;
> +        m_qgSize = qgSize;
> +
> +        uint32_t partWidth, partHeight, nAQPartInWidth, nAQPartInHeight;
> +
> +        pAQLayer = new PicQPAdaptationLayer[4];
> +        maxAQDepth = 0;
> +        for (uint32_t d = 0; d < 4; d++)
> +        {
> +            int ctuSizeIdx = 6 - g_log2Size[param->maxCUSize];
> +            int aqDepth = g_log2Size[param->maxCUSize] -
> g_log2Size[qgSize];
> +            if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> +                continue;
> +
> +            pAQLayer->minAQDepth = d;
> +            partWidth = param->maxCUSize >> d;
> +            partHeight = param->maxCUSize >> d;
> +
> +            if (minAQSize[ctuSizeIdx] == d)
> +            {
> +                pAQLayer[d].bQpSize = true;
> +                nAQPartInWidth = maxBlocksInRow * 2;
> +                nAQPartInHeight = maxBlocksInCol * 2;
> +            }
> +            else
> +            {
> +                pAQLayer[d].bQpSize = false;
> +                nAQPartInWidth = (origPic->m_picWidth + partWidth - 1) /
> partWidth;
> +                nAQPartInHeight = (origPic->m_picHeight + partHeight - 1)
> / partHeight;
> +            }
> +
> +            maxAQDepth++;
> +
> +            pAQLayer[d].create(origPic->m_picWidth, origPic->m_picHeight,
> partWidth, partHeight, nAQPartInWidth, nAQPartInHeight);
> +        }
> +    }
>      CHECKED_MALLOC(propagateCost, uint16_t, cuCount);
>
>      /* allocate lowres buffers */
> @@ -130,6 +192,25 @@
>      X265_FREE(invQscaleFactor8x8);
>      X265_FREE(qpAqMotionOffset);
>      X265_FREE(blockVariance);
> +    if (maxAQDepth > 0)
> +    {
> +        for (uint32_t d = 0; d < 4; d++)
> +        {
> +            int ctuSizeIdx = 6 - g_log2Size[m_maxCUSize];
> +            int aqDepth = g_log2Size[m_maxCUSize] - g_log2Size[m_qgSize];
> +            if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> +                continue;
> +
> +            X265_FREE(pAQLayer[d].dActivity);
> +            X265_FREE(pAQLayer[d].dQpOffset);
> +            X265_FREE(pAQLayer[d].dCuTreeOffset);
> +
> +            if (pAQLayer[d].bQpSize == true)
> +                X265_FREE(pAQLayer[d].dCuTreeOffset8x8);
> +        }
> +
> +        delete[] pAQLayer;
> +    }
>  }
>  // (re) initialize lowres state
>  void Lowres::init(PicYuv *origPic, int poc)
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/common/lowres.h
> --- a/source/common/lowres.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/common/lowres.h Thu Dec 27 14:43:54 2018 +0530
> @@ -103,6 +103,49 @@
>      }
>  };
>
> +static const uint32_t aqLayerDepth[3][4][4] = {
> +    {  // ctu size 64
> +        { 1, 0, 1, 0 },
> +        { 1, 1, 1, 0 },
> +        { 1, 1, 1, 0 },
> +        { 1, 1, 1, 1 }
> +    },
> +    {  // ctu size 32
> +        { 1, 1, 0, 0 },
> +        { 1, 1, 0, 0 },
> +        { 1, 1, 1, 0 },
> +        { 0, 0, 0, 0 },
> +    },
> +    {  // ctu size 16
> +        { 1, 0, 0, 0 },
> +        { 1, 1, 0, 0 },
> +        { 0, 0, 0, 0 },
> +        { 0, 0, 0, 0 }
> +    }
> +};
> +
> +// min aq size for ctu size 64, 32 and 16
> +static const uint32_t minAQSize[3] = { 3, 2, 1 };
> +
> +struct PicQPAdaptationLayer
> +{
> +    uint32_t aqPartWidth;
> +    uint32_t aqPartHeight;
> +    uint32_t numAQPartInWidth;
> +    uint32_t numAQPartInHeight;
> +    uint32_t minAQDepth;
> +    double*  dActivity;
> +    double*  dQpOffset;
> +
> +    double*  dCuTreeOffset;
> +    double*  dCuTreeOffset8x8;
> +    double   dAvgActivity;
> +    bool     bQpSize;
> +
> +    bool  create(uint32_t width, uint32_t height, uint32_t aqPartWidth,
> uint32_t aqPartHeight, uint32_t numAQPartInWidthExt, uint32_t
> numAQPartInHeightExt);
> +    void  destroy();
> +};
> +
>  /* lowres buffers, sizes and strides */
>  struct Lowres : public ReferencePlanes
>  {
> @@ -154,6 +197,13 @@
>      uint64_t  wp_sum[3];
>
>      /* cutree intermediate data */
> +    PicQPAdaptationLayer* pAQLayer;
> +    uint32_t maxAQDepth;
> +    uint32_t widthFullRes;
> +    uint32_t heightFullRes;
> +    uint32_t m_maxCUSize;
> +    uint32_t m_qgSize;
> +
>      uint16_t* propagateCost;
>      double    weightedCostDelta[X265_BFRAME_MAX + 2];
>      ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/common/param.cpp
> --- a/source/common/param.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/common/param.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -233,8 +233,10 @@
>      param->rc.rateControlMode = X265_RC_CRF;
>      param->rc.qp = 32;
>      param->rc.aqMode = X265_AQ_AUTO_VARIANCE;
> +    param->rc.hevcAq = 0;
>      param->rc.qgSize = 32;
>      param->rc.aqStrength = 1.0;
> +    param->rc.qpAdaptationRange = 1.0;
>      param->rc.cuTree = 1;
>      param->rc.rfConstantMax = 0;
>      param->rc.rfConstantMin = 0;
> @@ -528,6 +530,7 @@
>              param->rc.pbFactor = 1.0;
>              param->rc.cuTree = 0;
>              param->rc.aqMode = 0;
> +            param->rc.hevcAq = 0;
>              param->rc.qpStep = 1;
>              param->rc.bEnableGrain = 1;
>              param->bEnableRecursionSkip = 0;
> @@ -1186,6 +1189,8 @@
>          }
>          OPT("hrd-concat") p->bEnableHRDConcatFlag = atobool(value);
>          OPT("refine-ctu-distortion") p->ctuDistortionRefine = atoi(value);
> +        OPT("hevc-aq") p->rc.hevcAq = atobool(value);
> +        OPT("qp-adaptation-range") p->rc.qpAdaptationRange = atof(value);
>          else
>              return X265_PARAM_BAD_NAME;
>      }
> @@ -1430,6 +1435,8 @@
>            "Aq-Mode is out of range");
>      CHECK(param->rc.aqStrength < 0 || param->rc.aqStrength > 3,
>            "Aq-Strength is out of range");
> +    CHECK(param->rc.qpAdaptationRange < 1.0f ||
> param->rc.qpAdaptationRange > 6.0f,
> +        "qp adaptation range is out of range");
>      CHECK(param->deblockingFilterTCOffset < -6 ||
> param->deblockingFilterTCOffset > 6,
>            "deblocking filter tC offset must be in the range of -6 to +6");
>      CHECK(param->deblockingFilterBetaOffset < -6 ||
> param->deblockingFilterBetaOffset > 6,
> @@ -1956,6 +1963,8 @@
>      s += sprintf(s, " max-ausize-factor=%.1f", p->maxAUSizeFactor);
>      BOOL(p->bDynamicRefine, "dynamic-refine");
>      BOOL(p->bSingleSeiNal, "single-sei");
> +    BOOL(p->rc.hevcAq, "hevc-aq");
> +    s += sprintf(s, " qp-adaptation-range=%.2f", p->rc.qpAdaptationRange);
>  #undef BOOL
>      return buf;
>  }
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/analysis.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -3556,10 +3556,39 @@
>      return cuVariance / cnt;
>  }
>
> +double Analysis::aqQPOffset(const CUData& ctu, const CUGeom& cuGeom)
> +{
> +    uint32_t aqDepth = X265_MIN(cuGeom.depth,
> m_frame->m_lowres.maxAQDepth - 1);
> +    PicQPAdaptationLayer* pQPLayer = &m_frame->m_lowres.pAQLayer[aqDepth];
> +
> +    uint32_t aqPosX = (ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]) /
> pQPLayer->aqPartWidth;
> +    uint32_t aqPosY = (ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]) /
> pQPLayer->aqPartHeight;
> +
> +    uint32_t aqStride = pQPLayer->numAQPartInWidth;
> +
> +    double dQpOffset = pQPLayer->dQpOffset[aqPosY * aqStride + aqPosX];
> +    return dQpOffset;
> +}
> +
> +double Analysis::cuTreeQPOffset(const CUData& ctu, const CUGeom& cuGeom)
> +{
> +    uint32_t aqDepth = X265_MIN(cuGeom.depth,
> m_frame->m_lowres.maxAQDepth - 1);
> +    PicQPAdaptationLayer* pcAQLayer =
> &m_frame->m_lowres.pAQLayer[aqDepth];
> +
> +    uint32_t aqPosX = (ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]) /
> pcAQLayer->aqPartWidth;
> +    uint32_t aqPosY = (ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]) /
> pcAQLayer->aqPartHeight;
> +
> +    uint32_t aqStride = pcAQLayer->numAQPartInWidth;
> +
> +    double dQpOffset = pcAQLayer->dCuTreeOffset[aqPosY * aqStride +
> aqPosX];
> +    return dQpOffset;
> +}
> +
>  int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom&
> cuGeom, int32_t complexCheck, double baseQp)
>  {
>      FrameData& curEncData = *m_frame->m_encData;
>      double qp = baseQp >= 0 ? baseQp :
> curEncData.m_cuStat[ctu.m_cuAddr].baseQp;
> +    bool bCuTreeOffset = IS_REFERENCED(m_frame) && m_param->rc.cuTree &&
> !complexCheck;
>
>      if ((m_param->analysisMultiPassDistortion && m_param->rc.bStatRead)
> || (m_param->ctuDistortionRefine && m_param->analysisLoad))
>      {
> @@ -3577,40 +3606,60 @@
>          else
>              return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
>      }
> -    int loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
> -
> -    /* Use cuTree offsets if cuTree enabled and frame is referenced, else
> use AQ offsets */
> -    bool isReferenced = IS_REFERENCED(m_frame);
> -    double *qpoffs = (isReferenced && m_param->rc.cuTree &&
> !complexCheck) ? m_frame->m_lowres.qpCuTreeOffset :
> -
>    m_frame->m_lowres.qpAqOffset;
> -    if (qpoffs)
> +    if (m_param->rc.hevcAq)
>      {
> -        uint32_t width = m_frame->m_fencPic->m_picWidth;
> -        uint32_t height = m_frame->m_fencPic->m_picHeight;
> -        uint32_t block_x = ctu.m_cuPelX +
> g_zscanToPelX[cuGeom.absPartIdx];
> -        uint32_t block_y = ctu.m_cuPelY +
> g_zscanToPelY[cuGeom.absPartIdx];
> -        uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr -
> 1)) / loopIncr;
> -        uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth;
> -        double qp_offset = 0;
> -        uint32_t cnt = 0;
> -        for (uint32_t block_yy = block_y; block_yy < block_y + blockSize
> && block_yy < height; block_yy += loopIncr)
> +        /* Use cuTree offsets if cuTree enabled and frame is referenced,
> else use AQ offsets */
> +        double dQpOffset = 0;
> +        if (bCuTreeOffset)
>          {
> -            for (uint32_t block_xx = block_x; block_xx < block_x +
> blockSize && block_xx < width; block_xx += loopIncr)
> +            dQpOffset = cuTreeQPOffset(ctu, cuGeom);
> +        }
> +        else
> +        {
> +            dQpOffset = aqQPOffset(ctu, cuGeom);
> +            if (complexCheck)
>              {
> -                uint32_t idx = ((block_yy / loopIncr) * (maxCols)) +
> (block_xx / loopIncr);
> -                qp_offset += qpoffs[idx];
> -                cnt++;
> +                int32_t offset = (int32_t)(dQpOffset * 100 + .5);
> +                double threshold = (1 - ((x265_ADAPT_RD_STRENGTH -
> m_param->dynamicRd) * 0.5));
> +                int32_t max_threshold = (int32_t)(threshold * 100 + .5);
> +                return (offset < max_threshold);
>              }
>          }
> -
> -        qp_offset /= cnt;
> -        qp += qp_offset;
> -        if (complexCheck)
> +        qp += dQpOffset;
> +    }
> +    else
> +    {
> +        int loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
> +        /* Use cuTree offsets if cuTree enabled and frame is referenced,
> else use AQ offsets */
> +        double *qpoffs = bCuTreeOffset ? m_frame->m_lowres.qpCuTreeOffset
> : m_frame->m_lowres.qpAqOffset;
> +        if (qpoffs)
>          {
> -            int32_t offset = (int32_t)(qp_offset * 100 + .5);
> -            double threshold = (1 - ((x265_ADAPT_RD_STRENGTH -
> m_param->dynamicRd) * 0.5));
> -            int32_t max_threshold = (int32_t)(threshold * 100 + .5);
> -            return (offset < max_threshold);
> +            uint32_t width = m_frame->m_fencPic->m_picWidth;
> +            uint32_t height = m_frame->m_fencPic->m_picHeight;
> +            uint32_t block_x = ctu.m_cuPelX +
> g_zscanToPelX[cuGeom.absPartIdx];
> +            uint32_t block_y = ctu.m_cuPelY +
> g_zscanToPelY[cuGeom.absPartIdx];
> +            uint32_t maxCols = (m_frame->m_fencPic->m_picWidth +
> (loopIncr - 1)) / loopIncr;
> +            uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth;
> +            double dQpOffset = 0;
> +            uint32_t cnt = 0;
> +            for (uint32_t block_yy = block_y; block_yy < block_y +
> blockSize && block_yy < height; block_yy += loopIncr)
> +            {
> +                for (uint32_t block_xx = block_x; block_xx < block_x +
> blockSize && block_xx < width; block_xx += loopIncr)
> +                {
> +                    uint32_t idx = ((block_yy / loopIncr) * (maxCols)) +
> (block_xx / loopIncr);
> +                    dQpOffset += qpoffs[idx];
> +                    cnt++;
> +                }
> +            }
> +            dQpOffset /= cnt;
> +            qp += dQpOffset;
> +            if (complexCheck)
> +            {
> +                int32_t offset = (int32_t)(dQpOffset * 100 + .5);
> +                double threshold = (1 - ((x265_ADAPT_RD_STRENGTH -
> m_param->dynamicRd) * 0.5));
> +                int32_t max_threshold = (int32_t)(threshold * 100 + .5);
> +                return (offset < max_threshold);
> +            }
>          }
>      }
>
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/analysis.h Thu Dec 27 14:43:54 2018 +0530
> @@ -201,7 +201,8 @@
>
>      void classifyCU(const CUData& ctu, const CUGeom& cuGeom, const Mode&
> bestMode, TrainingData& trainData);
>      void trainCU(const CUData& ctu, const CUGeom& cuGeom, const Mode&
> bestMode, TrainingData& trainData);
> -
> +    double aqQPOffset(const CUData& ctu, const CUGeom& cuGeom);
> +    double cuTreeQPOffset(const CUData& ctu, const CUGeom& cuGeom);
>      void calculateNormFactor(CUData& ctu, int qp);
>      void normFactor(const pixel* src, uint32_t blockSize, CUData& ctu,
> int qp, TextType ttype);
>
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/encoder.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -2839,6 +2839,12 @@
>      if (p->rc.aqMode == X265_AQ_NONE && p->rc.cuTree == 0)
>          p->rc.aqStrength = 0;
>
> +    if (p->rc.hevcAq && p->rc.aqMode)
> +    {
> +        p->rc.aqMode = X265_AQ_NONE;
> +        x265_log(p, X265_LOG_WARNING, "hevc-aq enabled, disabling other
> aq-modes\n");
> +    }
> +
>      if (p->totalFrames && p->totalFrames <= 2 * ((float)p->fpsNum) /
> p->fpsDenom && p->rc.bStrictCbr)
>          p->lookaheadDepth = p->totalFrames;
>      if (p->bIntraRefresh)
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/ratecontrol.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -153,10 +153,7 @@
>      int lowresCuHeight = ((m_param->sourceHeight / 2) +
> X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
>      m_ncu = lowresCuWidth * lowresCuHeight;
>
> -    if (m_param->rc.cuTree)
> -        m_qCompress = 1;
> -    else
> -        m_qCompress = m_param->rc.qCompress;
> +    m_qCompress = (m_param->rc.cuTree && !m_param->rc.hevcAq) ? 1 :
> m_param->rc.qCompress;
>
>      // validate for param->rc, maybe it is need to add a function like
> x265_parameters_valiate()
>      m_residualFrames = 0;
> @@ -381,13 +378,14 @@
>
>      m_isGrainEnabled = false;
>      if(m_param->rc.bEnableGrain) // tune for grainy content OR equal p-b
> frame sizes
> -    m_isGrainEnabled = true;
> +        m_isGrainEnabled = true;
>      for (int i = 0; i < 3; i++)
> -    m_lastQScaleFor[i] = x265_qp2qScale(m_param->rc.rateControlMode ==
> X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN);
> +        m_lastQScaleFor[i] = x265_qp2qScale(m_param->rc.rateControlMode
> == X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN);
>      m_avgPFrameQp = 0 ;
>
>      /* 720p videos seem to be a good cutoff for cplxrSum */
> -    double tuneCplxFactor = (m_ncu > 3600 && m_param->rc.cuTree) ? 2.5 :
> m_isGrainEnabled ? 1.9 : 1;
> +    double tuneCplxFactor = (m_ncu > 3600 && m_param->rc.cuTree &&
> !m_param->rc.hevcAq) ? 2.5 : m_param->rc.hevcAq ? 1.5 : m_isGrainEnabled ?
> 1.9 : 1.0;
> +
>      /* estimated ratio that produces a reasonable QP for the first
> I-frame */
>      m_cplxrSum = .01 * pow(7.0e5, m_qCompress) * pow(m_ncu, 0.5) *
> tuneCplxFactor;
>      m_wantedBitsWindow = m_bitrate * m_frameDuration;
> @@ -2563,7 +2561,7 @@
>  {
>      double q;
>
> -    if (m_param->rc.cuTree)
> +    if (m_param->rc.cuTree && !m_param->rc.hevcAq)
>      {
>          // Scale and units are obtained from rateNum and rateDenom for
> videos with fixed frame rates.
>          double timescale = (double)m_param->fpsDenom / (2 *
> m_param->fpsNum);
> @@ -2571,6 +2569,7 @@
>      }
>      else
>          q = pow(rce->blurredComplexity, 1 - m_param->rc.qCompress);
> +
>      // avoid NaN's in the Rceq
>      if (rce->coeffBits + rce->mvBits == 0)
>          q = m_lastQScaleFor[rce->sliceType];
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/slicetype.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -3,6 +3,7 @@
>   *
>   * Authors: Gopu Govindaswamy <gopu at multicorewareinc.com>
>   *          Steve Borho <steve at borho.org>
> + *          Ashok Kumar Mishra <ashok at multicorewareinc.com>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -105,6 +106,7 @@
>      x265_emms();
>      return var;
>  }
> +
>  /* Find the sum of pixels of each block for luma plane */
>  uint32_t LookaheadTLD::lumaSumCu(Frame* curFrame, uint32_t blockX,
> uint32_t blockY, uint32_t qgSize)
>  {
> @@ -121,6 +123,157 @@
>      return (uint32_t)sum_ssd;
>  }
>
> +void LookaheadTLD::xPreanalyzeQp(Frame* curFrame)
> +{
> +    const uint32_t width = curFrame->m_fencPic->m_picWidth;
> +    const uint32_t height = curFrame->m_fencPic->m_picHeight;
> +
> +    for (uint32_t d = 0; d < 4; d++)
> +    {
> +        int ctuSizeIdx = 6 - g_log2Size[curFrame->m_param->maxCUSize];
> +        int aqDepth = g_log2Size[curFrame->m_param->maxCUSize] -
> g_log2Size[curFrame->m_param->rc.qgSize];
> +        if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> +            continue;
> +
> +        PicQPAdaptationLayer* pcAQLayer = &curFrame->m_lowres.pAQLayer[d];
> +        const uint32_t aqPartWidth = pcAQLayer->aqPartWidth;
> +        const uint32_t aqPartHeight = pcAQLayer->aqPartHeight;
> +        double* pcAQU = pcAQLayer->dActivity;
> +        double* pcQP = pcAQLayer->dQpOffset;
> +        double* pcCuTree = pcAQLayer->dCuTreeOffset;
> +
> +        for (uint32_t y = 0; y < height; y += aqPartHeight)
> +        {
> +            for (uint32_t x = 0; x < width; x += aqPartWidth, pcAQU++,
> pcQP++, pcCuTree++)
> +            {
> +                double dMaxQScale = pow(2.0,
> curFrame->m_param->rc.qpAdaptationRange / 6.0);
> +                double dCUAct = *pcAQU;
> +                double dAvgAct = pcAQLayer->dAvgActivity;
> +
> +                double dNormAct = (dMaxQScale*dCUAct + dAvgAct) / (dCUAct
> + dMaxQScale*dAvgAct);
> +                double dQpOffset = (X265_LOG2(dNormAct) / X265_LOG2(2.0))
> * 6.0;
> +                *pcQP = dQpOffset;
> +                *pcCuTree = dQpOffset;
> +            }
> +        }
> +    }
> +}
> +
> +void LookaheadTLD::xPreanalyze(Frame* curFrame)
> +{
> +    const uint32_t width = curFrame->m_fencPic->m_picWidth;
> +    const uint32_t height = curFrame->m_fencPic->m_picHeight;
> +    const intptr_t stride = curFrame->m_fencPic->m_stride;
> +
> +    for (uint32_t d = 0; d < 4; d++)
> +    {
> +        int ctuSizeIdx = 6 - g_log2Size[curFrame->m_param->maxCUSize];
> +        int aqDepth = g_log2Size[curFrame->m_param->maxCUSize] -
> g_log2Size[curFrame->m_param->rc.qgSize];
> +        if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> +            continue;
> +
> +        const pixel* src = curFrame->m_fencPic->m_picOrg[0];;
> +        PicQPAdaptationLayer* pQPLayer = &curFrame->m_lowres.pAQLayer[d];
> +        const uint32_t aqPartWidth = pQPLayer->aqPartWidth;
> +        const uint32_t aqPartHeight = pQPLayer->aqPartHeight;
> +        double* pcAQU = pQPLayer->dActivity;
> +
> +        double dSumAct = 0.0;
> +        for (uint32_t y = 0; y < height; y += aqPartHeight)
> +        {
> +            const uint32_t currAQPartHeight = X265_MIN(aqPartHeight,
> height - y);
> +            for (uint32_t x = 0; x < width; x += aqPartWidth, pcAQU++)
> +            {
> +                const uint32_t currAQPartWidth = X265_MIN(aqPartWidth,
> width - x);
> +                const pixel* pBlkY = &src[x];
> +                uint64_t sum[4] = { 0, 0, 0, 0 };
> +                uint64_t sumSq[4] = { 0, 0, 0, 0 };
> +                uint32_t by = 0;
> +                for (; by < currAQPartHeight >> 1; by++)
> +                {
> +                    uint32_t bx = 0;
> +                    for (; bx < currAQPartWidth >> 1; bx++)
> +                    {
> +                        sum[0] += pBlkY[bx];
> +                        sumSq[0] += pBlkY[bx] * pBlkY[bx];
> +                    }
> +                    for (; bx < currAQPartWidth; bx++)
> +                    {
> +                        sum[1] += pBlkY[bx];
> +                        sumSq[1] += pBlkY[bx] * pBlkY[bx];
> +                    }
> +                    pBlkY += stride;
> +                }
> +                for (; by < currAQPartHeight; by++)
> +                {
> +                    uint32_t bx = 0;
> +                    for (; bx < currAQPartWidth >> 1; bx++)
> +                    {
> +                        sum[2] += pBlkY[bx];
> +                        sumSq[2] += pBlkY[bx] * pBlkY[bx];
> +                    }
> +                    for (; bx < currAQPartWidth; bx++)
> +                    {
> +                        sum[3] += pBlkY[bx];
> +                        sumSq[3] += pBlkY[bx] * pBlkY[bx];
> +                    }
> +                    pBlkY += stride;
> +                }
> +
> +                assert((currAQPartWidth & 1) == 0);
> +                assert((currAQPartHeight & 1) == 0);
> +                const uint32_t pixelWidthOfQuadrants = currAQPartWidth >>
> 1;
> +                const uint32_t pixelHeightOfQuadrants = currAQPartHeight
> >> 1;
> +                const uint32_t numPixInAQPart = pixelWidthOfQuadrants *
> pixelHeightOfQuadrants;
> +
> +                double dMinVar = MAX_DOUBLE;
> +                if (numPixInAQPart != 0)
> +                {
> +                    for (int i = 0; i < 4; i++)
> +                    {
> +                        const double dAverage = double(sum[i]) /
> numPixInAQPart;
> +                        const double dVariance = double(sumSq[i]) /
> numPixInAQPart - dAverage * dAverage;
> +                        dMinVar = X265_MIN(dMinVar, dVariance);
> +                    }
> +                }
> +                else
> +                {
> +                    dMinVar = 0.0;
> +                }
> +                double dActivity = 1.0 + dMinVar;
> +                *pcAQU = dActivity;
> +                dSumAct += dActivity;
> +            }
> +            src += stride * currAQPartHeight;
> +        }
> +
> +        const double dAvgAct = dSumAct / (pQPLayer->numAQPartInWidth *
> pQPLayer->numAQPartInHeight);
> +        pQPLayer->dAvgActivity = dAvgAct;
> +    }
> +
> +    xPreanalyzeQp(curFrame);
> +
> +    int minAQDepth = curFrame->m_lowres.pAQLayer->minAQDepth;
> +
> +    PicQPAdaptationLayer* pQPLayer =
> &curFrame->m_lowres.pAQLayer[minAQDepth];
> +    const uint32_t aqPartWidth = pQPLayer->aqPartWidth;
> +    const uint32_t aqPartHeight = pQPLayer->aqPartHeight;
> +    double* pcQP = pQPLayer->dQpOffset;
> +
> +    // Use new qp offset values for qpAqOffset, qpCuTreeOffset and
> invQscaleFactor buffer
> +    int blockXY = 0;
> +    for (uint32_t y = 0; y < height; y += aqPartHeight)
> +    {
> +        for (uint32_t x = 0; x < width; x += aqPartWidth, pcQP++)
> +        {
> +            curFrame->m_lowres.invQscaleFactor[blockXY] =
> x265_exp2fix8(*pcQP);
> +            blockXY++;
> +
> +            acEnergyCu(curFrame, x, y, curFrame->m_param->internalCsp,
> curFrame->m_param->rc.qgSize);
> +        }
> +    }
> +}
> +
>  void LookaheadTLD::calcAdaptiveQuantFrame(Frame *curFrame, x265_param*
> param)
>  {
>      /* Actual adaptive quantization */
> @@ -176,90 +329,99 @@
>          if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
>          {
>              for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> -                for (int blockX = 0; blockX < maxCol; blockX +=
> loopIncr)
> -                    acEnergyCu(curFrame, blockX, blockY,
> param->internalCsp, param->rc.qgSize);
> +                for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> +                    acEnergyCu(curFrame, blockX, blockY,
> param->internalCsp, param->rc.qgSize);
>          }
>      }
>      else
>      {
> -        int blockXY = 0;
> -        double avg_adj_pow2 = 0.f, avg_adj = 0.f, qp_adj = 0.f;
> -        double bias_strength = 0.f, strength = 0.f;
> -        if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode
> == X265_AQ_AUTO_VARIANCE_BIASED)
> +        if (param->rc.hevcAq)
>          {
> -            double bit_depth_correction = 1.f / (1 <<
> (2*(X265_DEPTH-8)));
> -
> -            for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> -            {
> -                for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> -                {
> -                    uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp, param->rc.qgSize);
> -                    qp_adj = pow(energy * bit_depth_correction + 1, 0.1);
> -                    curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
> -                    avg_adj += qp_adj;
> -                    avg_adj_pow2 += qp_adj * qp_adj;
> -                    blockXY++;
> -                }
> -            }
> -            avg_adj /= blockCount;
> -            avg_adj_pow2 /= blockCount;
> -            strength = param->rc.aqStrength * avg_adj;
> -            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - modeTwoConst) /
> avg_adj;
> -            bias_strength = param->rc.aqStrength;
> +            // New method for calculating variance and qp offset
> +            xPreanalyze(curFrame);
>          }
>          else
> -            strength = param->rc.aqStrength * 1.0397f;
> -
> -        blockXY = 0;
> -        for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
>          {
> -            for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> +            int blockXY = 0;
> +            double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
> +            double bias_strength = 0.f;
> +            double strength = 0.f;
> +            if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE ||
> param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
>              {
> -                if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
> -                {
> -                    qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
> -                    qp_adj = strength * (qp_adj - avg_adj) +
> bias_strength * (1.f - modeTwoConst / (qp_adj * qp_adj));
> -                }
> -                else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
> -                {
> -                    qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
> -                    qp_adj = strength * (qp_adj - avg_adj);
> -                }
> -                else
> -                {
> -                    uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp,param->rc.qgSize);
> -                    qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) -
> (modeOneConst + 2 * (X265_DEPTH - 8)));
> -                }
> -
> -                if (param->bHDROpt)
> +                double bit_depth_correction = 1.f / (1 << (2 *
> (X265_DEPTH - 8)));
> +
> +                for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
>                  {
> -                    uint32_t sum = lumaSumCu(curFrame, blockX, blockY,
> param->rc.qgSize);
> -                    uint32_t lumaAvg = sum / (loopIncr * loopIncr);
> -                    if (lumaAvg < 301)
> -                        qp_adj += 3;
> -                    else if (lumaAvg >= 301 && lumaAvg < 367)
> -                        qp_adj += 2;
> -                    else if (lumaAvg >= 367 && lumaAvg < 434)
> -                        qp_adj += 1;
> -                    else if (lumaAvg >= 501 && lumaAvg < 567)
> -                        qp_adj -= 1;
> -                    else if (lumaAvg >= 567 && lumaAvg < 634)
> -                        qp_adj -= 2;
> -                    else if (lumaAvg >= 634 && lumaAvg < 701)
> -                        qp_adj -= 3;
> -                    else if (lumaAvg >= 701 && lumaAvg < 767)
> -                        qp_adj -= 4;
> -                    else if (lumaAvg >= 767 && lumaAvg < 834)
> -                        qp_adj -= 5;
> -                    else if (lumaAvg >= 834)
> -                        qp_adj -= 6;
> +                    for (int blockX = 0; blockX < maxCol; blockX +=
> loopIncr)
> +                    {
> +                        uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp, param->rc.qgSize);
> +                        qp_adj = pow(energy * bit_depth_correction + 1,
> 0.1);
> +                        curFrame->m_lowres.qpCuTreeOffset[blockXY] =
> qp_adj;
> +                        avg_adj += qp_adj;
> +                        avg_adj_pow2 += qp_adj * qp_adj;
> +                        blockXY++;
> +                    }
>                  }
> -                if (quantOffsets != NULL)
> -                    qp_adj += quantOffsets[blockXY];
> -                curFrame->m_lowres.qpAqOffset[blockXY] = qp_adj;
> -                curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
> -                curFrame->m_lowres.invQscaleFactor[blockXY] =
> x265_exp2fix8(qp_adj);
> -                blockXY++;
> +                avg_adj /= blockCount;
> +                avg_adj_pow2 /= blockCount;
> +                strength = param->rc.aqStrength * avg_adj;
> +                avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - modeTwoConst)
> / avg_adj;
> +                bias_strength = param->rc.aqStrength;
> +            }
> +            else
> +                strength = param->rc.aqStrength * 1.0397f;
> +
> +            blockXY = 0;
> +            for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> +            {
> +                for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> +                {
> +                    if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
> +                    {
> +                        qp_adj =
> curFrame->m_lowres.qpCuTreeOffset[blockXY];
> +                        qp_adj = strength * (qp_adj - avg_adj) +
> bias_strength * (1.f - modeTwoConst / (qp_adj * qp_adj));
> +                    }
> +                    else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
> +                    {
> +                        qp_adj =
> curFrame->m_lowres.qpCuTreeOffset[blockXY];
> +                        qp_adj = strength * (qp_adj - avg_adj);
> +                    }
> +                    else
> +                    {
> +                        uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp, param->rc.qgSize);
> +                        qp_adj = strength * (X265_LOG2(X265_MAX(energy,
> 1)) - (modeOneConst + 2 * (X265_DEPTH - 8)));
> +                    }
> +
> +                    if (param->bHDROpt)
> +                    {
> +                        uint32_t sum = lumaSumCu(curFrame, blockX,
> blockY, param->rc.qgSize);
> +                        uint32_t lumaAvg = sum / (loopIncr * loopIncr);
> +                        if (lumaAvg < 301)
> +                            qp_adj += 3;
> +                        else if (lumaAvg >= 301 && lumaAvg < 367)
> +                            qp_adj += 2;
> +                        else if (lumaAvg >= 367 && lumaAvg < 434)
> +                            qp_adj += 1;
> +                        else if (lumaAvg >= 501 && lumaAvg < 567)
> +                            qp_adj -= 1;
> +                        else if (lumaAvg >= 567 && lumaAvg < 634)
> +                            qp_adj -= 2;
> +                        else if (lumaAvg >= 634 && lumaAvg < 701)
> +                            qp_adj -= 3;
> +                        else if (lumaAvg >= 701 && lumaAvg < 767)
> +                            qp_adj -= 4;
> +                        else if (lumaAvg >= 767 && lumaAvg < 834)
> +                            qp_adj -= 5;
> +                        else if (lumaAvg >= 834)
> +                            qp_adj -= 6;
> +                    }
> +                    if (quantOffsets != NULL)
> +                        qp_adj += quantOffsets[blockXY];
> +                    curFrame->m_lowres.qpAqOffset[blockXY] = qp_adj;
> +                    curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
> +                    curFrame->m_lowres.invQscaleFactor[blockXY] =
> x265_exp2fix8(qp_adj);
> +                    blockXY++;
> +                }
>              }
>          }
>      }
> @@ -301,11 +463,13 @@
>      {
>          int blockXY = 0;
>          for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> +        {
>              for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
>              {
>                  curFrame->m_lowres.blockVariance[blockXY] =
> acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
>                  blockXY++;
>              }
> +        }
>      }
>  }
>
> @@ -596,13 +760,16 @@
>
>      /* Allow the strength to be adjusted via qcompress, since the two
> concepts
>       * are very similar. */
> -
> -    m_cuTreeStrength = 5.0 * (1.0 - m_param->rc.qCompress);
> +    m_cuTreeStrength = (m_param->rc.hevcAq ? 6.0 : 5.0) * (1.0 -
> m_param->rc.qCompress);
>
>      m_lastKeyframe = -m_param->keyframeMax;
>      m_sliceTypeBusy = false;
>      m_fullQueueSize = X265_MAX(1, m_param->lookaheadDepth);
> -    m_bAdaptiveQuant = m_param->rc.aqMode || m_param->bEnableWeightedPred
> || m_param->bEnableWeightedBiPred || m_param->bAQMotion;
> +    m_bAdaptiveQuant = m_param->rc.aqMode ||
> +                       m_param->bEnableWeightedPred ||
> +                       m_param->bEnableWeightedBiPred ||
> +                       m_param->bAQMotion ||
> +                       m_param->rc.hevcAq;
>
>      /* If we have a thread pool and are using --b-adapt 2, it is generally
>       * preferable to perform all motion searches for each lowres frame in
> large
> @@ -919,6 +1086,7 @@
>      if (!m_param->analysisLoad || !m_param->bDisableLookahead)
>      {
>          X265_CHECK(curFrame->m_lowres.costEst[b - p0][p1 - b] > 0, "Slice
> cost not estimated\n")
> +
>          if (m_param->rc.cuTree && !m_param->rc.bStatRead)
>              /* update row satds based on cutree offsets */
>              curFrame->m_lowres.satdCost = frameCostRecalculate(frames,
> p0, p1, b);
> @@ -1695,6 +1863,7 @@
>
>      if (m_param->rc.cuTree)
>          cuTree(frames, X265_MIN(numFrames, m_param->keyframeMax),
> bKeyframe);
> +
>      if (m_param->gopLookahead && (keyFrameLimit >= 0) && (keyFrameLimit
> <= m_param->bframes + 1) && !m_extendGopBoundary)
>          keyintLimit = keyFrameLimit;
>
> @@ -1928,6 +2097,7 @@
>
>      return cost;
>  }
> +
>  void Lookahead::aqMotion(Lowres **frames, bool bIntra)
>  {
>      if (!bIntra)
> @@ -2223,44 +2393,191 @@
>          cuTreeFinish(frames[b], averageDuration, b == p1 ? b - p0 : 0);
>  }
>
> -void Lookahead::cuTreeFinish(Lowres *frame, double averageDuration, int
> ref0Distance)
> +void Lookahead::computeCUTreeQpOffset(Lowres *frame, double
> averageDuration, int ref0Distance)
>  {
>      int fpsFactor = (int)(CLIP_DURATION(averageDuration) /
> CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) * 256);
> +    uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
> +
>      double weightdelta = 0.0;
> -
>      if (ref0Distance && frame->weightedCostDelta[ref0Distance - 1] > 0)
>          weightdelta = (1.0 - frame->weightedCostDelta[ref0Distance - 1]);
>
> +    uint32_t widthFullRes = frame->widthFullRes;
> +    uint32_t heightFullRes = frame->heightFullRes;
> +
>      if (m_param->rc.qgSize == 8)
>      {
> +        int minAQDepth = frame->pAQLayer->minAQDepth;
> +
> +        PicQPAdaptationLayer* pQPLayerMin = &frame->pAQLayer[minAQDepth];
> +        double* pcCuTree8x8 = pQPLayerMin->dCuTreeOffset8x8;
> +
>          for (int cuY = 0; cuY < m_8x8Height; cuY++)
>          {
>              for (int cuX = 0; cuX < m_8x8Width; cuX++)
>              {
>                  const int cuXY = cuX + cuY * m_8x8Width;
> -                int intracost = ((frame->intraCost[cuXY]) / 4 *
> frame->invQscaleFactor8x8[cuXY] + 128) >> 8;
> +                int intracost = ((frame->intraCost[cuXY] / 4) *
> frame->invQscaleFactor8x8[cuXY] + 128) >> 8;
>                  if (intracost)
>                  {
> -                    int propagateCost = ((frame->propagateCost[cuXY]) / 4
> * fpsFactor + 128) >> 8;
> +                    int propagateCost = ((frame->propagateCost[cuXY] /
> 4)  * fpsFactor + 128) >> 8;
>                      double log2_ratio = X265_LOG2(intracost +
> propagateCost) - X265_LOG2(intracost) + weightdelta;
> -                    frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4]
> = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4] - m_cuTreeStrength *
> (log2_ratio);
> -                    frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4
> + 1] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + 1] -
> m_cuTreeStrength * (log2_ratio);
> -                    frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4
> + frame->maxBlocksInRowFullRes] = frame->qpAqOffset[cuX * 2 + cuY *
> m_8x8Width * 4 + frame->maxBlocksInRowFullRes] - m_cuTreeStrength *
> (log2_ratio);
> -                    frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4
> + frame->maxBlocksInRowFullRes + 1] = frame->qpAqOffset[cuX * 2 + cuY *
> m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1] - m_cuTreeStrength *
> (log2_ratio);
> +
> +                    pcCuTree8x8[cuX * 2 + cuY * m_8x8Width * 4] =
> log2_ratio;
> +                    pcCuTree8x8[cuX * 2 + cuY * m_8x8Width * 4 + 1] =
> log2_ratio;
> +                    pcCuTree8x8[cuX * 2 + cuY * m_8x8Width * 4 +
> frame->maxBlocksInRowFullRes] = log2_ratio;
> +                    pcCuTree8x8[cuX * 2 + cuY * m_8x8Width * 4 +
> frame->maxBlocksInRowFullRes + 1] = log2_ratio;
> +                }
> +            }
> +        }
> +
> +        for (uint32_t d = 0; d < 4; d++)
> +        {
> +            int ctuSizeIdx = 6 - g_log2Size[m_param->maxCUSize];
> +            int aqDepth = g_log2Size[m_param->maxCUSize] -
> g_log2Size[m_param->rc.qgSize];
> +            if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> +                continue;
> +
> +            PicQPAdaptationLayer* pQPLayer = &frame->pAQLayer[d];
> +            const uint32_t aqPartWidth = pQPLayer->aqPartWidth;
> +            const uint32_t aqPartHeight = pQPLayer->aqPartHeight;
> +
> +            const uint32_t numAQPartInWidth = pQPLayer->numAQPartInWidth;
> +            const uint32_t numAQPartInHeight =
> pQPLayer->numAQPartInHeight;
> +
> +            double* pcQP = pQPLayer->dQpOffset;
> +            double* pcCuTree = pQPLayer->dCuTreeOffset;
> +
> +            uint32_t maxCols = frame->maxBlocksInRowFullRes;
> +
> +            for (uint32_t y = 0; y < numAQPartInHeight; y++)
> +            {
> +                for (uint32_t x = 0; x < numAQPartInWidth; x++, pcQP++,
> pcCuTree++)
> +                {
> +                    uint32_t block_x = x * aqPartWidth;
> +                    uint32_t block_y = y * aqPartHeight;
> +
> +                    uint32_t blockXY = 0;
> +                    double log2_ratio = 0;
> +                    for (uint32_t block_yy = block_y; block_yy < block_y
> + aqPartHeight && block_yy < heightFullRes; block_yy += loopIncr)
> +                    {
> +                        for (uint32_t block_xx = block_x; block_xx <
> block_x + aqPartWidth && block_xx < widthFullRes; block_xx += loopIncr)
> +                        {
> +                            uint32_t idx = ((block_yy / loopIncr) *
> (maxCols)) + (block_xx / loopIncr);
> +
> +                            log2_ratio += *(pcCuTree8x8 + idx);
> +
> +                            blockXY++;
> +                        }
> +                    }
> +
> +                    double qp_offset = (m_cuTreeStrength * log2_ratio) /
> blockXY;
> +
> +                    *pcCuTree = *pcQP - qp_offset;
>                  }
>              }
>          }
>      }
>      else
>      {
> -        for (int cuIndex = 0; cuIndex < m_cuCount; cuIndex++)
> +        for (uint32_t d = 0; d < 4; d++)
>          {
> -            int intracost = (frame->intraCost[cuIndex] *
> frame->invQscaleFactor[cuIndex] + 128) >> 8;
> -            if (intracost)
> +            int ctuSizeIdx = 6 - g_log2Size[m_param->maxCUSize];
> +            int aqDepth = g_log2Size[m_param->maxCUSize] -
> g_log2Size[m_param->rc.qgSize];
> +            if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> +                continue;
> +
> +            PicQPAdaptationLayer* pQPLayer = &frame->pAQLayer[d];
> +            const uint32_t aqPartWidth = pQPLayer->aqPartWidth;
> +            const uint32_t aqPartHeight = pQPLayer->aqPartHeight;
> +
> +            const uint32_t numAQPartInWidth = pQPLayer->numAQPartInWidth;
> +            const uint32_t numAQPartInHeight =
> pQPLayer->numAQPartInHeight;
> +
> +            double* pcQP = pQPLayer->dQpOffset;
> +            double* pcCuTree = pQPLayer->dCuTreeOffset;
> +
> +            uint32_t maxCols = frame->maxBlocksInRow;
> +
> +            for (uint32_t y = 0; y < numAQPartInHeight; y++)
>              {
> -                int propagateCost = (frame->propagateCost[cuIndex] *
> fpsFactor + 128) >> 8;
> -                double log2_ratio = X265_LOG2(intracost + propagateCost)
> - X265_LOG2(intracost) + weightdelta;
> -                frame->qpCuTreeOffset[cuIndex] =
> frame->qpAqOffset[cuIndex] - m_cuTreeStrength * log2_ratio;
> +                for (uint32_t x = 0; x < numAQPartInWidth; x++, pcQP++,
> pcCuTree++)
> +                {
> +                    uint32_t block_x = x * aqPartWidth;
> +                    uint32_t block_y = y * aqPartHeight;
> +
> +                    uint32_t blockXY = 0;
> +                    double log2_ratio = 0;
> +                    for (uint32_t block_yy = block_y; block_yy < block_y
> + aqPartHeight && block_yy < heightFullRes; block_yy += loopIncr)
> +                    {
> +                        for (uint32_t block_xx = block_x; block_xx <
> block_x + aqPartWidth && block_xx < widthFullRes; block_xx += loopIncr)
> +                        {
> +                            uint32_t idx = ((block_yy / loopIncr) *
> (maxCols)) + (block_xx / loopIncr);
> +
> +                            int intraCost = (frame->intraCost[idx] *
> frame->invQscaleFactor[idx] + 128) >> 8;
> +                            int propagateCost =
> (frame->propagateCost[idx] * fpsFactor + 128) >> 8;
> +
> +                            log2_ratio += (X265_LOG2(intraCost +
> propagateCost) - X265_LOG2(intraCost) + weightdelta);
> +
> +                            blockXY++;
> +                        }
> +                    }
> +
> +                    double qp_offset = (m_cuTreeStrength * log2_ratio) /
> blockXY;
> +
> +                    *pcCuTree = *pcQP - qp_offset;
> +
> +                }
> +            }
> +        }
> +    }
> +}
> +
> +void Lookahead::cuTreeFinish(Lowres *frame, double averageDuration, int
> ref0Distance)
> +{
> +    if (m_param->rc.hevcAq)
> +    {
> +        computeCUTreeQpOffset(frame, averageDuration, ref0Distance);
> +    }
> +    else
> +    {
> +        int fpsFactor = (int)(CLIP_DURATION(averageDuration) /
> CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) * 256);
> +        double weightdelta = 0.0;
> +
> +        if (ref0Distance && frame->weightedCostDelta[ref0Distance - 1] >
> 0)
> +            weightdelta = (1.0 - frame->weightedCostDelta[ref0Distance -
> 1]);
> +
> +        if (m_param->rc.qgSize == 8)
> +        {
> +            for (int cuY = 0; cuY < m_8x8Height; cuY++)
> +            {
> +                for (int cuX = 0; cuX < m_8x8Width; cuX++)
> +                {
> +                    const int cuXY = cuX + cuY * m_8x8Width;
> +                    int intracost = ((frame->intraCost[cuXY]) / 4 *
> frame->invQscaleFactor8x8[cuXY] + 128) >> 8;
> +                    if (intracost)
> +                    {
> +                        int propagateCost = ((frame->propagateCost[cuXY])
> / 4 * fpsFactor + 128) >> 8;
> +                        double log2_ratio = X265_LOG2(intracost +
> propagateCost) - X265_LOG2(intracost) + weightdelta;
> +                        frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width
> * 4] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4] - m_cuTreeStrength
> * (log2_ratio);
> +                        frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width
> * 4 + 1] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + 1] -
> m_cuTreeStrength * (log2_ratio);
> +                        frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width
> * 4 + frame->maxBlocksInRowFullRes] = frame->qpAqOffset[cuX * 2 + cuY *
> m_8x8Width * 4 + frame->maxBlocksInRowFullRes] - m_cuTreeStrength *
> (log2_ratio);
> +                        frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width
> * 4 + frame->maxBlocksInRowFullRes + 1] = frame->qpAqOffset[cuX * 2 + cuY *
> m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1] - m_cuTreeStrength *
> (log2_ratio);
> +                    }
> +                }
> +            }
> +        }
> +        else
> +        {
> +            for (int cuIndex = 0; cuIndex < m_cuCount; cuIndex++)
> +            {
> +                int intracost = (frame->intraCost[cuIndex] *
> frame->invQscaleFactor[cuIndex] + 128) >> 8;
> +                if (intracost)
> +                {
> +                    int propagateCost = (frame->propagateCost[cuIndex] *
> fpsFactor + 128) >> 8;
> +                    double log2_ratio = X265_LOG2(intracost +
> propagateCost) - X265_LOG2(intracost) + weightdelta;
> +                    frame->qpCuTreeOffset[cuIndex] =
> frame->qpAqOffset[cuIndex] - m_cuTreeStrength * log2_ratio;
> +                }
>              }
>          }
>      }
> @@ -2275,31 +2592,71 @@
>
>      int64_t score = 0;
>      int *rowSatd = frames[b]->rowSatds[b - p0][p1 - b];
> -    double *qp_offset = frames[b]->qpCuTreeOffset;
>
>      x265_emms();
> -    for (int cuy = m_8x8Height - 1; cuy >= 0; cuy--)
> +
> +    if (m_param->rc.hevcAq)
>      {
> -        rowSatd[cuy] = 0;
> -        for (int cux = m_8x8Width - 1; cux >= 0; cux--)
> +        int minAQDepth = frames[b]->pAQLayer->minAQDepth;
> +        PicQPAdaptationLayer* pQPLayer = &frames[b]->pAQLayer[minAQDepth];
> +        double* pcQPCuTree = pQPLayer->dCuTreeOffset;
> +
> +        // Use new qp offset values for qpAqOffset, qpCuTreeOffset and
> invQscaleFactor buffer
> +        for (int cuy = m_8x8Height - 1; cuy >= 0; cuy--)
>          {
> -            int cuxy = cux + cuy * m_8x8Width;
> -            int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy] &
> LOWRES_COST_MASK;
> -            double qp_adj;
> -            if (m_param->rc.qgSize == 8)
> -                qp_adj = (qp_offset[cux * 2 + cuy * m_8x8Width * 4] +
> -                          qp_offset[cux * 2 + cuy * m_8x8Width * 4 + 1] +
> -                          qp_offset[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes] +
> -                          qp_offset[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes + 1]) / 4;
> -            else
> -                qp_adj = qp_offset[cuxy];
> -            cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
> -            rowSatd[cuy] += cuCost;
> -            if ((cuy > 0 && cuy < m_8x8Height - 1 &&
> -                 cux > 0 && cux < m_8x8Width - 1) ||
> -                m_8x8Width <= 2 || m_8x8Height <= 2)
> +            rowSatd[cuy] = 0;
> +            for (int cux = m_8x8Width - 1; cux >= 0; cux--)
>              {
> -                score += cuCost;
> +                int cuxy = cux + cuy * m_8x8Width;
> +                int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy]
> & LOWRES_COST_MASK;
> +                double qp_adj;
> +
> +                if (m_param->rc.qgSize == 8)
> +                    qp_adj = (pcQPCuTree[cux * 2 + cuy * m_8x8Width * 4] +
> +                    pcQPCuTree[cux * 2 + cuy * m_8x8Width * 4 + 1] +
> +                    pcQPCuTree[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes] +
> +                    pcQPCuTree[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes + 1]) / 4;
> +                else
> +                    qp_adj = *(pcQPCuTree + cuxy);
> +
> +                cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
> +                rowSatd[cuy] += cuCost;
> +                if ((cuy > 0 && cuy < m_8x8Height - 1 &&
> +                    cux > 0 && cux < m_8x8Width - 1) ||
> +                    m_8x8Width <= 2 || m_8x8Height <= 2)
> +                {
> +                    score += cuCost;
> +                }
> +            }
> +        }
> +    }
> +    else
> +    {
> +        double *qp_offset = frames[b]->qpCuTreeOffset;
> +
> +        for (int cuy = m_8x8Height - 1; cuy >= 0; cuy--)
> +        {
> +            rowSatd[cuy] = 0;
> +            for (int cux = m_8x8Width - 1; cux >= 0; cux--)
> +            {
> +                int cuxy = cux + cuy * m_8x8Width;
> +                int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy]
> & LOWRES_COST_MASK;
> +                double qp_adj;
> +                if (m_param->rc.qgSize == 8)
> +                    qp_adj = (qp_offset[cux * 2 + cuy * m_8x8Width * 4] +
> +                    qp_offset[cux * 2 + cuy * m_8x8Width * 4 + 1] +
> +                    qp_offset[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes] +
> +                    qp_offset[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes + 1]) / 4;
> +                else
> +                    qp_adj = qp_offset[cuxy];
> +                cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
> +                rowSatd[cuy] += cuCost;
> +                if ((cuy > 0 && cuy < m_8x8Height - 1 &&
> +                    cux > 0 && cux < m_8x8Width - 1) ||
> +                    m_8x8Width <= 2 || m_8x8Height <= 2)
> +                {
> +                    score += cuCost;
> +                }
>              }
>          }
>      }
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/slicetype.h
> --- a/source/encoder/slicetype.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/slicetype.h Thu Dec 27 14:43:54 2018 +0530
> @@ -87,7 +87,8 @@
>      void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize);
>
>      void weightsAnalyse(Lowres& fenc, Lowres& ref);
> -
> +    void xPreanalyze(Frame* curFrame);
> +    void xPreanalyzeQp(Frame* curFrame);
>  protected:
>
>      uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t
> blockY, int csp, uint32_t qgSize);
> @@ -175,6 +176,7 @@
>      void    cuTree(Lowres **frames, int numframes, bool bintra);
>      void    estimateCUPropagate(Lowres **frames, double average_duration,
> int p0, int p1, int b, int referenced);
>      void    cuTreeFinish(Lowres *frame, double averageDuration, int
> ref0Distance);
> +    void    computeCUTreeQpOffset(Lowres *frame, double averageDuration,
> int ref0Distance);
>
>      /* called by getEstimatedPictureCost() to finalize cuTree costs */
>      int64_t frameCostRecalculate(Lowres **frames, int p0, int p1, int b);
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/x265.h
> --- a/source/x265.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/x265.h Thu Dec 27 14:43:54 2018 +0530
> @@ -1347,10 +1347,22 @@
>           * generally improves. Default: X265_AQ_AUTO_VARIANCE */
>          int       aqMode;
>
> +        /*
> +         * Enable adaptive quantization.
> +         * It scales the quantization step size according to the spatial
> activity of one
> +         * coding unit relative to frame average spatial activity. This
> AQ method utilizes
> +         * the minimum variance of sub-unit in each coding unit to
> represent the coding
> +         * unit’s spatial complexity. */
> +        int       hevcAq;
> +
>          /* Sets the strength of AQ bias towards low detail CTUs. Valid
> only if
>           * AQ is enabled. Default value: 1.0. Acceptable values between
> 0.0 and 3.0 */
>          double    aqStrength;
>
> +        /* Delta QP range by QP adaptation based on a psycho-visual model.
> +         * Acceptable values between 1.0 to 6.0 */
> +        double    qpAdaptationRange;
> +
>          /* Sets the maximum rate the VBV buffer should be assumed to
> refill at
>           * Default is zero */
>          int       vbvMaxBitrate;
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/x265cli.h
> --- a/source/x265cli.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/x265cli.h Thu Dec 27 14:43:54 2018 +0530
> @@ -311,6 +311,9 @@
>      { "dolby-vision-rpu", required_argument, NULL, 0 },
>      { "hrd-concat",          no_argument, NULL, 0},
>      { "no-hrd-concat",       no_argument, NULL, 0 },
> +    { "hevc-aq", no_argument, NULL, 0 },
> +    { "no-hevc-aq", no_argument, NULL, 0 },
> +    { "qp-adaptation-range", required_argument, NULL, 0 },
>      { 0, 0, 0, 0 },
>      { 0, 0, 0, 0 },
>      { 0, 0, 0, 0 },
> @@ -361,7 +364,7 @@
>      H0("   --dhdr10-info <filename>      JSON file containing the
> Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n");
>      H0("   --[no-]dhdr10-opt             Insert tone mapping SEI only for
> IDR frames and when the tone mapping information changes. Default
> disabled\n");
>  #endif
> -    H0("   --dolby-vision-profile <float|integer> Specifies Dolby Vision
> profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled.
> Specified as '5' or '50'. Default 0 (disabled).\n");
> +    H0("   --dolby-vision-profile <float|integer> Specifies Dolby Vision
> profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled.
> Specified as '5' or '50'. Default 0 (disabled).\n");
>      H0("   --dolby-vision-rpu <filename> File containing Dolby Vision RPU
> metadata.\n"
>         "                                 If given, x265's Dolby Vision
> metadata parser will fill the RPU field of input pictures with the metadata
> read from the file. Default NULL(disabled).\n");
>      H0("   --nalu-file <filename>        Text file containing SEI
> messages in the following format : <POC><space><PREFIX><space><NAL UNIT
> TYPE>/<SEI TYPE><space><SEI Payload>\n");
> @@ -521,7 +524,9 @@
>          "                                    - 1 : Store/Load ctu
> distortion to/from the file specified in analysis-save/load.\n"
>          "                                Default 0 - Disabled\n");
>      H0("   --aq-mode <integer>           Mode for Adaptive Quantization -
> 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark
> scenes. Default %d\n", param->rc.aqMode);
> +    H0("   --[no-]hevc-aq                Mode for HEVC Adaptive
> Quantization. Default %s\n", OPT(param->rc.hevcAq));
>      H0("   --aq-strength <float>         Reduces blocking and blurring in
> flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength);
> +    H0("   --qp-adaptation-range <float> Delta QP range by QP adaptation
> based on a psycho-visual model (1.0 to 6.0). Default %.2f\n",
> param->rc.qpAdaptationRange);
>      H0("   --[no-]aq-motion              Adaptive Quantization based on
> the relative motion of each CU w.r.t., frame. Default %s\n",
> OPT(param->bOptCUDeltaQP));
>      H0("   --qg-size <int>               Specifies the size of the
> quantization group (64, 32, 16, 8). Default %d\n", param->rc.qgSize);
>      H0("   --[no-]cutree                 Enable cutree for Adaptive
> Quantization. Default %s\n", OPT(param->rc.cuTree));
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20190430/c355d5da/attachment-0001.html>