[x265] [PATCH] new aq implementation
Alex Giladi
alex.giladi at gmail.com
Tue Apr 30 13:06:54 CEST 2019
Where is the --qp-adaptation-range parameter used?
On Sat, Feb 9, 2019 at 4:44 AM Pooja Venkatesan <pooja at multicorewareinc.com>
wrote:
> # HG changeset patch
> # User Ashok Kumar Mishra <ashok at multicorewareinc.com>
> # Date 1545902034 -19800
> # Thu Dec 27 14:43:54 2018 +0530
> # Node ID 3cd0b5ed0b91bcb3d5d6cfa1395cb502fc6d01ca
> # Parent 129416ec047966f7d7e7898fbe16110444b9a183
> new aq implementation
>
> It scales the quantization step size according to the spatial activity of
> one
> coding unit relative to frame average spatial activity. This AQ method
> utilizes
> the minimum variance of sub-unit in each coding unit to represent the
> coding
> unit's spatial complexity.
>
> diff -r 129416ec0479 -r 3cd0b5ed0b91 doc/reST/cli.rst
> --- a/doc/reST/cli.rst Fri Dec 28 09:03:26 2018 +0530
> +++ b/doc/reST/cli.rst Thu Dec 27 14:43:54 2018 +0530
> @@ -1646,6 +1646,21 @@
> Default 1.0.
> **Range of values:** 0.0 to 3.0
>
> +.. option:: --hevc-aq
> +
> + Enable adaptive quantization.
> + It scales the quantization step size according to the spatial activity
> of one
> + coding unit relative to frame average spatial activity. This AQ method
> utilizes
> + the minimum variance of sub-unit in each coding unit to represent the
> coding
> + unit’s spatial complexity.
> +
> +.. option:: --qp-adaptation-range
> +
> + Delta-QP range by QP adaptation based on a psycho-visual model.
> +
> + Default 1.0.
> + **Range of values:** 1.0 to 6.0
> +
> .. option:: --aq-motion, --no-aq-motion
>
> Adjust the AQ offsets based on the relative motion of each block with
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/common/lowres.cpp
> --- a/source/common/lowres.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/common/lowres.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -2,6 +2,7 @@
> * Copyright (C) 2013-2017 MulticoreWare, Inc
> *
> * Authors: Gopu Govindaswamy <gopu at multicorewareinc.com>
> + * Ashok Kumar Mishra <ashok at multicorewareinc.com>
> *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> @@ -27,10 +28,31 @@
>
> using namespace X265_NS;
>
> +bool PicQPAdaptationLayer::create(uint32_t width, uint32_t height,
> uint32_t partWidth, uint32_t partHeight, uint32_t numAQPartInWidthExt,
> uint32_t numAQPartInHeightExt)
> +{
> + aqPartWidth = partWidth;
> + aqPartHeight = partHeight;
> + numAQPartInWidth = (width + partWidth - 1) / partWidth;
> + numAQPartInHeight = (height + partHeight - 1) / partHeight;
> +
> + CHECKED_MALLOC_ZERO(dActivity, double, numAQPartInWidthExt *
> numAQPartInHeightExt);
> + CHECKED_MALLOC_ZERO(dQpOffset, double, numAQPartInWidthExt *
> numAQPartInHeightExt);
> + CHECKED_MALLOC_ZERO(dCuTreeOffset, double, numAQPartInWidthExt *
> numAQPartInHeightExt);
> +
> + if (bQpSize)
> + CHECKED_MALLOC_ZERO(dCuTreeOffset8x8, double, numAQPartInWidthExt
> * numAQPartInHeightExt);
> +
> + return true;
> +fail:
> + return false;
> +}
> +
> bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
> {
> isLowres = true;
> bframes = param->bframes;
> + widthFullRes = origPic->m_picWidth;
> + heightFullRes = origPic->m_picHeight;
> width = origPic->m_picWidth / 2;
> lines = origPic->m_picHeight / 2;
> lumaStride = width + 2 * origPic->m_lumaMarginX;
> @@ -49,7 +71,7 @@
>
> size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY);
> size_t padoffset = lumaStride * origPic->m_lumaMarginY +
> origPic->m_lumaMarginX;
> - if (!!param->rc.aqMode)
> + if (!!param->rc.aqMode || !!param->rc.hevcAq)
> {
> CHECKED_MALLOC_ZERO(qpAqOffset, double, cuCountFullRes);
> CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes);
> @@ -57,10 +79,50 @@
> if (qgSize == 8)
> CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount);
> }
> +
> if (origPic->m_param->bAQMotion)
> CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes);
> if (origPic->m_param->bDynamicRefine)
> CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes);
> +
> + if (!!param->rc.hevcAq)
> + {
> + m_maxCUSize = param->maxCUSize;
> + m_qgSize = qgSize;
> +
> + uint32_t partWidth, partHeight, nAQPartInWidth, nAQPartInHeight;
> +
> + pAQLayer = new PicQPAdaptationLayer[4];
> + maxAQDepth = 0;
> + for (uint32_t d = 0; d < 4; d++)
> + {
> + int ctuSizeIdx = 6 - g_log2Size[param->maxCUSize];
> + int aqDepth = g_log2Size[param->maxCUSize] -
> g_log2Size[qgSize];
> + if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> + continue;
> +
> + pAQLayer->minAQDepth = d;
> + partWidth = param->maxCUSize >> d;
> + partHeight = param->maxCUSize >> d;
> +
> + if (minAQSize[ctuSizeIdx] == d)
> + {
> + pAQLayer[d].bQpSize = true;
> + nAQPartInWidth = maxBlocksInRow * 2;
> + nAQPartInHeight = maxBlocksInCol * 2;
> + }
> + else
> + {
> + pAQLayer[d].bQpSize = false;
> + nAQPartInWidth = (origPic->m_picWidth + partWidth - 1) /
> partWidth;
> + nAQPartInHeight = (origPic->m_picHeight + partHeight - 1)
> / partHeight;
> + }
> +
> + maxAQDepth++;
> +
> + pAQLayer[d].create(origPic->m_picWidth, origPic->m_picHeight,
> partWidth, partHeight, nAQPartInWidth, nAQPartInHeight);
> + }
> + }
> CHECKED_MALLOC(propagateCost, uint16_t, cuCount);
>
> /* allocate lowres buffers */
> @@ -130,6 +192,25 @@
> X265_FREE(invQscaleFactor8x8);
> X265_FREE(qpAqMotionOffset);
> X265_FREE(blockVariance);
> + if (maxAQDepth > 0)
> + {
> + for (uint32_t d = 0; d < 4; d++)
> + {
> + int ctuSizeIdx = 6 - g_log2Size[m_maxCUSize];
> + int aqDepth = g_log2Size[m_maxCUSize] - g_log2Size[m_qgSize];
> + if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> + continue;
> +
> + X265_FREE(pAQLayer[d].dActivity);
> + X265_FREE(pAQLayer[d].dQpOffset);
> + X265_FREE(pAQLayer[d].dCuTreeOffset);
> +
> + if (pAQLayer[d].bQpSize == true)
> + X265_FREE(pAQLayer[d].dCuTreeOffset8x8);
> + }
> +
> + delete[] pAQLayer;
> + }
> }
> // (re) initialize lowres state
> void Lowres::init(PicYuv *origPic, int poc)
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/common/lowres.h
> --- a/source/common/lowres.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/common/lowres.h Thu Dec 27 14:43:54 2018 +0530
> @@ -103,6 +103,49 @@
> }
> };
>
> +static const uint32_t aqLayerDepth[3][4][4] = {
> + { // ctu size 64
> + { 1, 0, 1, 0 },
> + { 1, 1, 1, 0 },
> + { 1, 1, 1, 0 },
> + { 1, 1, 1, 1 }
> + },
> + { // ctu size 32
> + { 1, 1, 0, 0 },
> + { 1, 1, 0, 0 },
> + { 1, 1, 1, 0 },
> + { 0, 0, 0, 0 },
> + },
> + { // ctu size 16
> + { 1, 0, 0, 0 },
> + { 1, 1, 0, 0 },
> + { 0, 0, 0, 0 },
> + { 0, 0, 0, 0 }
> + }
> +};
> +
> +// min aq size for ctu size 64, 32 and 16
> +static const uint32_t minAQSize[3] = { 3, 2, 1 };
> +
> +struct PicQPAdaptationLayer
> +{
> + uint32_t aqPartWidth;
> + uint32_t aqPartHeight;
> + uint32_t numAQPartInWidth;
> + uint32_t numAQPartInHeight;
> + uint32_t minAQDepth;
> + double* dActivity;
> + double* dQpOffset;
> +
> + double* dCuTreeOffset;
> + double* dCuTreeOffset8x8;
> + double dAvgActivity;
> + bool bQpSize;
> +
> + bool create(uint32_t width, uint32_t height, uint32_t aqPartWidth,
> uint32_t aqPartHeight, uint32_t numAQPartInWidthExt, uint32_t
> numAQPartInHeightExt);
> + void destroy();
> +};
> +
> /* lowres buffers, sizes and strides */
> struct Lowres : public ReferencePlanes
> {
> @@ -154,6 +197,13 @@
> uint64_t wp_sum[3];
>
> /* cutree intermediate data */
> + PicQPAdaptationLayer* pAQLayer;
> + uint32_t maxAQDepth;
> + uint32_t widthFullRes;
> + uint32_t heightFullRes;
> + uint32_t m_maxCUSize;
> + uint32_t m_qgSize;
> +
> uint16_t* propagateCost;
> double weightedCostDelta[X265_BFRAME_MAX + 2];
> ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/common/param.cpp
> --- a/source/common/param.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/common/param.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -233,8 +233,10 @@
> param->rc.rateControlMode = X265_RC_CRF;
> param->rc.qp = 32;
> param->rc.aqMode = X265_AQ_AUTO_VARIANCE;
> + param->rc.hevcAq = 0;
> param->rc.qgSize = 32;
> param->rc.aqStrength = 1.0;
> + param->rc.qpAdaptationRange = 1.0;
> param->rc.cuTree = 1;
> param->rc.rfConstantMax = 0;
> param->rc.rfConstantMin = 0;
> @@ -528,6 +530,7 @@
> param->rc.pbFactor = 1.0;
> param->rc.cuTree = 0;
> param->rc.aqMode = 0;
> + param->rc.hevcAq = 0;
> param->rc.qpStep = 1;
> param->rc.bEnableGrain = 1;
> param->bEnableRecursionSkip = 0;
> @@ -1186,6 +1189,8 @@
> }
> OPT("hrd-concat") p->bEnableHRDConcatFlag = atobool(value);
> OPT("refine-ctu-distortion") p->ctuDistortionRefine = atoi(value);
> + OPT("hevc-aq") p->rc.hevcAq = atobool(value);
> + OPT("qp-adaptation-range") p->rc.qpAdaptationRange = atof(value);
> else
> return X265_PARAM_BAD_NAME;
> }
> @@ -1430,6 +1435,8 @@
> "Aq-Mode is out of range");
> CHECK(param->rc.aqStrength < 0 || param->rc.aqStrength > 3,
> "Aq-Strength is out of range");
> + CHECK(param->rc.qpAdaptationRange < 1.0f ||
> param->rc.qpAdaptationRange > 6.0f,
> + "qp adaptation range is out of range");
> CHECK(param->deblockingFilterTCOffset < -6 ||
> param->deblockingFilterTCOffset > 6,
> "deblocking filter tC offset must be in the range of -6 to +6");
> CHECK(param->deblockingFilterBetaOffset < -6 ||
> param->deblockingFilterBetaOffset > 6,
> @@ -1956,6 +1963,8 @@
> s += sprintf(s, " max-ausize-factor=%.1f", p->maxAUSizeFactor);
> BOOL(p->bDynamicRefine, "dynamic-refine");
> BOOL(p->bSingleSeiNal, "single-sei");
> + BOOL(p->rc.hevcAq, "hevc-aq");
> + s += sprintf(s, " qp-adaptation-range=%.2f", p->rc.qpAdaptationRange);
> #undef BOOL
> return buf;
> }
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/analysis.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -3556,10 +3556,39 @@
> return cuVariance / cnt;
> }
>
> +double Analysis::aqQPOffset(const CUData& ctu, const CUGeom& cuGeom)
> +{
> + uint32_t aqDepth = X265_MIN(cuGeom.depth,
> m_frame->m_lowres.maxAQDepth - 1);
> + PicQPAdaptationLayer* pQPLayer = &m_frame->m_lowres.pAQLayer[aqDepth];
> +
> + uint32_t aqPosX = (ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]) /
> pQPLayer->aqPartWidth;
> + uint32_t aqPosY = (ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]) /
> pQPLayer->aqPartHeight;
> +
> + uint32_t aqStride = pQPLayer->numAQPartInWidth;
> +
> + double dQpOffset = pQPLayer->dQpOffset[aqPosY * aqStride + aqPosX];
> + return dQpOffset;
> +}
> +
> +double Analysis::cuTreeQPOffset(const CUData& ctu, const CUGeom& cuGeom)
> +{
> + uint32_t aqDepth = X265_MIN(cuGeom.depth,
> m_frame->m_lowres.maxAQDepth - 1);
> + PicQPAdaptationLayer* pcAQLayer =
> &m_frame->m_lowres.pAQLayer[aqDepth];
> +
> + uint32_t aqPosX = (ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]) /
> pcAQLayer->aqPartWidth;
> + uint32_t aqPosY = (ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]) /
> pcAQLayer->aqPartHeight;
> +
> + uint32_t aqStride = pcAQLayer->numAQPartInWidth;
> +
> + double dQpOffset = pcAQLayer->dCuTreeOffset[aqPosY * aqStride +
> aqPosX];
> + return dQpOffset;
> +}
> +
> int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom&
> cuGeom, int32_t complexCheck, double baseQp)
> {
> FrameData& curEncData = *m_frame->m_encData;
> double qp = baseQp >= 0 ? baseQp :
> curEncData.m_cuStat[ctu.m_cuAddr].baseQp;
> + bool bCuTreeOffset = IS_REFERENCED(m_frame) && m_param->rc.cuTree &&
> !complexCheck;
>
> if ((m_param->analysisMultiPassDistortion && m_param->rc.bStatRead)
> || (m_param->ctuDistortionRefine && m_param->analysisLoad))
> {
> @@ -3577,40 +3606,60 @@
> else
> return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax,
> (int32_t)(qp + 0.5 +
> ((x265_analysis_inter_data*)m_frame->m_analysisData.interData)->cuQPOff[cuIdx]));
> }
> - int loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
> -
> - /* Use cuTree offsets if cuTree enabled and frame is referenced, else
> use AQ offsets */
> - bool isReferenced = IS_REFERENCED(m_frame);
> - double *qpoffs = (isReferenced && m_param->rc.cuTree &&
> !complexCheck) ? m_frame->m_lowres.qpCuTreeOffset :
> -
> m_frame->m_lowres.qpAqOffset;
> - if (qpoffs)
> + if (m_param->rc.hevcAq)
> {
> - uint32_t width = m_frame->m_fencPic->m_picWidth;
> - uint32_t height = m_frame->m_fencPic->m_picHeight;
> - uint32_t block_x = ctu.m_cuPelX +
> g_zscanToPelX[cuGeom.absPartIdx];
> - uint32_t block_y = ctu.m_cuPelY +
> g_zscanToPelY[cuGeom.absPartIdx];
> - uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr -
> 1)) / loopIncr;
> - uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth;
> - double qp_offset = 0;
> - uint32_t cnt = 0;
> - for (uint32_t block_yy = block_y; block_yy < block_y + blockSize
> && block_yy < height; block_yy += loopIncr)
> + /* Use cuTree offsets if cuTree enabled and frame is referenced,
> else use AQ offsets */
> + double dQpOffset = 0;
> + if (bCuTreeOffset)
> {
> - for (uint32_t block_xx = block_x; block_xx < block_x +
> blockSize && block_xx < width; block_xx += loopIncr)
> + dQpOffset = cuTreeQPOffset(ctu, cuGeom);
> + }
> + else
> + {
> + dQpOffset = aqQPOffset(ctu, cuGeom);
> + if (complexCheck)
> {
> - uint32_t idx = ((block_yy / loopIncr) * (maxCols)) +
> (block_xx / loopIncr);
> - qp_offset += qpoffs[idx];
> - cnt++;
> + int32_t offset = (int32_t)(dQpOffset * 100 + .5);
> + double threshold = (1 - ((x265_ADAPT_RD_STRENGTH -
> m_param->dynamicRd) * 0.5));
> + int32_t max_threshold = (int32_t)(threshold * 100 + .5);
> + return (offset < max_threshold);
> }
> }
> -
> - qp_offset /= cnt;
> - qp += qp_offset;
> - if (complexCheck)
> + qp += dQpOffset;
> + }
> + else
> + {
> + int loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
> + /* Use cuTree offsets if cuTree enabled and frame is referenced,
> else use AQ offsets */
> + double *qpoffs = bCuTreeOffset ? m_frame->m_lowres.qpCuTreeOffset
> : m_frame->m_lowres.qpAqOffset;
> + if (qpoffs)
> {
> - int32_t offset = (int32_t)(qp_offset * 100 + .5);
> - double threshold = (1 - ((x265_ADAPT_RD_STRENGTH -
> m_param->dynamicRd) * 0.5));
> - int32_t max_threshold = (int32_t)(threshold * 100 + .5);
> - return (offset < max_threshold);
> + uint32_t width = m_frame->m_fencPic->m_picWidth;
> + uint32_t height = m_frame->m_fencPic->m_picHeight;
> + uint32_t block_x = ctu.m_cuPelX +
> g_zscanToPelX[cuGeom.absPartIdx];
> + uint32_t block_y = ctu.m_cuPelY +
> g_zscanToPelY[cuGeom.absPartIdx];
> + uint32_t maxCols = (m_frame->m_fencPic->m_picWidth +
> (loopIncr - 1)) / loopIncr;
> + uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth;
> + double dQpOffset = 0;
> + uint32_t cnt = 0;
> + for (uint32_t block_yy = block_y; block_yy < block_y +
> blockSize && block_yy < height; block_yy += loopIncr)
> + {
> + for (uint32_t block_xx = block_x; block_xx < block_x +
> blockSize && block_xx < width; block_xx += loopIncr)
> + {
> + uint32_t idx = ((block_yy / loopIncr) * (maxCols)) +
> (block_xx / loopIncr);
> + dQpOffset += qpoffs[idx];
> + cnt++;
> + }
> + }
> + dQpOffset /= cnt;
> + qp += dQpOffset;
> + if (complexCheck)
> + {
> + int32_t offset = (int32_t)(dQpOffset * 100 + .5);
> + double threshold = (1 - ((x265_ADAPT_RD_STRENGTH -
> m_param->dynamicRd) * 0.5));
> + int32_t max_threshold = (int32_t)(threshold * 100 + .5);
> + return (offset < max_threshold);
> + }
> }
> }
>
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/analysis.h Thu Dec 27 14:43:54 2018 +0530
> @@ -201,7 +201,8 @@
>
> void classifyCU(const CUData& ctu, const CUGeom& cuGeom, const Mode&
> bestMode, TrainingData& trainData);
> void trainCU(const CUData& ctu, const CUGeom& cuGeom, const Mode&
> bestMode, TrainingData& trainData);
> -
> + double aqQPOffset(const CUData& ctu, const CUGeom& cuGeom);
> + double cuTreeQPOffset(const CUData& ctu, const CUGeom& cuGeom);
> void calculateNormFactor(CUData& ctu, int qp);
> void normFactor(const pixel* src, uint32_t blockSize, CUData& ctu,
> int qp, TextType ttype);
>
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/encoder.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -2839,6 +2839,12 @@
> if (p->rc.aqMode == X265_AQ_NONE && p->rc.cuTree == 0)
> p->rc.aqStrength = 0;
>
> + if (p->rc.hevcAq && p->rc.aqMode)
> + {
> + p->rc.aqMode = X265_AQ_NONE;
> + x265_log(p, X265_LOG_WARNING, "hevc-aq enabled, disabling other
> aq-modes\n");
> + }
> +
> if (p->totalFrames && p->totalFrames <= 2 * ((float)p->fpsNum) /
> p->fpsDenom && p->rc.bStrictCbr)
> p->lookaheadDepth = p->totalFrames;
> if (p->bIntraRefresh)
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/ratecontrol.cpp
> --- a/source/encoder/ratecontrol.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/ratecontrol.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -153,10 +153,7 @@
> int lowresCuHeight = ((m_param->sourceHeight / 2) +
> X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
> m_ncu = lowresCuWidth * lowresCuHeight;
>
> - if (m_param->rc.cuTree)
> - m_qCompress = 1;
> - else
> - m_qCompress = m_param->rc.qCompress;
> + m_qCompress = (m_param->rc.cuTree && !m_param->rc.hevcAq) ? 1 :
> m_param->rc.qCompress;
>
> // validate for param->rc, maybe it is need to add a function like
> x265_parameters_valiate()
> m_residualFrames = 0;
> @@ -381,13 +378,14 @@
>
> m_isGrainEnabled = false;
> if(m_param->rc.bEnableGrain) // tune for grainy content OR equal p-b
> frame sizes
> - m_isGrainEnabled = true;
> + m_isGrainEnabled = true;
> for (int i = 0; i < 3; i++)
> - m_lastQScaleFor[i] = x265_qp2qScale(m_param->rc.rateControlMode ==
> X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN);
> + m_lastQScaleFor[i] = x265_qp2qScale(m_param->rc.rateControlMode
> == X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN);
> m_avgPFrameQp = 0 ;
>
> /* 720p videos seem to be a good cutoff for cplxrSum */
> - double tuneCplxFactor = (m_ncu > 3600 && m_param->rc.cuTree) ? 2.5 :
> m_isGrainEnabled ? 1.9 : 1;
> + double tuneCplxFactor = (m_ncu > 3600 && m_param->rc.cuTree &&
> !m_param->rc.hevcAq) ? 2.5 : m_param->rc.hevcAq ? 1.5 : m_isGrainEnabled ?
> 1.9 : 1.0;
> +
> /* estimated ratio that produces a reasonable QP for the first
> I-frame */
> m_cplxrSum = .01 * pow(7.0e5, m_qCompress) * pow(m_ncu, 0.5) *
> tuneCplxFactor;
> m_wantedBitsWindow = m_bitrate * m_frameDuration;
> @@ -2563,7 +2561,7 @@
> {
> double q;
>
> - if (m_param->rc.cuTree)
> + if (m_param->rc.cuTree && !m_param->rc.hevcAq)
> {
> // Scale and units are obtained from rateNum and rateDenom for
> videos with fixed frame rates.
> double timescale = (double)m_param->fpsDenom / (2 *
> m_param->fpsNum);
> @@ -2571,6 +2569,7 @@
> }
> else
> q = pow(rce->blurredComplexity, 1 - m_param->rc.qCompress);
> +
> // avoid NaN's in the Rceq
> if (rce->coeffBits + rce->mvBits == 0)
> q = m_lastQScaleFor[rce->sliceType];
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/slicetype.cpp Thu Dec 27 14:43:54 2018 +0530
> @@ -3,6 +3,7 @@
> *
> * Authors: Gopu Govindaswamy <gopu at multicorewareinc.com>
> * Steve Borho <steve at borho.org>
> + * Ashok Kumar Mishra <ashok at multicorewareinc.com>
> *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> @@ -105,6 +106,7 @@
> x265_emms();
> return var;
> }
> +
> /* Find the sum of pixels of each block for luma plane */
> uint32_t LookaheadTLD::lumaSumCu(Frame* curFrame, uint32_t blockX,
> uint32_t blockY, uint32_t qgSize)
> {
> @@ -121,6 +123,157 @@
> return (uint32_t)sum_ssd;
> }
>
> +void LookaheadTLD::xPreanalyzeQp(Frame* curFrame)
> +{
> + const uint32_t width = curFrame->m_fencPic->m_picWidth;
> + const uint32_t height = curFrame->m_fencPic->m_picHeight;
> +
> + for (uint32_t d = 0; d < 4; d++)
> + {
> + int ctuSizeIdx = 6 - g_log2Size[curFrame->m_param->maxCUSize];
> + int aqDepth = g_log2Size[curFrame->m_param->maxCUSize] -
> g_log2Size[curFrame->m_param->rc.qgSize];
> + if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> + continue;
> +
> + PicQPAdaptationLayer* pcAQLayer = &curFrame->m_lowres.pAQLayer[d];
> + const uint32_t aqPartWidth = pcAQLayer->aqPartWidth;
> + const uint32_t aqPartHeight = pcAQLayer->aqPartHeight;
> + double* pcAQU = pcAQLayer->dActivity;
> + double* pcQP = pcAQLayer->dQpOffset;
> + double* pcCuTree = pcAQLayer->dCuTreeOffset;
> +
> + for (uint32_t y = 0; y < height; y += aqPartHeight)
> + {
> + for (uint32_t x = 0; x < width; x += aqPartWidth, pcAQU++,
> pcQP++, pcCuTree++)
> + {
> + double dMaxQScale = pow(2.0,
> curFrame->m_param->rc.qpAdaptationRange / 6.0);
> + double dCUAct = *pcAQU;
> + double dAvgAct = pcAQLayer->dAvgActivity;
> +
> + double dNormAct = (dMaxQScale*dCUAct + dAvgAct) / (dCUAct
> + dMaxQScale*dAvgAct);
> + double dQpOffset = (X265_LOG2(dNormAct) / X265_LOG2(2.0))
> * 6.0;
> + *pcQP = dQpOffset;
> + *pcCuTree = dQpOffset;
> + }
> + }
> + }
> +}
> +
> +void LookaheadTLD::xPreanalyze(Frame* curFrame)
> +{
> + const uint32_t width = curFrame->m_fencPic->m_picWidth;
> + const uint32_t height = curFrame->m_fencPic->m_picHeight;
> + const intptr_t stride = curFrame->m_fencPic->m_stride;
> +
> + for (uint32_t d = 0; d < 4; d++)
> + {
> + int ctuSizeIdx = 6 - g_log2Size[curFrame->m_param->maxCUSize];
> + int aqDepth = g_log2Size[curFrame->m_param->maxCUSize] -
> g_log2Size[curFrame->m_param->rc.qgSize];
> + if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> + continue;
> +
> + const pixel* src = curFrame->m_fencPic->m_picOrg[0];;
> + PicQPAdaptationLayer* pQPLayer = &curFrame->m_lowres.pAQLayer[d];
> + const uint32_t aqPartWidth = pQPLayer->aqPartWidth;
> + const uint32_t aqPartHeight = pQPLayer->aqPartHeight;
> + double* pcAQU = pQPLayer->dActivity;
> +
> + double dSumAct = 0.0;
> + for (uint32_t y = 0; y < height; y += aqPartHeight)
> + {
> + const uint32_t currAQPartHeight = X265_MIN(aqPartHeight,
> height - y);
> + for (uint32_t x = 0; x < width; x += aqPartWidth, pcAQU++)
> + {
> + const uint32_t currAQPartWidth = X265_MIN(aqPartWidth,
> width - x);
> + const pixel* pBlkY = &src[x];
> + uint64_t sum[4] = { 0, 0, 0, 0 };
> + uint64_t sumSq[4] = { 0, 0, 0, 0 };
> + uint32_t by = 0;
> + for (; by < currAQPartHeight >> 1; by++)
> + {
> + uint32_t bx = 0;
> + for (; bx < currAQPartWidth >> 1; bx++)
> + {
> + sum[0] += pBlkY[bx];
> + sumSq[0] += pBlkY[bx] * pBlkY[bx];
> + }
> + for (; bx < currAQPartWidth; bx++)
> + {
> + sum[1] += pBlkY[bx];
> + sumSq[1] += pBlkY[bx] * pBlkY[bx];
> + }
> + pBlkY += stride;
> + }
> + for (; by < currAQPartHeight; by++)
> + {
> + uint32_t bx = 0;
> + for (; bx < currAQPartWidth >> 1; bx++)
> + {
> + sum[2] += pBlkY[bx];
> + sumSq[2] += pBlkY[bx] * pBlkY[bx];
> + }
> + for (; bx < currAQPartWidth; bx++)
> + {
> + sum[3] += pBlkY[bx];
> + sumSq[3] += pBlkY[bx] * pBlkY[bx];
> + }
> + pBlkY += stride;
> + }
> +
> + assert((currAQPartWidth & 1) == 0);
> + assert((currAQPartHeight & 1) == 0);
> + const uint32_t pixelWidthOfQuadrants = currAQPartWidth >>
> 1;
> + const uint32_t pixelHeightOfQuadrants = currAQPartHeight
> >> 1;
> + const uint32_t numPixInAQPart = pixelWidthOfQuadrants *
> pixelHeightOfQuadrants;
> +
> + double dMinVar = MAX_DOUBLE;
> + if (numPixInAQPart != 0)
> + {
> + for (int i = 0; i < 4; i++)
> + {
> + const double dAverage = double(sum[i]) /
> numPixInAQPart;
> + const double dVariance = double(sumSq[i]) /
> numPixInAQPart - dAverage * dAverage;
> + dMinVar = X265_MIN(dMinVar, dVariance);
> + }
> + }
> + else
> + {
> + dMinVar = 0.0;
> + }
> + double dActivity = 1.0 + dMinVar;
> + *pcAQU = dActivity;
> + dSumAct += dActivity;
> + }
> + src += stride * currAQPartHeight;
> + }
> +
> + const double dAvgAct = dSumAct / (pQPLayer->numAQPartInWidth *
> pQPLayer->numAQPartInHeight);
> + pQPLayer->dAvgActivity = dAvgAct;
> + }
> +
> + xPreanalyzeQp(curFrame);
> +
> + int minAQDepth = curFrame->m_lowres.pAQLayer->minAQDepth;
> +
> + PicQPAdaptationLayer* pQPLayer =
> &curFrame->m_lowres.pAQLayer[minAQDepth];
> + const uint32_t aqPartWidth = pQPLayer->aqPartWidth;
> + const uint32_t aqPartHeight = pQPLayer->aqPartHeight;
> + double* pcQP = pQPLayer->dQpOffset;
> +
> + // Use new qp offset values for qpAqOffset, qpCuTreeOffset and
> invQscaleFactor buffer
> + int blockXY = 0;
> + for (uint32_t y = 0; y < height; y += aqPartHeight)
> + {
> + for (uint32_t x = 0; x < width; x += aqPartWidth, pcQP++)
> + {
> + curFrame->m_lowres.invQscaleFactor[blockXY] =
> x265_exp2fix8(*pcQP);
> + blockXY++;
> +
> + acEnergyCu(curFrame, x, y, curFrame->m_param->internalCsp,
> curFrame->m_param->rc.qgSize);
> + }
> + }
> +}
> +
> void LookaheadTLD::calcAdaptiveQuantFrame(Frame *curFrame, x265_param*
> param)
> {
> /* Actual adaptive quantization */
> @@ -176,90 +329,99 @@
> if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
> {
> for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> - for (int blockX = 0; blockX < maxCol; blockX +=
> loopIncr)
> - acEnergyCu(curFrame, blockX, blockY,
> param->internalCsp, param->rc.qgSize);
> + for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> + acEnergyCu(curFrame, blockX, blockY,
> param->internalCsp, param->rc.qgSize);
> }
> }
> else
> {
> - int blockXY = 0;
> - double avg_adj_pow2 = 0.f, avg_adj = 0.f, qp_adj = 0.f;
> - double bias_strength = 0.f, strength = 0.f;
> - if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode
> == X265_AQ_AUTO_VARIANCE_BIASED)
> + if (param->rc.hevcAq)
> {
> - double bit_depth_correction = 1.f / (1 <<
> (2*(X265_DEPTH-8)));
> -
> - for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> - {
> - for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> - {
> - uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp, param->rc.qgSize);
> - qp_adj = pow(energy * bit_depth_correction + 1, 0.1);
> - curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
> - avg_adj += qp_adj;
> - avg_adj_pow2 += qp_adj * qp_adj;
> - blockXY++;
> - }
> - }
> - avg_adj /= blockCount;
> - avg_adj_pow2 /= blockCount;
> - strength = param->rc.aqStrength * avg_adj;
> - avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - modeTwoConst) /
> avg_adj;
> - bias_strength = param->rc.aqStrength;
> + // New method for calculating variance and qp offset
> + xPreanalyze(curFrame);
> }
> else
> - strength = param->rc.aqStrength * 1.0397f;
> -
> - blockXY = 0;
> - for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> {
> - for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> + int blockXY = 0;
> + double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
> + double bias_strength = 0.f;
> + double strength = 0.f;
> + if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE ||
> param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
> {
> - if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
> - {
> - qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
> - qp_adj = strength * (qp_adj - avg_adj) +
> bias_strength * (1.f - modeTwoConst / (qp_adj * qp_adj));
> - }
> - else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
> - {
> - qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
> - qp_adj = strength * (qp_adj - avg_adj);
> - }
> - else
> - {
> - uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp,param->rc.qgSize);
> - qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) -
> (modeOneConst + 2 * (X265_DEPTH - 8)));
> - }
> -
> - if (param->bHDROpt)
> + double bit_depth_correction = 1.f / (1 << (2 *
> (X265_DEPTH - 8)));
> +
> + for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> {
> - uint32_t sum = lumaSumCu(curFrame, blockX, blockY,
> param->rc.qgSize);
> - uint32_t lumaAvg = sum / (loopIncr * loopIncr);
> - if (lumaAvg < 301)
> - qp_adj += 3;
> - else if (lumaAvg >= 301 && lumaAvg < 367)
> - qp_adj += 2;
> - else if (lumaAvg >= 367 && lumaAvg < 434)
> - qp_adj += 1;
> - else if (lumaAvg >= 501 && lumaAvg < 567)
> - qp_adj -= 1;
> - else if (lumaAvg >= 567 && lumaAvg < 634)
> - qp_adj -= 2;
> - else if (lumaAvg >= 634 && lumaAvg < 701)
> - qp_adj -= 3;
> - else if (lumaAvg >= 701 && lumaAvg < 767)
> - qp_adj -= 4;
> - else if (lumaAvg >= 767 && lumaAvg < 834)
> - qp_adj -= 5;
> - else if (lumaAvg >= 834)
> - qp_adj -= 6;
> + for (int blockX = 0; blockX < maxCol; blockX +=
> loopIncr)
> + {
> + uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp, param->rc.qgSize);
> + qp_adj = pow(energy * bit_depth_correction + 1,
> 0.1);
> + curFrame->m_lowres.qpCuTreeOffset[blockXY] =
> qp_adj;
> + avg_adj += qp_adj;
> + avg_adj_pow2 += qp_adj * qp_adj;
> + blockXY++;
> + }
> }
> - if (quantOffsets != NULL)
> - qp_adj += quantOffsets[blockXY];
> - curFrame->m_lowres.qpAqOffset[blockXY] = qp_adj;
> - curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
> - curFrame->m_lowres.invQscaleFactor[blockXY] =
> x265_exp2fix8(qp_adj);
> - blockXY++;
> + avg_adj /= blockCount;
> + avg_adj_pow2 /= blockCount;
> + strength = param->rc.aqStrength * avg_adj;
> + avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - modeTwoConst)
> / avg_adj;
> + bias_strength = param->rc.aqStrength;
> + }
> + else
> + strength = param->rc.aqStrength * 1.0397f;
> +
> + blockXY = 0;
> + for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> + {
> + for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> + {
> + if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
> + {
> + qp_adj =
> curFrame->m_lowres.qpCuTreeOffset[blockXY];
> + qp_adj = strength * (qp_adj - avg_adj) +
> bias_strength * (1.f - modeTwoConst / (qp_adj * qp_adj));
> + }
> + else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
> + {
> + qp_adj =
> curFrame->m_lowres.qpCuTreeOffset[blockXY];
> + qp_adj = strength * (qp_adj - avg_adj);
> + }
> + else
> + {
> + uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp, param->rc.qgSize);
> + qp_adj = strength * (X265_LOG2(X265_MAX(energy,
> 1)) - (modeOneConst + 2 * (X265_DEPTH - 8)));
> + }
> +
> + if (param->bHDROpt)
> + {
> + uint32_t sum = lumaSumCu(curFrame, blockX,
> blockY, param->rc.qgSize);
> + uint32_t lumaAvg = sum / (loopIncr * loopIncr);
> + if (lumaAvg < 301)
> + qp_adj += 3;
> + else if (lumaAvg >= 301 && lumaAvg < 367)
> + qp_adj += 2;
> + else if (lumaAvg >= 367 && lumaAvg < 434)
> + qp_adj += 1;
> + else if (lumaAvg >= 501 && lumaAvg < 567)
> + qp_adj -= 1;
> + else if (lumaAvg >= 567 && lumaAvg < 634)
> + qp_adj -= 2;
> + else if (lumaAvg >= 634 && lumaAvg < 701)
> + qp_adj -= 3;
> + else if (lumaAvg >= 701 && lumaAvg < 767)
> + qp_adj -= 4;
> + else if (lumaAvg >= 767 && lumaAvg < 834)
> + qp_adj -= 5;
> + else if (lumaAvg >= 834)
> + qp_adj -= 6;
> + }
> + if (quantOffsets != NULL)
> + qp_adj += quantOffsets[blockXY];
> + curFrame->m_lowres.qpAqOffset[blockXY] = qp_adj;
> + curFrame->m_lowres.qpCuTreeOffset[blockXY] = qp_adj;
> + curFrame->m_lowres.invQscaleFactor[blockXY] =
> x265_exp2fix8(qp_adj);
> + blockXY++;
> + }
> }
> }
> }
> @@ -301,11 +463,13 @@
> {
> int blockXY = 0;
> for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
> + {
> for (int blockX = 0; blockX < maxCol; blockX += loopIncr)
> {
> curFrame->m_lowres.blockVariance[blockXY] =
> acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
> blockXY++;
> }
> + }
> }
> }
>
> @@ -596,13 +760,16 @@
>
> /* Allow the strength to be adjusted via qcompress, since the two
> concepts
> * are very similar. */
> -
> - m_cuTreeStrength = 5.0 * (1.0 - m_param->rc.qCompress);
> + m_cuTreeStrength = (m_param->rc.hevcAq ? 6.0 : 5.0) * (1.0 -
> m_param->rc.qCompress);
>
> m_lastKeyframe = -m_param->keyframeMax;
> m_sliceTypeBusy = false;
> m_fullQueueSize = X265_MAX(1, m_param->lookaheadDepth);
> - m_bAdaptiveQuant = m_param->rc.aqMode || m_param->bEnableWeightedPred
> || m_param->bEnableWeightedBiPred || m_param->bAQMotion;
> + m_bAdaptiveQuant = m_param->rc.aqMode ||
> + m_param->bEnableWeightedPred ||
> + m_param->bEnableWeightedBiPred ||
> + m_param->bAQMotion ||
> + m_param->rc.hevcAq;
>
> /* If we have a thread pool and are using --b-adapt 2, it is generally
> * preferable to perform all motion searches for each lowres frame in
> large
> @@ -919,6 +1086,7 @@
> if (!m_param->analysisLoad || !m_param->bDisableLookahead)
> {
> X265_CHECK(curFrame->m_lowres.costEst[b - p0][p1 - b] > 0, "Slice
> cost not estimated\n")
> +
> if (m_param->rc.cuTree && !m_param->rc.bStatRead)
> /* update row satds based on cutree offsets */
> curFrame->m_lowres.satdCost = frameCostRecalculate(frames,
> p0, p1, b);
> @@ -1695,6 +1863,7 @@
>
> if (m_param->rc.cuTree)
> cuTree(frames, X265_MIN(numFrames, m_param->keyframeMax),
> bKeyframe);
> +
> if (m_param->gopLookahead && (keyFrameLimit >= 0) && (keyFrameLimit
> <= m_param->bframes + 1) && !m_extendGopBoundary)
> keyintLimit = keyFrameLimit;
>
> @@ -1928,6 +2097,7 @@
>
> return cost;
> }
> +
> void Lookahead::aqMotion(Lowres **frames, bool bIntra)
> {
> if (!bIntra)
> @@ -2223,44 +2393,191 @@
> cuTreeFinish(frames[b], averageDuration, b == p1 ? b - p0 : 0);
> }
>
> -void Lookahead::cuTreeFinish(Lowres *frame, double averageDuration, int
> ref0Distance)
> +void Lookahead::computeCUTreeQpOffset(Lowres *frame, double
> averageDuration, int ref0Distance)
> {
> int fpsFactor = (int)(CLIP_DURATION(averageDuration) /
> CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) * 256);
> + uint32_t loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
> +
> double weightdelta = 0.0;
> -
> if (ref0Distance && frame->weightedCostDelta[ref0Distance - 1] > 0)
> weightdelta = (1.0 - frame->weightedCostDelta[ref0Distance - 1]);
>
> + uint32_t widthFullRes = frame->widthFullRes;
> + uint32_t heightFullRes = frame->heightFullRes;
> +
> if (m_param->rc.qgSize == 8)
> {
> + int minAQDepth = frame->pAQLayer->minAQDepth;
> +
> + PicQPAdaptationLayer* pQPLayerMin = &frame->pAQLayer[minAQDepth];
> + double* pcCuTree8x8 = pQPLayerMin->dCuTreeOffset8x8;
> +
> for (int cuY = 0; cuY < m_8x8Height; cuY++)
> {
> for (int cuX = 0; cuX < m_8x8Width; cuX++)
> {
> const int cuXY = cuX + cuY * m_8x8Width;
> - int intracost = ((frame->intraCost[cuXY]) / 4 *
> frame->invQscaleFactor8x8[cuXY] + 128) >> 8;
> + int intracost = ((frame->intraCost[cuXY] / 4) *
> frame->invQscaleFactor8x8[cuXY] + 128) >> 8;
> if (intracost)
> {
> - int propagateCost = ((frame->propagateCost[cuXY]) / 4
> * fpsFactor + 128) >> 8;
> + int propagateCost = ((frame->propagateCost[cuXY] /
> 4) * fpsFactor + 128) >> 8;
> double log2_ratio = X265_LOG2(intracost +
> propagateCost) - X265_LOG2(intracost) + weightdelta;
> - frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4]
> = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4] - m_cuTreeStrength *
> (log2_ratio);
> - frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4
> + 1] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + 1] -
> m_cuTreeStrength * (log2_ratio);
> - frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4
> + frame->maxBlocksInRowFullRes] = frame->qpAqOffset[cuX * 2 + cuY *
> m_8x8Width * 4 + frame->maxBlocksInRowFullRes] - m_cuTreeStrength *
> (log2_ratio);
> - frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width * 4
> + frame->maxBlocksInRowFullRes + 1] = frame->qpAqOffset[cuX * 2 + cuY *
> m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1] - m_cuTreeStrength *
> (log2_ratio);
> +
> + pcCuTree8x8[cuX * 2 + cuY * m_8x8Width * 4] =
> log2_ratio;
> + pcCuTree8x8[cuX * 2 + cuY * m_8x8Width * 4 + 1] =
> log2_ratio;
> + pcCuTree8x8[cuX * 2 + cuY * m_8x8Width * 4 +
> frame->maxBlocksInRowFullRes] = log2_ratio;
> + pcCuTree8x8[cuX * 2 + cuY * m_8x8Width * 4 +
> frame->maxBlocksInRowFullRes + 1] = log2_ratio;
> + }
> + }
> + }
> +
> + for (uint32_t d = 0; d < 4; d++)
> + {
> + int ctuSizeIdx = 6 - g_log2Size[m_param->maxCUSize];
> + int aqDepth = g_log2Size[m_param->maxCUSize] -
> g_log2Size[m_param->rc.qgSize];
> + if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> + continue;
> +
> + PicQPAdaptationLayer* pQPLayer = &frame->pAQLayer[d];
> + const uint32_t aqPartWidth = pQPLayer->aqPartWidth;
> + const uint32_t aqPartHeight = pQPLayer->aqPartHeight;
> +
> + const uint32_t numAQPartInWidth = pQPLayer->numAQPartInWidth;
> + const uint32_t numAQPartInHeight =
> pQPLayer->numAQPartInHeight;
> +
> + double* pcQP = pQPLayer->dQpOffset;
> + double* pcCuTree = pQPLayer->dCuTreeOffset;
> +
> + uint32_t maxCols = frame->maxBlocksInRowFullRes;
> +
> + for (uint32_t y = 0; y < numAQPartInHeight; y++)
> + {
> + for (uint32_t x = 0; x < numAQPartInWidth; x++, pcQP++,
> pcCuTree++)
> + {
> + uint32_t block_x = x * aqPartWidth;
> + uint32_t block_y = y * aqPartHeight;
> +
> + uint32_t blockXY = 0;
> + double log2_ratio = 0;
> + for (uint32_t block_yy = block_y; block_yy < block_y
> + aqPartHeight && block_yy < heightFullRes; block_yy += loopIncr)
> + {
> + for (uint32_t block_xx = block_x; block_xx <
> block_x + aqPartWidth && block_xx < widthFullRes; block_xx += loopIncr)
> + {
> + uint32_t idx = ((block_yy / loopIncr) *
> (maxCols)) + (block_xx / loopIncr);
> +
> + log2_ratio += *(pcCuTree8x8 + idx);
> +
> + blockXY++;
> + }
> + }
> +
> + double qp_offset = (m_cuTreeStrength * log2_ratio) /
> blockXY;
> +
> + *pcCuTree = *pcQP - qp_offset;
> }
> }
> }
> }
> else
> {
> - for (int cuIndex = 0; cuIndex < m_cuCount; cuIndex++)
> + for (uint32_t d = 0; d < 4; d++)
> {
> - int intracost = (frame->intraCost[cuIndex] *
> frame->invQscaleFactor[cuIndex] + 128) >> 8;
> - if (intracost)
> + int ctuSizeIdx = 6 - g_log2Size[m_param->maxCUSize];
> + int aqDepth = g_log2Size[m_param->maxCUSize] -
> g_log2Size[m_param->rc.qgSize];
> + if (!aqLayerDepth[ctuSizeIdx][aqDepth][d])
> + continue;
> +
> + PicQPAdaptationLayer* pQPLayer = &frame->pAQLayer[d];
> + const uint32_t aqPartWidth = pQPLayer->aqPartWidth;
> + const uint32_t aqPartHeight = pQPLayer->aqPartHeight;
> +
> + const uint32_t numAQPartInWidth = pQPLayer->numAQPartInWidth;
> + const uint32_t numAQPartInHeight =
> pQPLayer->numAQPartInHeight;
> +
> + double* pcQP = pQPLayer->dQpOffset;
> + double* pcCuTree = pQPLayer->dCuTreeOffset;
> +
> + uint32_t maxCols = frame->maxBlocksInRow;
> +
> + for (uint32_t y = 0; y < numAQPartInHeight; y++)
> {
> - int propagateCost = (frame->propagateCost[cuIndex] *
> fpsFactor + 128) >> 8;
> - double log2_ratio = X265_LOG2(intracost + propagateCost)
> - X265_LOG2(intracost) + weightdelta;
> - frame->qpCuTreeOffset[cuIndex] =
> frame->qpAqOffset[cuIndex] - m_cuTreeStrength * log2_ratio;
> + for (uint32_t x = 0; x < numAQPartInWidth; x++, pcQP++,
> pcCuTree++)
> + {
> + uint32_t block_x = x * aqPartWidth;
> + uint32_t block_y = y * aqPartHeight;
> +
> + uint32_t blockXY = 0;
> + double log2_ratio = 0;
> + for (uint32_t block_yy = block_y; block_yy < block_y
> + aqPartHeight && block_yy < heightFullRes; block_yy += loopIncr)
> + {
> + for (uint32_t block_xx = block_x; block_xx <
> block_x + aqPartWidth && block_xx < widthFullRes; block_xx += loopIncr)
> + {
> + uint32_t idx = ((block_yy / loopIncr) *
> (maxCols)) + (block_xx / loopIncr);
> +
> + int intraCost = (frame->intraCost[idx] *
> frame->invQscaleFactor[idx] + 128) >> 8;
> + int propagateCost =
> (frame->propagateCost[idx] * fpsFactor + 128) >> 8;
> +
> + log2_ratio += (X265_LOG2(intraCost +
> propagateCost) - X265_LOG2(intraCost) + weightdelta);
> +
> + blockXY++;
> + }
> + }
> +
> + double qp_offset = (m_cuTreeStrength * log2_ratio) /
> blockXY;
> +
> + *pcCuTree = *pcQP - qp_offset;
> +
> + }
> + }
> + }
> + }
> +}
> +
> +void Lookahead::cuTreeFinish(Lowres *frame, double averageDuration, int
> ref0Distance)
> +{
> + if (m_param->rc.hevcAq)
> + {
> + computeCUTreeQpOffset(frame, averageDuration, ref0Distance);
> + }
> + else
> + {
> + int fpsFactor = (int)(CLIP_DURATION(averageDuration) /
> CLIP_DURATION((double)m_param->fpsDenom / m_param->fpsNum) * 256);
> + double weightdelta = 0.0;
> +
> + if (ref0Distance && frame->weightedCostDelta[ref0Distance - 1] >
> 0)
> + weightdelta = (1.0 - frame->weightedCostDelta[ref0Distance -
> 1]);
> +
> + if (m_param->rc.qgSize == 8)
> + {
> + for (int cuY = 0; cuY < m_8x8Height; cuY++)
> + {
> + for (int cuX = 0; cuX < m_8x8Width; cuX++)
> + {
> + const int cuXY = cuX + cuY * m_8x8Width;
> + int intracost = ((frame->intraCost[cuXY]) / 4 *
> frame->invQscaleFactor8x8[cuXY] + 128) >> 8;
> + if (intracost)
> + {
> + int propagateCost = ((frame->propagateCost[cuXY])
> / 4 * fpsFactor + 128) >> 8;
> + double log2_ratio = X265_LOG2(intracost +
> propagateCost) - X265_LOG2(intracost) + weightdelta;
> + frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width
> * 4] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4] - m_cuTreeStrength
> * (log2_ratio);
> + frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width
> * 4 + 1] = frame->qpAqOffset[cuX * 2 + cuY * m_8x8Width * 4 + 1] -
> m_cuTreeStrength * (log2_ratio);
> + frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width
> * 4 + frame->maxBlocksInRowFullRes] = frame->qpAqOffset[cuX * 2 + cuY *
> m_8x8Width * 4 + frame->maxBlocksInRowFullRes] - m_cuTreeStrength *
> (log2_ratio);
> + frame->qpCuTreeOffset[cuX * 2 + cuY * m_8x8Width
> * 4 + frame->maxBlocksInRowFullRes + 1] = frame->qpAqOffset[cuX * 2 + cuY *
> m_8x8Width * 4 + frame->maxBlocksInRowFullRes + 1] - m_cuTreeStrength *
> (log2_ratio);
> + }
> + }
> + }
> + }
> + else
> + {
> + for (int cuIndex = 0; cuIndex < m_cuCount; cuIndex++)
> + {
> + int intracost = (frame->intraCost[cuIndex] *
> frame->invQscaleFactor[cuIndex] + 128) >> 8;
> + if (intracost)
> + {
> + int propagateCost = (frame->propagateCost[cuIndex] *
> fpsFactor + 128) >> 8;
> + double log2_ratio = X265_LOG2(intracost +
> propagateCost) - X265_LOG2(intracost) + weightdelta;
> + frame->qpCuTreeOffset[cuIndex] =
> frame->qpAqOffset[cuIndex] - m_cuTreeStrength * log2_ratio;
> + }
> }
> }
> }
> @@ -2275,31 +2592,71 @@
>
> int64_t score = 0;
> int *rowSatd = frames[b]->rowSatds[b - p0][p1 - b];
> - double *qp_offset = frames[b]->qpCuTreeOffset;
>
> x265_emms();
> - for (int cuy = m_8x8Height - 1; cuy >= 0; cuy--)
> +
> + if (m_param->rc.hevcAq)
> {
> - rowSatd[cuy] = 0;
> - for (int cux = m_8x8Width - 1; cux >= 0; cux--)
> + int minAQDepth = frames[b]->pAQLayer->minAQDepth;
> + PicQPAdaptationLayer* pQPLayer = &frames[b]->pAQLayer[minAQDepth];
> + double* pcQPCuTree = pQPLayer->dCuTreeOffset;
> +
> + // Use new qp offset values for qpAqOffset, qpCuTreeOffset and
> invQscaleFactor buffer
> + for (int cuy = m_8x8Height - 1; cuy >= 0; cuy--)
> {
> - int cuxy = cux + cuy * m_8x8Width;
> - int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy] &
> LOWRES_COST_MASK;
> - double qp_adj;
> - if (m_param->rc.qgSize == 8)
> - qp_adj = (qp_offset[cux * 2 + cuy * m_8x8Width * 4] +
> - qp_offset[cux * 2 + cuy * m_8x8Width * 4 + 1] +
> - qp_offset[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes] +
> - qp_offset[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes + 1]) / 4;
> - else
> - qp_adj = qp_offset[cuxy];
> - cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
> - rowSatd[cuy] += cuCost;
> - if ((cuy > 0 && cuy < m_8x8Height - 1 &&
> - cux > 0 && cux < m_8x8Width - 1) ||
> - m_8x8Width <= 2 || m_8x8Height <= 2)
> + rowSatd[cuy] = 0;
> + for (int cux = m_8x8Width - 1; cux >= 0; cux--)
> {
> - score += cuCost;
> + int cuxy = cux + cuy * m_8x8Width;
> + int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy]
> & LOWRES_COST_MASK;
> + double qp_adj;
> +
> + if (m_param->rc.qgSize == 8)
> + qp_adj = (pcQPCuTree[cux * 2 + cuy * m_8x8Width * 4] +
> + pcQPCuTree[cux * 2 + cuy * m_8x8Width * 4 + 1] +
> + pcQPCuTree[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes] +
> + pcQPCuTree[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes + 1]) / 4;
> + else
> + qp_adj = *(pcQPCuTree + cuxy);
> +
> + cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
> + rowSatd[cuy] += cuCost;
> + if ((cuy > 0 && cuy < m_8x8Height - 1 &&
> + cux > 0 && cux < m_8x8Width - 1) ||
> + m_8x8Width <= 2 || m_8x8Height <= 2)
> + {
> + score += cuCost;
> + }
> + }
> + }
> + }
> + else
> + {
> + double *qp_offset = frames[b]->qpCuTreeOffset;
> +
> + for (int cuy = m_8x8Height - 1; cuy >= 0; cuy--)
> + {
> + rowSatd[cuy] = 0;
> + for (int cux = m_8x8Width - 1; cux >= 0; cux--)
> + {
> + int cuxy = cux + cuy * m_8x8Width;
> + int cuCost = frames[b]->lowresCosts[b - p0][p1 - b][cuxy]
> & LOWRES_COST_MASK;
> + double qp_adj;
> + if (m_param->rc.qgSize == 8)
> + qp_adj = (qp_offset[cux * 2 + cuy * m_8x8Width * 4] +
> + qp_offset[cux * 2 + cuy * m_8x8Width * 4 + 1] +
> + qp_offset[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes] +
> + qp_offset[cux * 2 + cuy * m_8x8Width * 4 +
> frames[b]->maxBlocksInRowFullRes + 1]) / 4;
> + else
> + qp_adj = qp_offset[cuxy];
> + cuCost = (cuCost * x265_exp2fix8(qp_adj) + 128) >> 8;
> + rowSatd[cuy] += cuCost;
> + if ((cuy > 0 && cuy < m_8x8Height - 1 &&
> + cux > 0 && cux < m_8x8Width - 1) ||
> + m_8x8Width <= 2 || m_8x8Height <= 2)
> + {
> + score += cuCost;
> + }
> }
> }
> }
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/encoder/slicetype.h
> --- a/source/encoder/slicetype.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/encoder/slicetype.h Thu Dec 27 14:43:54 2018 +0530
> @@ -87,7 +87,8 @@
> void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize);
>
> void weightsAnalyse(Lowres& fenc, Lowres& ref);
> -
> + void xPreanalyze(Frame* curFrame);
> + void xPreanalyzeQp(Frame* curFrame);
> protected:
>
> uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t
> blockY, int csp, uint32_t qgSize);
> @@ -175,6 +176,7 @@
> void cuTree(Lowres **frames, int numframes, bool bintra);
> void estimateCUPropagate(Lowres **frames, double average_duration,
> int p0, int p1, int b, int referenced);
> void cuTreeFinish(Lowres *frame, double averageDuration, int
> ref0Distance);
> + void computeCUTreeQpOffset(Lowres *frame, double averageDuration,
> int ref0Distance);
>
> /* called by getEstimatedPictureCost() to finalize cuTree costs */
> int64_t frameCostRecalculate(Lowres **frames, int p0, int p1, int b);
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/x265.h
> --- a/source/x265.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/x265.h Thu Dec 27 14:43:54 2018 +0530
> @@ -1347,10 +1347,22 @@
> * generally improves. Default: X265_AQ_AUTO_VARIANCE */
> int aqMode;
>
> + /*
> + * Enable adaptive quantization.
> + * It scales the quantization step size according to the spatial
> activity of one
> + * coding unit relative to frame average spatial activity. This
> AQ method utilizes
> + * the minimum variance of sub-unit in each coding unit to
> represent the coding
> + * unit's spatial complexity. */
> + int hevcAq;
> +
> /* Sets the strength of AQ bias towards low detail CTUs. Valid
> only if
> * AQ is enabled. Default value: 1.0. Acceptable values between
> 0.0 and 3.0 */
> double aqStrength;
>
> + /* Delta QP range by QP adaptation based on a psycho-visual model.
> + * Acceptable values between 1.0 and 6.0 */
> + double qpAdaptationRange;
> +
> /* Sets the maximum rate the VBV buffer should be assumed to
> refill at
> * Default is zero */
> int vbvMaxBitrate;
> diff -r 129416ec0479 -r 3cd0b5ed0b91 source/x265cli.h
> --- a/source/x265cli.h Fri Dec 28 09:03:26 2018 +0530
> +++ b/source/x265cli.h Thu Dec 27 14:43:54 2018 +0530
> @@ -311,6 +311,9 @@
> { "dolby-vision-rpu", required_argument, NULL, 0 },
> { "hrd-concat", no_argument, NULL, 0},
> { "no-hrd-concat", no_argument, NULL, 0 },
> + { "hevc-aq", no_argument, NULL, 0 },
> + { "no-hevc-aq", no_argument, NULL, 0 },
> + { "qp-adaptation-range", required_argument, NULL, 0 },
> { 0, 0, 0, 0 },
> { 0, 0, 0, 0 },
> { 0, 0, 0, 0 },
> @@ -361,7 +364,7 @@
> H0(" --dhdr10-info <filename> JSON file containing the
> Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n");
> H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for
> IDR frames and when the tone mapping information changes. Default
> disabled\n");
> #endif
> - H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision
> profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled.
> Specified as '5' or '50'. Default 0 (disabled).\n");
> + H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision
> profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled.
> Specified as '5' or '50'. Default 0 (disabled).\n");
> H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU
> metadata.\n"
> " If given, x265's Dolby Vision
> metadata parser will fill the RPU field of input pictures with the metadata
> read from the file. Default NULL(disabled).\n");
> H0(" --nalu-file <filename> Text file containing SEI
> messages in the following format : <POC><space><PREFIX><space><NAL UNIT
> TYPE>/<SEI TYPE><space><SEI Payload>\n");
> @@ -521,7 +524,9 @@
> " - 1 : Store/Load ctu
> distortion to/from the file specified in analysis-save/load.\n"
> " Default 0 - Disabled\n");
> H0(" --aq-mode <integer> Mode for Adaptive Quantization -
> 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark
> scenes. Default %d\n", param->rc.aqMode);
> + H0(" --[no-]hevc-aq Mode for HEVC Adaptive
> Quantization. Default %s\n", OPT(param->rc.hevcAq));
> H0(" --aq-strength <float> Reduces blocking and blurring in
> flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength);
> + H0(" --qp-adaptation-range <float> Delta QP range by QP adaptation
> based on a psycho-visual model (1.0 to 6.0). Default %.2f\n",
> param->rc.qpAdaptationRange);
> H0(" --[no-]aq-motion Adaptive Quantization based on
> the relative motion of each CU w.r.t., frame. Default %s\n",
> OPT(param->bOptCUDeltaQP));
> H0(" --qg-size <int> Specifies the size of the
> quantization group (64, 32, 16, 8). Default %d\n", param->rc.qgSize);
> H0(" --[no-]cutree Enable cutree for Adaptive
> Quantization. Default %s\n", OPT(param->rc.cuTree));
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20190430/c355d5da/attachment-0001.html>
More information about the x265-devel
mailing list