[x265] [PATCH] Add emergency denoising when frame qp > QP_MAX_SPEC

Thu Aug 20 08:26:43 CEST 2015

I have added some information; please review it.

On Thu, Aug 20, 2015 at 11:53 AM, Sagar Kotecha <sagar at multicorewareinc.com>
wrote:

>
>
> On Thu, Aug 20, 2015 at 8:47 AM, Steve Borho <steve at borho.org> wrote:
>
>> On 08/19, sagar at multicorewareinc.com wrote:
>> > # HG changeset patch
>> > # User Sagar Kotecha <sagar at multicorewareinc.com>
>> > # Date 1439978360 -19800
>> > # Node ID 8878f03570b2a8d0207f5e96bae3900de5653ec0
>> > # Parent  2980141a744a569ad6f60dbebdece76a4eababfd
>> > Add emergency denoising when frame qp > QP_MAX_SPEC
>> >
>> > This feature is ported from x264, and is turned on for VBV encodes
>>
>> This needs to be documented in the reST docs somewhere, since our
>> emergency denoise works differently than in x264.
>>
>>
> OK, I will send it in a separate patch.
> 
>
>
>> > diff -r 2980141a744a -r 8878f03570b2 source/common/common.h
>> > --- a/source/common/common.h  Tue Aug 18 12:45:52 2015 +0530
>> > +++ b/source/common/common.h  Wed Aug 19 15:29:20 2015 +0530
>> > @@ -317,6 +317,9 @@
>> >  #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
>> >  #define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
>> >
>> > +#define MAX_NUM_TR_COEFFS           MAX_TR_SIZE * MAX_TR_SIZE //
>> Maximum number of transform coefficients, for a 32x32 transform
>> > +#define MAX_NUM_TR_CATEGORIES       16                        // 32,
>> 16, 8, 4 transform categories each for luma and chroma
>> > +
>> >  namespace X265_NS {
>> >
>> >  enum { SAO_NUM_OFFSET = 4 };
>> > diff -r 2980141a744a -r 8878f03570b2 source/common/quant.cpp
>> > --- a/source/common/quant.cpp Tue Aug 18 12:45:52 2015 +0530
>> > +++ b/source/common/quant.cpp Wed Aug 19 15:29:20 2015 +0530
>> > @@ -444,12 +444,12 @@
>> >              primitives.cu[sizeIdx].dct(m_fencShortBuf,
>> m_fencDctCoeff, trSize);
>> >          }
>> >
>> > -        if (m_nr)
>> > +        if (m_nr && m_nr->offset)
>> >          {
>> >              /* denoise is not applied to intra residual, so DST can be
>> ignored */
>> >              int cat = sizeIdx + 4 * !isLuma + 8 * !isIntra;
>> >              int numCoeff = 1 << (log2TrSize * 2);
>> > -            primitives.denoiseDct(m_resiDctCoeff,
>> m_nr->residualSum[cat], m_nr->offsetDenoise[cat], numCoeff);
>> > +            primitives.denoiseDct(m_resiDctCoeff,
>> m_nr->residualSum[cat], m_nr->offset[cat], numCoeff);
>> >              m_nr->count[cat]++;
>> >          }
>> >      }
>> > diff -r 2980141a744a -r 8878f03570b2 source/common/quant.h
>> > --- a/source/common/quant.h   Tue Aug 18 12:45:52 2015 +0530
>> > +++ b/source/common/quant.h   Wed Aug 19 15:29:20 2015 +0530
>> > @@ -59,18 +59,19 @@
>> >      }
>> >  };
>> >
>> > -#define MAX_NUM_TR_COEFFS        MAX_TR_SIZE * MAX_TR_SIZE /* Maximum
>> number of transform coefficients, for a 32x32 transform */
>> > -#define MAX_NUM_TR_CATEGORIES    16                        /* 32, 16,
>> 8, 4 transform categories each for luma and chroma */
>> > -
>> >  // NOTE: MUST be 16-byte aligned for asm code
>> >  struct NoiseReduction
>> >  {
>> >      /* 0 = luma 4x4,   1 = luma 8x8,   2 = luma 16x16,   3 = luma 32x32
>> >       * 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma
>> 32x32
>> >       * Intra 0..7 - Inter 8..15 */
>> > -    uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
>> > -    uint32_t residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
>> > -    uint32_t count[MAX_NUM_TR_CATEGORIES];
>> > +    uint16_t nrOffsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
>> > +    uint32_t nrResidualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
>> > +    uint32_t nrCount[MAX_NUM_TR_CATEGORIES];
>> > +
>> > +    ALIGN_VAR_16(uint16_t, (*offset)[MAX_NUM_TR_COEFFS]);
>> > +    uint32_t(*residualSum)[MAX_NUM_TR_COEFFS];
>> > +    uint32_t *count;
>> >  };
>> >
>> >  class Quant
>> > diff -r 2980141a744a -r 8878f03570b2 source/encoder/encoder.cpp
>> > --- a/source/encoder/encoder.cpp      Tue Aug 18 12:45:52 2015 +0530
>> > +++ b/source/encoder/encoder.cpp      Wed Aug 19 15:29:20 2015 +0530
>> > @@ -68,6 +68,7 @@
>> >      m_latestParam = NULL;
>> >      m_threadPool = NULL;
>> >      m_analysisFile = NULL;
>> > +    m_offsetEmergency = NULL;
>> >      for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)
>> >          m_frameEncoder[i] = NULL;
>> >
>> > @@ -187,6 +188,7 @@
>> >      {
>> >          x265_log(m_param, X265_LOG_ERROR, "Unable to allocate scaling
>> list arrays\n");
>> >          m_aborted = true;
>> > +        return;
>> >      }
>> >      else if (!m_param->scalingLists || !strcmp(m_param->scalingLists,
>> "off"))
>> >          m_scalingList.m_bEnabled = false;
>> > @@ -194,7 +196,6 @@
>> >          m_scalingList.setDefaultScalingList();
>> >      else if (m_scalingList.parseScalingList(m_param->scalingLists))
>> >          m_aborted = true;
>> > -    m_scalingList.setupQuantMatrices();
>> >
>> >      m_lookahead = new Lookahead(m_param, m_threadPool);
>> >      if (m_numPools)
>> > @@ -221,6 +222,83 @@
>> >          }
>> >      }
>> >
>> > +    if (m_param->rc.vbvBufferSize)
>> > +    {
>> > +        m_offsetEmergency =
>> (uint16_t(*)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS])X265_MALLOC(uint16_t,
>> MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS * (QP_MAX_MAX - QP_MAX_SPEC));
>> > +        if (!m_offsetEmergency)
>> > +        {
>> > +            x265_log(m_param, X265_LOG_ERROR, "Unable to allocate
>> memory\n");
>> > +            m_aborted = true;
>> > +            return;
>> > +        }
>> > +
>> > +        bool scalingEnabled = m_scalingList.m_bEnabled;
>> > +        if (!scalingEnabled)
>> > +        {
>> > +            m_scalingList.setDefaultScalingList();
>> > +            m_scalingList.setupQuantMatrices();
>> > +        }
>> > +        else
>> > +            m_scalingList.setupQuantMatrices();
>> > +
>> > +        for (int q = 0; q < QP_MAX_MAX - QP_MAX_SPEC; q++)
>> > +        {
>> > +            for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
>> > +            {
>> > +                uint16_t *nrOffset = m_offsetEmergency[q][cat];
>> > +
>> > +                int trSize = cat & 3;
>> > +
>> > +                int coefCount = 1 << ((trSize + 2) * 2);
>> > +
>> > +                /* Denoise chroma first then luma, then DC. */
>> > +                int dcThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;
>> > +                int lumaThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;
>> > +                int chromaThreshold = 0;
>> > +
>> > +                int thresh = (cat < 4 || (cat >= 8 && cat < 12)) ?
>> lumaThreshold : chromaThreshold;
>> > +
>> > +                double quantF = (double)(1ULL << (q / 6 + 16 + 8));
>> > +
>> > +                for (int i = 0; i < coefCount; i++)
>> > +                {
>> > +                    uint16_t max = (1 << (7 + X265_DEPTH)) - 1;
>> > +                    /* True "emergency mode": remove all DCT
>> coefficients */
>> > +                    if (q == QP_MAX_MAX - QP_MAX_SPEC - 1)
>> > +                    {
>> > +                        nrOffset[i] = max;
>> > +                        continue;
>> > +                    }
>> > +
>> > +                    int iThresh = i == 0 ? dcThreshold : thresh;
>> > +                    if (q < iThresh)
>> > +                    {
>> > +                        nrOffset[i] = 0;
>> > +                        continue;
>> > +                    }
>> > +
>> > +                    int numList = (cat >= 8) * 3 + ((int)!iThresh);
>> > +
>> > +                    double pos = (double)(q - iThresh + 1) /
>> (QP_MAX_MAX - QP_MAX_SPEC - iThresh);
>> > +                    double start = quantF /
>> (m_scalingList.m_quantCoef[trSize][numList][QP_MAX_SPEC % 6][i]);
>> > +
>> > +                    // Formula chosen as an exponential scale to
>> vaguely mimic the effects of a higher quantizer.
>> > +                    double bias = (pow(2, pos * (QP_MAX_MAX -
>> QP_MAX_SPEC)) * 0.003 - 0.003) * start;
>> > +                    nrOffset[i] = (uint16_t)X265_MIN(bias + 0.5, max);
>> > +                }
>> > +            }
>> > +        }
>> > +
>> > +        if (!scalingEnabled)
>> > +        {
>> > +            m_scalingList.m_bEnabled = false;
>> > +            m_scalingList.m_bDataPresent = false;
>> > +            m_scalingList.setupQuantMatrices();
>> > +        }
>> > +    }
>> > +    else
>> > +        m_scalingList.setupQuantMatrices();
>> > +
>> >      for (int i = 0; i < m_param->frameNumThreads; i++)
>> >      {
>> >          m_frameEncoder[i]->start();
>> > @@ -314,6 +392,8 @@
>> >          delete m_rateControl;
>> >      }
>> >
>> > +    X265_FREE(m_offsetEmergency);
>> > +
>> >      if (m_analysisFile)
>> >          fclose(m_analysisFile);
>> >
>> > diff -r 2980141a744a -r 8878f03570b2 source/encoder/encoder.h
>> > --- a/source/encoder/encoder.h        Tue Aug 18 12:45:52 2015 +0530
>> > +++ b/source/encoder/encoder.h        Wed Aug 19 15:29:20 2015 +0530
>> > @@ -130,6 +130,10 @@
>> >      bool               m_aborted;          // fatal error detected
>> >      bool               m_reconfigured;      // reconfigure of encoder
>> detected
>> >
>> > +    uint16_t
>>  (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
>> > +    ALIGN_VAR_32(uint32_t,
>> m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS]);
>> > +    uint32_t           m_countEmergency[MAX_NUM_TR_CATEGORIES];
>> > +
>> >      Encoder();
>> >      ~Encoder() {}
>> >
>> > diff -r 2980141a744a -r 8878f03570b2 source/encoder/frameencoder.cpp
>> > --- a/source/encoder/frameencoder.cpp Tue Aug 18 12:45:52 2015 +0530
>> > +++ b/source/encoder/frameencoder.cpp Wed Aug 19 15:29:20 2015 +0530
>> > @@ -135,7 +135,7 @@
>> >          ok &= m_rce.picTimingSEI && m_rce.hrdTiming;
>> >      }
>> >
>> > -    if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
>> > +    if (m_param->noiseReductionIntra || m_param->noiseReductionInter
>> || m_param->rc.vbvBufferSize)
>> >          m_nr = X265_MALLOC(NoiseReduction, 1);
>> >      if (m_nr)
>> >          memset(m_nr, 0, sizeof(NoiseReduction));
>> > @@ -362,11 +362,47 @@
>> >          }
>> >      }
>> >
>> > +    int numTLD;
>> > +    if (m_pool)
>> > +        numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers :
>> m_pool->m_numWorkers + m_pool->m_numProviders;
>> > +    else
>> > +        numTLD = 1;
>> > +
>> >      /* Get the QP for this frame from rate control. This call may
>> block until
>> >       * frames ahead of it in encode order have called rateControlEnd()
>> */
>> >      int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce,
>> m_top);
>> >      m_rce.newQp = qp;
>> >
>> > +    if (m_nr)
>> > +    {
>> > +        if (qp > QP_MAX_SPEC && m_frame->m_param->rc.vbvBufferSize)
>> > +        {
>> > +            for (int i = 0; i < numTLD; i++)
>> > +            {
>> > +                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset =
>> m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1];
>> > +
>> m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum =
>> m_top->m_residualSumEmergency;
>> > +                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count =
>> m_top->m_countEmergency;
>> > +            }
>> > +        }
>> > +        else
>> > +        {
>> > +            if (m_param->noiseReductionIntra ||
>> m_param->noiseReductionInter)
>> > +            {
>> > +                for (int i = 0; i < numTLD; i++)
>> > +                {
>> > +                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset
>> = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise;
>> > +
>> m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum =
>> m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum;
>> > +                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count
>> = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount;
>> > +                }
>> > +            }
>> > +            else
>> > +            {
>> > +                for (int i = 0; i < numTLD; i++)
>> > +                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset
>> = NULL;
>> > +            }
>> > +        }
>> > +    }
>> > +
>> >      /* Clip slice QP to 0-51 spec range before encoding */
>> >      slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);
>> >
>> > @@ -699,37 +735,36 @@
>> >          }
>> >      }
>> >
>> > -    int numTLD;
>> > -    if (m_pool)
>> > -        numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers :
>> m_pool->m_numWorkers + m_pool->m_numProviders;
>> > -    else
>> > -        numTLD = 1;
>> > -
>> >      if (m_nr)
>> >      {
>> > -        /* Accumulate NR statistics from all worker threads */
>> > -        for (int i = 0; i < numTLD; i++)
>> > -        {
>> > -            NoiseReduction* nr =
>> &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
>> > -            for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
>> > -            {
>> > -                for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
>> > -                    m_nr->residualSum[cat][coeff] +=
>> nr->residualSum[cat][coeff];
>> > -
>> > -                m_nr->count[cat] += nr->count[cat];
>> > -            }
>> > -        }
>> > -
>> > -        noiseReductionUpdate();
>> > -
>> > -        /* Copy updated NR coefficients back to all worker threads */
>> > -        for (int i = 0; i < numTLD; i++)
>> > -        {
>> > -            NoiseReduction* nr =
>> &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
>> > -            memcpy(nr->offsetDenoise, m_nr->offsetDenoise,
>> sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
>> > -            memset(nr->count, 0, sizeof(uint32_t) *
>> MAX_NUM_TR_CATEGORIES);
>> > -            memset(nr->residualSum, 0, sizeof(uint32_t) *
>> MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
>> > -        }
>> > +        bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC ||
>> !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra ||
>> m_param->noiseReductionInter);
>> > +
>> > +        if (nrEnabled)
>> > +        {
>> > +            /* Accumulate NR statistics from all worker threads */
>> > +            for (int i = 0; i < numTLD; i++)
>> > +            {
>> > +                NoiseReduction* nr =
>> &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
>> > +                for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
>> > +                {
>> > +                    for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS;
>> coeff++)
>> > +                        m_nr->nrResidualSum[cat][coeff] +=
>> nr->nrResidualSum[cat][coeff];
>> > +
>> > +                    m_nr->nrCount[cat] += nr->nrCount[cat];
>> > +                }
>> > +            }
>> > +
>> > +            noiseReductionUpdate();
>> > +
>> > +            /* Copy updated NR coefficients back to all worker threads
>> */
>> > +            for (int i = 0; i < numTLD; i++)
>> > +            {
>> > +                NoiseReduction* nr =
>> &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
>> > +                memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise,
>> sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
>> > +                memset(nr->nrCount, 0, sizeof(uint32_t)*
>> MAX_NUM_TR_CATEGORIES);
>> > +                memset(nr->nrResidualSum, 0, sizeof(uint32_t)*
>> MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
>> > +            }
>> > +        }
>> >      }
>> >
>> >  #if DETAILED_CU_STATS
>> > @@ -1253,25 +1288,25 @@
>> >          int trSize = cat & 3;
>> >          int coefCount = 1 << ((trSize + 2) * 2);
>> >
>> > -        if (m_nr->count[cat] > maxBlocksPerTrSize[trSize])
>> > +        if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize])
>> >          {
>> >              for (int i = 0; i < coefCount; i++)
>> > -                m_nr->residualSum[cat][i] >>= 1;
>> > -            m_nr->count[cat] >>= 1;
>> > +                m_nr->nrResidualSum[cat][i] >>= 1;
>> > +            m_nr->nrCount[cat] >>= 1;
>> >          }
>> >
>> >          int nrStrength = cat < 8 ? m_param->noiseReductionIntra :
>> m_param->noiseReductionInter;
>> > -        uint64_t scaledCount = (uint64_t)nrStrength * m_nr->count[cat];
>> > +        uint64_t scaledCount = (uint64_t)nrStrength *
>> m_nr->nrCount[cat];
>> >
>> >          for (int i = 0; i < coefCount; i++)
>> >          {
>> > -            uint64_t value = scaledCount + m_nr->residualSum[cat][i] /
>> 2;
>> > -            uint64_t denom = m_nr->residualSum[cat][i] + 1;
>> > -            m_nr->offsetDenoise[cat][i] = (uint16_t)(value / denom);
>> > +            uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i]
>> / 2;
>> > +            uint64_t denom = m_nr->nrResidualSum[cat][i] + 1;
>> > +            m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom);
>> >          }
>> >
>> >          // Don't denoise DC coefficients
>> > -        m_nr->offsetDenoise[cat][0] = 0;
>> > +        m_nr->nrOffsetDenoise[cat][0] = 0;
>> >      }
>> >  }
>> >
>> > diff -r 2980141a744a -r 8878f03570b2 source/encoder/search.cpp
>> > --- a/source/encoder/search.cpp       Tue Aug 18 12:45:52 2015 +0530
>> > +++ b/source/encoder/search.cpp       Wed Aug 19 15:29:20 2015 +0530
>> > @@ -80,7 +80,7 @@
>> >      m_me.init(param.searchMethod, param.subpelRefine,
>> param.internalCsp);
>> >
>> >      bool ok = m_quant.init(param.rdoqLevel, param.psyRdoq,
>> scalingList, m_entropyCoder);
>> > -    if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
>> > +    if (m_param->noiseReductionIntra || m_param->noiseReductionInter
>> || m_param->rc.vbvBufferSize)
>> >          ok &= m_quant.allocNoiseReduction(param);
>> >
>> >      ok &= Predict::allocBuffers(param.internalCsp); /* sets
>> m_hChromaShift & m_vChromaShift */
>> > _______________________________________________
>> > x265-devel mailing list
>> > x265-devel at videolan.org
>> > https://mailman.videolan.org/listinfo/x265-devel
>>
>> --
>> Steve Borho
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150820/438d5588/attachment-0001.html>