<div dir="ltr"><div class="gmail_default" style="font-family:georgia,serif;font-size:small;color:#000000"><br></div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Aug 5, 2015 at 8:42 PM, Steve Borho <span dir="ltr">&lt;<a href="mailto:steve@borho.org" target="_blank">steve@borho.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On 08/05, Sagar Kotecha wrote:<br>

> On Tue, Aug 4, 2015 at 11:02 PM, Steve Borho &lt;<a href="mailto:steve@borho.org">steve@borho.org</a>&gt; wrote:<br>

><br>

> > On 08/04, <a href="mailto:sagar@multicorewareinc.com">sagar@multicorewareinc.com</a> wrote:<br>

> > > # HG changeset patch<br>

> > > # User Sagar Kotecha &lt;<a href="mailto:sagar@multicorewareinc.com">sagar@multicorewareinc.com</a>&gt;<br>

> > > # Date 1438676290 -19800<br>

> > > #      Tue Aug 04 13:48:10 2015 +0530<br>

> > > # Node ID bf5c5aca1a24eb4699d99a3ce4de386096219a5a<br>

> > > # Parent  d5278c76d341b3bac405938dbfb64cb7e2d9bce5<br>

> > > Add emergency denoising when frame qp > QP_MAX_SPEC<br>

> > ><br>

> > > This feature is ported from x264, and is turned on for VBV encodes<br>

> ><br>

> > this looks a lot better than the previous patch<br>

> ><br>

> > > diff -r d5278c76d341 -r bf5c5aca1a24 source/common/common.h<br>

> > > --- a/source/common/common.h  Mon Aug 03 10:18:46 2015 -0500<br>

> > > +++ b/source/common/common.h  Tue Aug 04 13:48:10 2015 +0530<br>

> > > @@ -311,6 +311,9 @@<br>

> > >  #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)<br>

> > >  #define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8<br>

> > ><br>

> > > +#define MAX_NUM_TR_COEFFS           MAX_TR_SIZE * MAX_TR_SIZE //<br>

> > Maximum number of transform coefficients, for a 32x32 transform<br>

> > > +#define MAX_NUM_TR_CATEGORIES       16                        // 32,<br>

> > 16, 8, 4 transform categories each for luma and chroma<br>

> > > +<br>

> > >  namespace X265_NS {<br>

> > ><br>

> > >  enum { SAO_NUM_OFFSET = 4 };<br>

> > > diff -r d5278c76d341 -r bf5c5aca1a24 source/common/quant.cpp<br>

> > > --- a/source/common/quant.cpp Mon Aug 03 10:18:46 2015 -0500<br>

> > > +++ b/source/common/quant.cpp Tue Aug 04 13:48:10 2015 +0530<br>

> > > @@ -447,12 +447,12 @@<br>

> > >              <a href="http://primitives.cu" rel="noreferrer" target="_blank">primitives.cu</a>[sizeIdx].dct(m_fencShortBuf, m_fencDctCoeff,<br>

> > trSize);<br>

> > >          }<br>

> > ><br>

> > > -        if (m_nr)<br>

> > > +        if (m_nr && m_nr->offset)<br>

> > >          {<br>

> > >              /* denoise is not applied to intra residual, so DST can be<br>

> > ignored */<br>

> > >              int cat = sizeIdx + 4 * !isLuma + 8 * !isIntra;<br>

> > >              int numCoeff = 1 << (log2TrSize * 2);<br>

> > > -            primitives.denoiseDct(m_resiDctCoeff,<br>

> > m_nr->residualSum[cat], m_nr->offsetDenoise[cat], numCoeff);<br>

> > > +            primitives.denoiseDct(m_resiDctCoeff,<br>

> > m_nr->residualSum[cat], m_nr->offset[cat], numCoeff);<br>

> > >              m_nr->count[cat]++;<br>

> > >          }<br>

> > >      }<br>

> > > diff -r d5278c76d341 -r bf5c5aca1a24 source/common/quant.h<br>

> > > --- a/source/common/quant.h   Mon Aug 03 10:18:46 2015 -0500<br>

> > > +++ b/source/common/quant.h   Tue Aug 04 13:48:10 2015 +0530<br>

> > > @@ -59,18 +59,19 @@<br>

> > >      }<br>

> > >  };<br>

> > ><br>

> > > -#define MAX_NUM_TR_COEFFS        MAX_TR_SIZE * MAX_TR_SIZE /* Maximum<br>

> > number of transform coefficients, for a 32x32 transform */<br>

> > > -#define MAX_NUM_TR_CATEGORIES    16                        /* 32, 16,<br>

> > 8, 4 transform categories each for luma and chroma */<br>

> > > -<br>

> > >  // NOTE: MUST be 16-byte aligned for asm code<br>

> > >  struct NoiseReduction<br>

> > >  {<br>

> > >      /* 0 = luma 4x4,   1 = luma 8x8,   2 = luma 16x16,   3 = luma 32x32<br>

> > >       * 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma<br>

> > 32x32<br>

> > >       * Intra 0..7 - Inter 8..15 */<br>

> > > -    uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>

> > > -    uint32_t residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>

> > > -    uint32_t count[MAX_NUM_TR_CATEGORIES];<br>

> > > +    uint16_t nrOffsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>

> > > +    uint32_t nrResidualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>

> > > +    uint32_t nrCount[MAX_NUM_TR_CATEGORIES];<br>

> > > +<br>

> > > +    ALIGN_VAR_16(uint16_t, (*offset)[MAX_NUM_TR_COEFFS]);<br>

> > > +    uint32_t (*residualSum)[MAX_NUM_TR_COEFFS];<br>

> > > +    uint32_t *count;<br>

> > >  };<br>

> ><br>

> > I can see that the way we've split these structures to preserve<br>

> > determinism makes it rather difficult to swap out the user-specified<br>

> > denoise coeff with the emergency coeff.<br>

> ><br>

> > ><br>

> > >  class Quant<br>

> > > diff -r d5278c76d341 -r bf5c5aca1a24 source/encoder/encoder.cpp<br>

> > > --- a/source/encoder/encoder.cpp      Mon Aug 03 10:18:46 2015 -0500<br>

> > > +++ b/source/encoder/encoder.cpp      Tue Aug 04 13:48:10 2015 +0530<br>

> > > @@ -72,6 +72,7 @@<br>

> > >      m_buOffsetC = NULL;<br>

> > >      m_threadPool = NULL;<br>

> > >      m_analysisFile = NULL;<br>

> > > +    m_offsetEmergency = NULL;<br>

> > >      for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)<br>

> > >          m_frameEncoder[i] = NULL;<br>

> > ><br>

> > > @@ -191,6 +192,7 @@<br>

> > >      {<br>

> > >          x265_log(m_param, X265_LOG_ERROR, "Unable to allocate scaling<br>

> > list arrays\n");<br>

> > >          m_aborted = true;<br>

> > > +        return;<br>

> > >      }<br>

> > >      else if (!m_param->scalingLists || !strcmp(m_param->scalingLists,<br>

> > "off"))<br>

> > >          m_scalingList.m_bEnabled = false;<br>

> > > @@ -198,7 +200,6 @@<br>

> > >          m_scalingList.setDefaultScalingList();<br>

> > >      else if (m_scalingList.parseScalingList(m_param->scalingLists))<br>

> > >          m_aborted = true;<br>

> > > -    m_scalingList.setupQuantMatrices();<br>

> > ><br>

> > >      m_lookahead = new Lookahead(m_param, m_threadPool);<br>

> > >      if (m_numPools)<br>

> > > @@ -213,6 +214,83 @@<br>

> > >      initVPS(&m_vps);<br>

> > >      initSPS(&m_sps);<br>

> > >      initPPS(&m_pps);<br>

> > > +<br>

> > > +    if (m_param->rc.vbvBufferSize)<br>

> > > +    {<br>

> > > +        m_offsetEmergency =<br>

> > (uint16_t(*)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS])malloc(sizeof(*m_offsetEmergency)<br>

> > * (QP_MAX_MAX - QP_MAX_SPEC));<br>

> ><br>

> > did you check that sizeof(*m_offsetEmergency) is what you expect?<br>

> ><br>

> > why not use X265_ALLOC/X265_FREE? we generally need our mallocs aligned<br>

> ><br>

</div></div>> OK<br>

<div><div class="h5">><br></div></div></blockquote><div><div class="gmail_default" style="font-family:georgia,serif;font-size:small;color:rgb(0,0,0);display:inline">Yes, sizeof(*m_offsetEmergency) returns </div><div class="gmail_default" style="font-family:georgia,serif;font-size:small;color:rgb(0,0,0);display:inline">expected value. </div></div><div><div class="gmail_default" style="font-family:georgia,serif;font-size:small;color:rgb(0,0,0);display:inline"><br></div></div><div><div class="gmail_default" style="font-family:georgia,serif;font-size:small;color:rgb(0,0,0);display:inline">I will use X265_MALLOC/X265_FREE.</div></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div><div class="h5">

><br>

> ><br>

> > > +        if (!m_offsetEmergency)<br>

> > > +        {<br>

> > > +            x265_log(m_param, X265_LOG_ERROR, "Unable to allocate<br>

> > memory\n");<br>

> > > +            m_aborted = true;<br>

> > > +            return;<br>

> > > +        }<br>

> > > +<br>

> > > +        bool scalingEnabled = m_scalingList.m_bEnabled;<br>

> > > +        if (!scalingEnabled)<br>

> > > +        {<br>

> > > +            m_scalingList.setDefaultScalingList();<br>

> > > +            m_scalingList.setupQuantMatrices();<br>

> > > +        }<br>

> > > +        else<br>

> > > +            m_scalingList.setupQuantMatrices();<br>

> > > +<br>

> > > +        for (int q = 0; q < QP_MAX_MAX - QP_MAX_SPEC; q++)<br>

> > > +        {<br>

> > > +            for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)<br>

> > > +            {<br>

> > > +                uint16_t *nrOffset = m_offsetEmergency[q][cat];<br>

> > > +<br>

> > > +                int trSize = cat & 3;<br>

> > > +<br>

> > > +                int coefCount = 1 << ((trSize + 2) * 2);<br>

> > > +<br>

> > > +                /* Denoise chroma first then luma, then DC. */<br>

> > > +                int dcThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;<br>

> > > +                int lumaThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;<br>

> > > +                int chromaThreshold = 0;<br>

> > > +<br>

> > > +                int thresh = (cat < 4 || (cat >= 8 && cat < 12)) ?<br>

> > lumaThreshold : chromaThreshold;<br>

> > > +<br>

> > > +                double quantF = (double)(1ULL << (q / 6 + 16 + 8));<br>

> > > +<br>

> > > +                for (int i = 0; i < coefCount; i++)<br>

> > > +                {<br>

> > > +                    uint16_t max = (1 << (7 + X265_DEPTH)) - 1;<br>

> > > +                    /* True "emergency mode": remove all DCT<br>

> > coefficients */<br>

> > > +                    if (q == QP_MAX_MAX - QP_MAX_SPEC - 1)<br>

> > > +                    {<br>

> > > +                        nrOffset[i] = max;<br>

> > > +                        continue;<br>

> > > +                    }<br>

> > > +<br>

> > > +                    int iThresh = i == 0 ? dcThreshold : thresh;<br>

> > > +                    if (q < iThresh)<br>

> > > +                    {<br>

> > > +                        nrOffset[i] = 0;<br>

> > > +                        continue;<br>

> > > +                    }<br>

> > > +<br>

> > > +                    int numList = (cat >= 8) * 3 + ((int)!iThresh);<br>

> > > +<br>

> > > +                    double pos = (double)(q - iThresh + 1) /<br>

> > (QP_MAX_MAX - QP_MAX_SPEC - iThresh);<br>

> > > +                    double start = quantF /<br>

> > (m_scalingList.m_quantCoef[trSize][numList][QP_MAX_SPEC % 6][i]);<br>

> > > +<br>

> > > +                    // Formula chosen as an exponential scale to<br>

> > vaguely mimic the effects of a higher quantizer.<br>

> > > +                    double bias = (pow(2, pos * (QP_MAX_MAX -<br>

> > QP_MAX_SPEC)) * 0.003 - 0.003) * start;<br>

> > > +                    nrOffset[i] = (uint16_t)X265_MIN(bias + 0.5, max);<br>

> > > +                }<br>

> > > +            }<br>

> > > +        }<br>

> > > +<br>

> > > +        if (!scalingEnabled)<br>

> > > +        {<br>

> > > +            m_scalingList.m_bEnabled = false;<br>

> > > +            m_scalingList.m_bDataPresent = false;<br>

> > > +            m_scalingList.setupQuantMatrices();<br>

> > > +        }<br>

> > > +    }<br>

> > > +    else<br>

> > > +        m_scalingList.setupQuantMatrices();<br>

> > ><br>

> > >      int numRows = (m_param->sourceHeight + g_maxCUSize - 1) /<br>

> > g_maxCUSize;<br>

> > >      int numCols = (m_param->sourceWidth  + g_maxCUSize - 1) /<br>

> > g_maxCUSize;<br>

> > > @@ -323,6 +401,8 @@<br>

> > >      X265_FREE(m_buOffsetY);<br>

> > >      X265_FREE(m_buOffsetC);<br>

> > ><br>

> > > +    free(m_offsetEmergency);<br>

> > > +<br>

> > >      if (m_analysisFile)<br>

> > >          fclose(m_analysisFile);<br>

> > ><br>

> > > diff -r d5278c76d341 -r bf5c5aca1a24 source/encoder/encoder.h<br>

> > > --- a/source/encoder/encoder.h        Mon Aug 03 10:18:46 2015 -0500<br>

> > > +++ b/source/encoder/encoder.h        Tue Aug 04 13:48:10 2015 +0530<br>

> > > @@ -133,6 +133,10 @@<br>

> > >      bool               m_aborted;          // fatal error detected<br>

> > >      bool               m_reconfigured;      // reconfigure of encoder<br>

> > detected<br>

> > ><br>

> > > +    uint16_t<br>

> >  (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>

> > > +    uint32_t<br>

> >  m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];<br>

> > > +    uint32_t           m_countEmergency[MAX_NUM_TR_CATEGORIES];<br>

> > > +<br>

> > >      Encoder();<br>

> > >      ~Encoder() {}<br>

> > ><br>

> > > diff -r d5278c76d341 -r bf5c5aca1a24 source/encoder/frameencoder.cpp<br>

> > > --- a/source/encoder/frameencoder.cpp Mon Aug 03 10:18:46 2015 -0500<br>

> > > +++ b/source/encoder/frameencoder.cpp Tue Aug 04 13:48:10 2015 +0530<br>

> > > @@ -135,7 +135,7 @@<br>

> > >          ok &= m_rce.picTimingSEI && m_rce.hrdTiming;<br>

> > >      }<br>

> > ><br>

> > > -    if (m_param->noiseReductionIntra || m_param->noiseReductionInter)<br>

> > > +    if (m_param->noiseReductionIntra || m_param->noiseReductionInter ||<br>

> > m_param->rc.vbvBufferSize)<br>

> > >          m_nr = X265_MALLOC(NoiseReduction, 1);<br>

> > >      if (m_nr)<br>

> > >          memset(m_nr, 0, sizeof(NoiseReduction));<br>

> > > @@ -361,11 +361,45 @@<br>

> > >          }<br>

> > >      }<br>

> > ><br>

> > > +    int numTLD;<br>

> > > +    if (m_pool)<br>

> > > +        numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers :<br>

> > m_pool->m_numWorkers + m_pool->m_numProviders;<br>

> > > +    else<br>

> > > +        numTLD = 1;<br>

> > > +<br>

> > >      /* Get the QP for this frame from rate control. This call may block<br>

> > until<br>

> > >       * frames ahead of it in encode order have called rateControlEnd()<br>

> > */<br>

> > >      int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce,<br>

> > m_top);<br>

> > >      m_rce.newQp = qp;<br>

> > ><br>

> > > +    if (m_nr)<br>

> > > +    {<br>

> > > +<br>

</div></div>> ><br>

<div><div class="h5">> > if (qp > QP_MAX_SPEC && m_frame->m_param->rc.vbvBufferSize)<br>

> > > +        {<br>

> > > +            for (int i = 0; i < numTLD; i++)<br>

> > > +            {<br>

> > > +                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset =<br>

> > m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1];<br>

> > > +                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum<br>

> > = m_top->m_residualSumEmergency;<br>

> > > +                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count =<br>

> > m_top->m_countEmergency;<br>

> > > +            }<br>

> > > +        }<br>

> > > +        else<br>

> > > +        {<br>

> > > +            if (m_param->noiseReductionIntra ||<br>

> > m_param->noiseReductionInter)<br>

> > > +            {<br>

> > > +                for (int i = 0; i < numTLD; i++)<br>

> > > +                {<br>

> > > +                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset<br>

> > = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise;<br>

> > > +<br>

> > m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum =<br>

> > m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum;<br>

> > > +                    m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count =<br>

> > m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount;<br>

> > > +                }<br>

> > > +            }<br>

> > > +            else<br>

> > > +            for (int i = 0; i < numTLD; i++)<br>

> > > +                m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset =<br>

> > NULL;<br>

> ><br>

> > w/s<br>

> ><br>

> > > +        }<br>

> > > +    }<br>

> > > +<br>

> > >      /* Clip slice QP to 0-51 spec range before encoding */<br>

> > >      slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);<br>

> > ><br>

> > > @@ -702,37 +736,36 @@<br>

> > >          }<br>

> > >      }<br>

> > ><br>

> > > -    int numTLD;<br>

> > > -    if (m_pool)<br>

> > > -        numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers :<br>

> > m_pool->m_numWorkers + m_pool->m_numProviders;<br>

> > > -    else<br>

> > > -        numTLD = 1;<br>

> > > -<br>

> > >      if (m_nr)<br>

> > >      {<br>

> > > -        /* Accumulate NR statistics from all worker threads */<br>

> > > -        for (int i = 0; i < numTLD; i++)<br>

> > > -        {<br>

> > > -            NoiseReduction* nr =<br>

> > &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];<br>

> > > -            for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)<br>

> > > +        bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC ||<br>

> > !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra ||<br>

> > m_param->noiseReductionInter);<br>

> > > +<br>

> > > +        if (nrEnabled)<br>

> > > +        {<br>

> > > +            /* Accumulate NR statistics from all worker threads */<br>

> > > +            for (int i = 0; i < numTLD; i++)<br>

> > > +            {<br>

> > > +                NoiseReduction* nr =<br>

> > &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];<br>

> > > +                for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)<br>

> > > +                {<br>

> > > +                    for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS;<br>

> > coeff++)<br>

> > > +                        m_nr->nrResidualSum[cat][coeff] +=<br>

> > nr->nrResidualSum[cat][coeff];<br>

> > > +<br>

> > > +                    m_nr->nrCount[cat] += nr->nrCount[cat];<br>

> > > +                }<br>

> > > +            }<br>

> > > +<br>

> > > +            noiseReductionUpdate();<br>

> > > +<br>

> > > +            /* Copy updated NR coefficients back to all worker threads<br>

> > */<br>

> > > +            for (int i = 0; i < numTLD; i++)<br>

> > >              {<br>

> > > -                for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)<br>

> > > -                    m_nr->residualSum[cat][coeff] +=<br>

> > nr->residualSum[cat][coeff];<br>

> > > -<br>

> > > -                m_nr->count[cat] += nr->count[cat];<br>

> > > +                NoiseReduction* nr =<br>

> > &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];<br>

> > > +                memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise,<br>

> > sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);<br>

> > > +                memset(nr->nrCount, 0, sizeof(uint32_t)*<br>

> > MAX_NUM_TR_CATEGORIES);<br>

> > > +                memset(nr->nrResidualSum, 0, sizeof(uint32_t)*<br>

> > MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);<br>

> > >              }<br>

> > >          }<br>

> > > -<br>

> > > -        noiseReductionUpdate();<br>

> > > -<br>

> > > -        /* Copy updated NR coefficients back to all worker threads */<br>

> > > -        for (int i = 0; i < numTLD; i++)<br>

> > > -        {<br>

> > > -            NoiseReduction* nr =<br>

> > &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];<br>

> > > -            memcpy(nr->offsetDenoise, m_nr->offsetDenoise,<br>

> > sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);<br>

> > > -            memset(nr->count, 0, sizeof(uint32_t) *<br>

> > MAX_NUM_TR_CATEGORIES);<br>

> > > -            memset(nr->residualSum, 0, sizeof(uint32_t) *<br>

> > MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);<br>

> > > -        }<br>

> > >      }<br>

> > ><br>

> > >  #if DETAILED_CU_STATS<br>

> > > @@ -1265,25 +1298,25 @@<br>

> > >          int trSize = cat & 3;<br>

> > >          int coefCount = 1 << ((trSize + 2) * 2);<br>

> > ><br>

> > > -        if (m_nr->count[cat] > maxBlocksPerTrSize[trSize])<br>

> > > +        if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize])<br>

> > >          {<br>

> > >              for (int i = 0; i < coefCount; i++)<br>

> > > -                m_nr->residualSum[cat][i] >>= 1;<br>

> > > -            m_nr->count[cat] >>= 1;<br>

> > > +                m_nr->nrResidualSum[cat][i] >>= 1;<br>

> > > +            m_nr->nrCount[cat] >>= 1;<br>

> > >          }<br>

> > ><br>

> > >          int nrStrength = cat < 8 ? m_param->noiseReductionIntra :<br>

> > m_param->noiseReductionInter;<br>

> > > -        uint64_t scaledCount = (uint64_t)nrStrength * m_nr->count[cat];<br>

> > > +        uint64_t scaledCount = (uint64_t)nrStrength *<br>

> > m_nr->nrCount[cat];<br>

> > ><br>

> > >          for (int i = 0; i < coefCount; i++)<br>

> > >          {<br>

> > > -            uint64_t value = scaledCount + m_nr->residualSum[cat][i] /<br>

> > 2;<br>

> > > -            uint64_t denom = m_nr->residualSum[cat][i] + 1;<br>

> > > -            m_nr->offsetDenoise[cat][i] = (uint16_t)(value / denom);<br>

> > > +            uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i]<br>

> > / 2;<br>

> > > +            uint64_t denom = m_nr->nrResidualSum[cat][i] + 1;<br>

> > > +            m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom);<br>

> > >          }<br>

> > ><br>

> > >          // Don't denoise DC coefficients<br>

> > > -        m_nr->offsetDenoise[cat][0] = 0;<br>

> > > +        m_nr->nrOffsetDenoise[cat][0] = 0;<br>

> > >      }<br>

> > >  }<br>

> ><br>

> > this seems to enable the emergency denoise only when the slice QP is<br>

> > over QP_MAX_SPEC, but I believe the main intent for the feature is to<br>

> > enable emergency denoise when the mid-frame VBV updates cause the QP to<br>

> > jump above QP_MAX_SPEC (and disable again if the QP drops below<br>

> > QP_MAX_SPEC), which leads to a design where the quant function itself<br>

> > must select between the FE's denoise (if denoise was user-configured) or<br>

> > emergency denoise if the QP is above spec and emergency denoise is<br>

> > enabled. Which I think in the end would be a cleaner design anyway.<br>

> ><br>

> > won't it be better if we apply the denoise on the entire frame and avoid<br>

</div></div>> strong denoising on the particular CU (qp  > QP_MAX_SPEC),<br>

<span class="">> this way we will save the bits from start.<br>

><br>

> For some of the frames, CU_QP  > QP_MAX_SPEC but FRAME_QP < QP_MAX_SPEC in<br>

> such cases we can generalize by having<br>

> if  frame_qp > (QP_MAX_SPEC - 1/2/3)<br>

<br>

</span>As the name implies, it is supposed to be used for "emergency" purposes<br>

only, to ensure that VBV can stay below the frame target bitsize.  It<br>

should only be applied to CUs which are over the spec limit QP.<br>

<br>

We shouldn't penalize the entire frame if it has a high slice QP. The<br>

emergency denoise should bring the bitrate down pretty harshly, allowing<br>

VBV to lower QP below the spec max, which should allow quant to switch<br>

back to the user-specified denoise (possibly none at all).<br>

<br>

This actually simplifies the implementation. Quant::setQPforQuant() can<br>

check the CU QP and pick the frameEncoder's denoise structure if it is<br>

below QP_MAX_SPEC or pick (globally static) emergency denoise if it is<br>

above QP_MAX_SPEC.  I think all of the FrameEncoder and NoiseReduction<br>

changes can then be dropped.<br>

<div class="HOEnZb"><div class="h5"><br>

--<br>

Steve Borho<br>

_______________________________________________<br>

x265-devel mailing list<br>

<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>

<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>

</div></div></blockquote></div><br></div></div>