[x265] [PATCH] denoise: further cleanups

Wed Aug 6 20:17:55 CEST 2014

# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1407348176 18000
#      Wed Aug 06 13:02:56 2014 -0500
# Node ID 94cefc095d2fdd041b9df22cb7eb8c947bfe505a
# Parent  ba6729e34f77b6fa254995746f3dbb29596cc857
denoise: further cleanups

x264 used buffer pointers so that they could swap between normal denoise and
"emergency denoise" when the QP became very high.  We do not have an emergency
denoise and thus we don't need these pointers at this time. This simplifies
initialization and update logic.

diff -r ba6729e34f77 -r 94cefc095d2f source/common/common.h

--- a/source/common/common.h	Wed Aug 06 18:16:08 2014 +0530
+++ b/source/common/common.h	Wed Aug 06 13:02:56 2014 -0500
@@ -185,13 +185,9 @@
 
     /* 0 = luma 4x4, 1 = luma 8x8, 2 = luma 16x16, 3 = luma 32x32
      * 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma 32x32 */
-    uint16_t (*offset)[1024];
-    uint32_t (*residualSum)[1024];
-    uint32_t *count;
-
     uint16_t offsetDenoise[8][1024];
-    uint32_t residualSumBuf[8][1024];
-    uint32_t countBuf[8];
+    uint32_t residualSum[8][1024];
+    uint32_t count[8];
 };
 
 /* defined in common.cpp */
diff -r ba6729e34f77 -r 94cefc095d2f source/common/quant.cpp
--- a/source/common/quant.cpp	Wed Aug 06 18:16:08 2014 +0530
+++ b/source/common/quant.cpp	Wed Aug 06 13:02:56 2014 -0500
@@ -339,7 +339,9 @@
         return primitives.cvt16to32_cnt[log2TrSize - 2](coeff, residual, stride);
     }
 
-    bool usePsy = m_psyRdoqScale && ttype == TEXT_LUMA && !useTransformSkip;
+    bool isLuma  = ttype == TEXT_LUMA;
+    bool usePsy  = m_psyRdoqScale && isLuma && !useTransformSkip;
+    bool isIntra = cu->getPredictionMode(absPartIdx) == MODE_INTRA;
     int trSize = 1 << log2TrSize;
 
     X265_CHECK((cu->m_slice->m_sps->quadtreeTULog2MaxSize >= log2TrSize), "transform size too large\n");
@@ -362,7 +364,7 @@
     else
     {
         const uint32_t sizeIdx = log2TrSize - 2;
-        int useDST = !sizeIdx && ttype == TEXT_LUMA && cu->getPredictionMode(absPartIdx) == MODE_INTRA;
+        int useDST = !sizeIdx && isLuma && isIntra;
         int index = DCT_4x4 + sizeIdx - useDST;
 
         primitives.dct[index](residual, m_resiDctCoeff, stride);
@@ -376,10 +378,12 @@
             primitives.dct[index](m_fencShortBuf, m_fencDctCoeff, trSize);
         }
 
-        if (m_nr->bNoiseReduction && (cu->getPredictionMode(absPartIdx) == MODE_INTER))
+        if (m_nr->bNoiseReduction && !isIntra)
         {
-            denoiseDct(m_resiDctCoeff, m_nr->residualSum[sizeIdx + (4 * !!ttype)], m_nr->offset[sizeIdx + (4 * !!ttype)], trSize << 1);
-            m_nr->count[sizeIdx + (4 * !!ttype)]++;
+            /* denoise is not applied to intra residual, so DST can be ignored */
+            int cat = sizeIdx + 4 * !isLuma;
+            denoiseDct(m_resiDctCoeff, m_nr->residualSum[cat], m_nr->offsetDenoise[cat], trSize << 1);
+            m_nr->count[cat]++;
         }
     }
 
@@ -389,7 +393,7 @@
     {
         int deltaU[32 * 32];
 
-        int scalingListType = ttype + (cu->isIntra(absPartIdx) ? 0 : 3);
+        int scalingListType = ttype + (isIntra ? 0 : 3); /* non-intra blocks use the list types offset by 3 */
         int rem = m_qpParam[ttype].rem;
         int per = m_qpParam[ttype].per;
         int32_t *quantCoeff = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
@@ -404,7 +408,7 @@
         if (numSig >= 2 && cu->m_slice->m_pps->bSignHideEnabled)
         {
             TUEntropyCodingParameters codingParameters;
-            cu->getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, ttype == TEXT_LUMA);
+            cu->getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, isLuma);
             return signBitHidingHDQ(coeff, deltaU, numSig, codingParameters);
         }
         else
diff -r ba6729e34f77 -r 94cefc095d2f source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Wed Aug 06 18:16:08 2014 +0530
+++ b/source/encoder/frameencoder.cpp	Wed Aug 06 13:02:56 2014 -0500
@@ -116,13 +116,7 @@
     }
 
     memset(&m_frameStats, 0, sizeof(m_frameStats));
-    memset(m_nr.offsetDenoise, 0, sizeof(m_nr.offsetDenoise[0][0]) * 8 * 1024);
-    memset(m_nr.residualSumBuf, 0, sizeof(m_nr.residualSumBuf[0][0]) * 8 * 1024);
-    memset(m_nr.countBuf, 0, sizeof(m_nr.countBuf[0]) * 8);
-
-    m_nr.offset = m_nr.offsetDenoise;
-    m_nr.residualSum = m_nr.residualSumBuf;
-    m_nr.count = m_nr.countBuf;
+    memset(&m_nr, 0, sizeof(m_nr));
     m_nr.bNoiseReduction = !!m_param->noiseReduction;
 
     start();
@@ -893,33 +887,31 @@
     if (!m_nr.bNoiseReduction)
         return;
 
-    m_nr.offset = m_nr.offsetDenoise;
-    m_nr.residualSum = m_nr.residualSumBuf;
-    m_nr.count = m_nr.countBuf;
-
-    int transformSize[4] = {16, 64, 256, 1024};
-    uint32_t blockCount[4] = {1 << 18, 1 << 16, 1 << 14, 1 << 12};
+    static const uint32_t maxBlocksPerTrSize[4] = {1 << 18, 1 << 16, 1 << 14, 1 << 12};
 
     for (int cat = 0; cat < 8; cat++)
     {
-        int index = cat % 4;
-        int size = transformSize[index];
+        int trSize = cat & 3;
+        int coefCount = 1 << ((trSize + 2) * 2);
 
-        if (m_nr.count[cat] > blockCount[index])
+        if (m_nr.count[cat] > maxBlocksPerTrSize[trSize])
         {
-            for (int i = 0; i < size; i++)
+            for (int i = 0; i < coefCount; i++)
                 m_nr.residualSum[cat][i] >>= 1;
             m_nr.count[cat] >>= 1;
         }
 
-        for (int i = 0; i < size; i++)
-            m_nr.offset[cat][i] =
-                (uint16_t)(((uint64_t)m_param->noiseReduction * m_nr.count[cat]
-                 + m_nr.residualSum[cat][i] / 2)
-              / ((uint64_t)m_nr.residualSum[cat][i] + 1));
+        uint64_t scaledCount = (uint64_t)m_param->noiseReduction * m_nr.count[cat];
+
+        for (int i = 0; i < coefCount; i++)
+        {
+            uint64_t value = scaledCount + m_nr.residualSum[cat][i] / 2;
+            uint64_t denom = (uint64_t)m_nr.residualSum[cat][i] + 1; // widen before +1 so the divisor cannot wrap to 0
+            m_nr.offsetDenoise[cat][i] = (uint16_t)(value / denom);
+        }
 
         // Don't denoise DC coefficients
-        m_nr.offset[cat][0] = 0;
+        m_nr.offsetDenoise[cat][0] = 0;
     }
 }