[x265] [PATCH] Add emergency denoising when frame qp > QP_MAX_SPEC
sagar at multicorewareinc.com
sagar at multicorewareinc.com
Tue Aug 4 10:47:27 CEST 2015
# HG changeset patch
# User Sagar Kotecha <sagar at multicorewareinc.com>
# Date 1438676290 -19800
# Tue Aug 04 13:48:10 2015 +0530
# Node ID bf5c5aca1a24eb4699d99a3ce4de386096219a5a
# Parent d5278c76d341b3bac405938dbfb64cb7e2d9bce5
Add emergency denoising when frame qp > QP_MAX_SPEC
This feature is ported from x264 and is enabled only for VBV encodes (i.e. when a VBV buffer size is set).
diff -r d5278c76d341 -r bf5c5aca1a24 source/common/common.h
--- a/source/common/common.h Mon Aug 03 10:18:46 2015 -0500
+++ b/source/common/common.h Tue Aug 04 13:48:10 2015 +0530
@@ -311,6 +311,9 @@
#define CHROMA_V_SHIFT(x) (x == X265_CSP_I420)
#define X265_MAX_PRED_MODE_PER_CTU 85 * 2 * 8
+#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE // Maximum number of transform coefficients, for a 32x32 transform
+#define MAX_NUM_TR_CATEGORIES 16 // 32, 16, 8, 4 transform categories each for luma and chroma
+
namespace X265_NS {
enum { SAO_NUM_OFFSET = 4 };
diff -r d5278c76d341 -r bf5c5aca1a24 source/common/quant.cpp
--- a/source/common/quant.cpp Mon Aug 03 10:18:46 2015 -0500
+++ b/source/common/quant.cpp Tue Aug 04 13:48:10 2015 +0530
@@ -447,12 +447,12 @@
primitives.cu[sizeIdx].dct(m_fencShortBuf, m_fencDctCoeff, trSize);
}
- if (m_nr)
+ if (m_nr && m_nr->offset)
{
/* denoise is not applied to intra residual, so DST can be ignored */
int cat = sizeIdx + 4 * !isLuma + 8 * !isIntra;
int numCoeff = 1 << (log2TrSize * 2);
- primitives.denoiseDct(m_resiDctCoeff, m_nr->residualSum[cat], m_nr->offsetDenoise[cat], numCoeff);
+ primitives.denoiseDct(m_resiDctCoeff, m_nr->residualSum[cat], m_nr->offset[cat], numCoeff);
m_nr->count[cat]++;
}
}
diff -r d5278c76d341 -r bf5c5aca1a24 source/common/quant.h
--- a/source/common/quant.h Mon Aug 03 10:18:46 2015 -0500
+++ b/source/common/quant.h Tue Aug 04 13:48:10 2015 +0530
@@ -59,18 +59,19 @@
}
};
-#define MAX_NUM_TR_COEFFS MAX_TR_SIZE * MAX_TR_SIZE /* Maximum number of transform coefficients, for a 32x32 transform */
-#define MAX_NUM_TR_CATEGORIES 16 /* 32, 16, 8, 4 transform categories each for luma and chroma */
-
// NOTE: MUST be 16-byte aligned for asm code
struct NoiseReduction
{
/* 0 = luma 4x4, 1 = luma 8x8, 2 = luma 16x16, 3 = luma 32x32
* 4 = chroma 4x4, 5 = chroma 8x8, 6 = chroma 16x16, 7 = chroma 32x32
* Intra 0..7 - Inter 8..15 */
- uint16_t offsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
- uint32_t residualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
- uint32_t count[MAX_NUM_TR_CATEGORIES];
+ uint16_t nrOffsetDenoise[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
+ uint32_t nrResidualSum[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
+ uint32_t nrCount[MAX_NUM_TR_CATEGORIES];
+
+ ALIGN_VAR_16(uint16_t, (*offset)[MAX_NUM_TR_COEFFS]);
+ uint32_t (*residualSum)[MAX_NUM_TR_COEFFS];
+ uint32_t *count;
};
class Quant
diff -r d5278c76d341 -r bf5c5aca1a24 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Aug 03 10:18:46 2015 -0500
+++ b/source/encoder/encoder.cpp Tue Aug 04 13:48:10 2015 +0530
@@ -72,6 +72,7 @@
m_buOffsetC = NULL;
m_threadPool = NULL;
m_analysisFile = NULL;
+ m_offsetEmergency = NULL;
for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)
m_frameEncoder[i] = NULL;
@@ -191,6 +192,7 @@
{
x265_log(m_param, X265_LOG_ERROR, "Unable to allocate scaling list arrays\n");
m_aborted = true;
+ return;
}
else if (!m_param->scalingLists || !strcmp(m_param->scalingLists, "off"))
m_scalingList.m_bEnabled = false;
@@ -198,7 +200,6 @@
m_scalingList.setDefaultScalingList();
else if (m_scalingList.parseScalingList(m_param->scalingLists))
m_aborted = true;
- m_scalingList.setupQuantMatrices();
m_lookahead = new Lookahead(m_param, m_threadPool);
if (m_numPools)
@@ -213,6 +214,83 @@
initVPS(&m_vps);
initSPS(&m_sps);
initPPS(&m_pps);
+
+ if (m_param->rc.vbvBufferSize)
+ {
+ m_offsetEmergency = (uint16_t(*)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS])malloc(sizeof(*m_offsetEmergency) * (QP_MAX_MAX - QP_MAX_SPEC));
+ if (!m_offsetEmergency)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "Unable to allocate memory\n");
+ m_aborted = true;
+ return;
+ }
+
+ bool scalingEnabled = m_scalingList.m_bEnabled;
+ if (!scalingEnabled)
+ {
+ m_scalingList.setDefaultScalingList();
+ m_scalingList.setupQuantMatrices();
+ }
+ else
+ m_scalingList.setupQuantMatrices();
+
+ for (int q = 0; q < QP_MAX_MAX - QP_MAX_SPEC; q++)
+ {
+ for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
+ {
+ uint16_t *nrOffset = m_offsetEmergency[q][cat];
+
+ int trSize = cat & 3;
+
+ int coefCount = 1 << ((trSize + 2) * 2);
+
+ /* Denoise chroma first then luma, then DC. */
+ int dcThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;
+ int lumaThreshold = (QP_MAX_MAX - QP_MAX_SPEC) * 2 / 3;
+ int chromaThreshold = 0;
+
+ int thresh = (cat < 4 || (cat >= 8 && cat < 12)) ? lumaThreshold : chromaThreshold;
+
+ double quantF = (double)(1ULL << (q / 6 + 16 + 8));
+
+ for (int i = 0; i < coefCount; i++)
+ {
+ uint16_t max = (1 << (7 + X265_DEPTH)) - 1;
+ /* True "emergency mode": remove all DCT coefficients */
+ if (q == QP_MAX_MAX - QP_MAX_SPEC - 1)
+ {
+ nrOffset[i] = max;
+ continue;
+ }
+
+ int iThresh = i == 0 ? dcThreshold : thresh;
+ if (q < iThresh)
+ {
+ nrOffset[i] = 0;
+ continue;
+ }
+
+ int numList = (cat >= 8) * 3 + ((int)!iThresh);
+
+ double pos = (double)(q - iThresh + 1) / (QP_MAX_MAX - QP_MAX_SPEC - iThresh);
+ double start = quantF / (m_scalingList.m_quantCoef[trSize][numList][QP_MAX_SPEC % 6][i]);
+
+ // Formula chosen as an exponential scale to vaguely mimic the effects of a higher quantizer.
+ double bias = (pow(2, pos * (QP_MAX_MAX - QP_MAX_SPEC)) * 0.003 - 0.003) * start;
+ nrOffset[i] = (uint16_t)X265_MIN(bias + 0.5, max);
+ }
+ }
+ }
+
+ if (!scalingEnabled)
+ {
+ m_scalingList.m_bEnabled = false;
+ m_scalingList.m_bDataPresent = false;
+ m_scalingList.setupQuantMatrices();
+ }
+ }
+ else
+ m_scalingList.setupQuantMatrices();
int numRows = (m_param->sourceHeight + g_maxCUSize - 1) / g_maxCUSize;
int numCols = (m_param->sourceWidth + g_maxCUSize - 1) / g_maxCUSize;
@@ -323,6 +401,8 @@
X265_FREE(m_buOffsetY);
X265_FREE(m_buOffsetC);
+ free(m_offsetEmergency);
+
if (m_analysisFile)
fclose(m_analysisFile);
diff -r d5278c76d341 -r bf5c5aca1a24 source/encoder/encoder.h
--- a/source/encoder/encoder.h Mon Aug 03 10:18:46 2015 -0500
+++ b/source/encoder/encoder.h Tue Aug 04 13:48:10 2015 +0530
@@ -133,6 +133,10 @@
bool m_aborted; // fatal error detected
bool m_reconfigured; // reconfigure of encoder detected
+ uint16_t (*m_offsetEmergency)[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
+ uint32_t m_residualSumEmergency[MAX_NUM_TR_CATEGORIES][MAX_NUM_TR_COEFFS];
+ uint32_t m_countEmergency[MAX_NUM_TR_CATEGORIES];
+
Encoder();
~Encoder() {}
diff -r d5278c76d341 -r bf5c5aca1a24 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Aug 03 10:18:46 2015 -0500
+++ b/source/encoder/frameencoder.cpp Tue Aug 04 13:48:10 2015 +0530
@@ -135,7 +135,7 @@
ok &= m_rce.picTimingSEI && m_rce.hrdTiming;
}
- if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
+ if (m_param->noiseReductionIntra || m_param->noiseReductionInter || m_param->rc.vbvBufferSize)
m_nr = X265_MALLOC(NoiseReduction, 1);
if (m_nr)
memset(m_nr, 0, sizeof(NoiseReduction));
@@ -361,11 +361,45 @@
}
}
+ int numTLD;
+ if (m_pool)
+ numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;
+ else
+ numTLD = 1;
+
/* Get the QP for this frame from rate control. This call may block until
* frames ahead of it in encode order have called rateControlEnd() */
int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, m_top);
m_rce.newQp = qp;
+ if (m_nr)
+ {
+ if (qp > QP_MAX_SPEC && m_frame->m_param->rc.vbvBufferSize)
+ {
+ for (int i = 0; i < numTLD; i++)
+ {
+ m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_top->m_offsetEmergency[qp - QP_MAX_SPEC - 1];
+ m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_top->m_residualSumEmergency;
+ m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_top->m_countEmergency;
+ }
+ }
+ else
+ {
+ if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
+ {
+ for (int i = 0; i < numTLD; i++)
+ {
+ m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrOffsetDenoise;
+ m_tld[i].analysis.m_quant.m_frameNr[m_jpId].residualSum = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrResidualSum;
+ m_tld[i].analysis.m_quant.m_frameNr[m_jpId].count = m_tld[i].analysis.m_quant.m_frameNr[m_jpId].nrCount;
+ }
+ }
+ else
+ for (int i = 0; i < numTLD; i++)
+ m_tld[i].analysis.m_quant.m_frameNr[m_jpId].offset = NULL;
+ }
+ }
+
/* Clip slice QP to 0-51 spec range before encoding */
slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);
@@ -702,37 +736,36 @@
}
}
- int numTLD;
- if (m_pool)
- numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;
- else
- numTLD = 1;
-
if (m_nr)
{
- /* Accumulate NR statistics from all worker threads */
- for (int i = 0; i < numTLD; i++)
- {
- NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
- for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
+ bool nrEnabled = (m_rce.newQp < QP_MAX_SPEC || !m_param->rc.vbvBufferSize) && (m_param->noiseReductionIntra || m_param->noiseReductionInter);
+
+ if (nrEnabled)
+ {
+ /* Accumulate NR statistics from all worker threads */
+ for (int i = 0; i < numTLD; i++)
+ {
+ NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
+ for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)
+ {
+ for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
+ m_nr->nrResidualSum[cat][coeff] += nr->nrResidualSum[cat][coeff];
+
+ m_nr->nrCount[cat] += nr->nrCount[cat];
+ }
+ }
+
+ noiseReductionUpdate();
+
+ /* Copy updated NR coefficients back to all worker threads */
+ for (int i = 0; i < numTLD; i++)
{
- for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
- m_nr->residualSum[cat][coeff] += nr->residualSum[cat][coeff];
-
- m_nr->count[cat] += nr->count[cat];
+ NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
+ memcpy(nr->nrOffsetDenoise, m_nr->nrOffsetDenoise, sizeof(uint16_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
+ memset(nr->nrCount, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES);
+ memset(nr->nrResidualSum, 0, sizeof(uint32_t)* MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
}
}
-
- noiseReductionUpdate();
-
- /* Copy updated NR coefficients back to all worker threads */
- for (int i = 0; i < numTLD; i++)
- {
- NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
- memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
- memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);
- memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
- }
}
#if DETAILED_CU_STATS
@@ -1265,25 +1298,25 @@
int trSize = cat & 3;
int coefCount = 1 << ((trSize + 2) * 2);
- if (m_nr->count[cat] > maxBlocksPerTrSize[trSize])
+ if (m_nr->nrCount[cat] > maxBlocksPerTrSize[trSize])
{
for (int i = 0; i < coefCount; i++)
- m_nr->residualSum[cat][i] >>= 1;
- m_nr->count[cat] >>= 1;
+ m_nr->nrResidualSum[cat][i] >>= 1;
+ m_nr->nrCount[cat] >>= 1;
}
int nrStrength = cat < 8 ? m_param->noiseReductionIntra : m_param->noiseReductionInter;
- uint64_t scaledCount = (uint64_t)nrStrength * m_nr->count[cat];
+ uint64_t scaledCount = (uint64_t)nrStrength * m_nr->nrCount[cat];
for (int i = 0; i < coefCount; i++)
{
- uint64_t value = scaledCount + m_nr->residualSum[cat][i] / 2;
- uint64_t denom = m_nr->residualSum[cat][i] + 1;
- m_nr->offsetDenoise[cat][i] = (uint16_t)(value / denom);
+ uint64_t value = scaledCount + m_nr->nrResidualSum[cat][i] / 2;
+ uint64_t denom = m_nr->nrResidualSum[cat][i] + 1;
+ m_nr->nrOffsetDenoise[cat][i] = (uint16_t)(value / denom);
}
// Don't denoise DC coefficients
- m_nr->offsetDenoise[cat][0] = 0;
+ m_nr->nrOffsetDenoise[cat][0] = 0;
}
}
diff -r d5278c76d341 -r bf5c5aca1a24 source/encoder/search.cpp
--- a/source/encoder/search.cpp Mon Aug 03 10:18:46 2015 -0500
+++ b/source/encoder/search.cpp Tue Aug 04 13:48:10 2015 +0530
@@ -80,7 +80,7 @@
m_me.init(param.searchMethod, param.subpelRefine, param.internalCsp);
bool ok = m_quant.init(param.rdoqLevel, param.psyRdoq, scalingList, m_entropyCoder);
- if (m_param->noiseReductionIntra || m_param->noiseReductionInter)
+ if (m_param->noiseReductionIntra || m_param->noiseReductionInter || m_param->rc.vbvBufferSize)
ok &= m_quant.allocNoiseReduction(param);
ok &= Predict::allocBuffers(param.internalCsp); /* sets m_hChromaShift & m_vChromaShift */
More information about the x265-devel mailing list