[x265] [PATCH 1 of 3] Add distortion to analysis-multi-pass
aruna at multicorewareinc.com
aruna at multicorewareinc.com
Tue Dec 27 13:44:03 CET 2016
# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1481120092 -19800
# Wed Dec 07 19:44:52 2016 +0530
# Node ID 14837a0999d7addfeb63daa5463716595d913c74
# Parent 5bd8651df490c7446d548661fe95079b8917b31f
Add distortion to analysis-multi-pass
diff -r 5bd8651df490 -r 14837a0999d7 source/common/cudata.cpp
--- a/source/common/cudata.cpp Wed Dec 21 17:34:09 2016 +0530
+++ b/source/common/cudata.cpp Wed Dec 07 19:44:52 2016 +0530
@@ -218,6 +218,8 @@
m_mvd[0] = m_mv[1] + m_numPartitions;
m_mvd[1] = m_mvd[0] + m_numPartitions;
+ m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
+
uint32_t cuSize = g_maxCUSize >> depth;
m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize);
m_trCoeff[1] = m_trCoeff[2] = 0;
@@ -257,6 +259,8 @@
m_mvd[0] = m_mv[1] + m_numPartitions;
m_mvd[1] = m_mvd[0] + m_numPartitions;
+ m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
+
uint32_t cuSize = g_maxCUSize >> depth;
uint32_t sizeL = cuSize * cuSize;
uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part
@@ -304,6 +308,7 @@
m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
+ memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
}
// initialize Sub partition
@@ -337,6 +342,7 @@
/* initialize the remaining CU data in one memset */
memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
+ memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
}
/* Copy the results of a sub-part (split) CU to the parent CU */
@@ -372,6 +378,8 @@
memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
+ memcpy(m_distortion + offset, subCU.m_distortion, childGeom.numPartitions * sizeof(sse_t));
+
uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2);
uint32_t tmp2 = subPartIdx * tmp;
memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp);
@@ -421,6 +429,7 @@
memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV));
memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
+ memcpy(m_distortion, cu.m_distortion, m_numPartitions * sizeof(sse_t));
/* force TQBypass to true */
m_partSet(m_tqBypass, true);
@@ -468,6 +477,8 @@
memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
+ memcpy(ctu.m_distortion + m_absIdxInCTU, m_distortion, m_numPartitions * sizeof(sse_t));
+
uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
@@ -520,6 +531,8 @@
memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+ memcpy(m_distortion, ctu.m_distortion + m_absIdxInCTU, m_numPartitions * sizeof(sse_t));
+
/* clear residual coding flags */
m_partSet(m_tuDepth, 0);
m_partSet(m_transformSkip[0], 0);
diff -r 5bd8651df490 -r 14837a0999d7 source/common/cudata.h
--- a/source/common/cudata.h Wed Dec 21 17:34:09 2016 +0530
+++ b/source/common/cudata.h Wed Dec 07 19:44:52 2016 +0530
@@ -205,6 +205,7 @@
uint8_t* m_chromaIntraDir; // array of intra directions (chroma)
enum { BytesPerPartition = 21 }; // combined sizeof() of all per-part data
+ sse_t* m_distortion;
coeff_t* m_trCoeff[3]; // transformed coefficient buffer per plane
int8_t m_refTuDepth[NUM_TU_DEPTH]; // TU depth of CU at depths 0, 1 and 2
@@ -341,8 +342,9 @@
uint8_t* charMemBlock;
coeff_t* trCoeffMemBlock;
MV* mvMemBlock;
+ sse_t* distortionMemBlock;
- CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; }
+ CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; distortionMemBlock = NULL; }
bool create(uint32_t depth, uint32_t csp, uint32_t numInstances)
{
@@ -360,6 +362,7 @@
}
CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances * CUData::BytesPerPartition);
CHECKED_MALLOC_ZERO(mvMemBlock, MV, numPartition * 4 * numInstances);
+ CHECKED_MALLOC(distortionMemBlock, sse_t, numPartition * numInstances);
return true;
fail:
return false;
@@ -370,6 +373,7 @@
X265_FREE(trCoeffMemBlock);
X265_FREE(mvMemBlock);
X265_FREE(charMemBlock);
+ X265_FREE(distortionMemBlock);
}
};
}
diff -r 5bd8651df490 -r 14837a0999d7 source/common/framedata.h
--- a/source/common/framedata.h Wed Dec 21 17:34:09 2016 +0530
+++ b/source/common/framedata.h Wed Dec 07 19:44:52 2016 +0530
@@ -189,6 +189,11 @@
int* mvpIdx[2];
int32_t* ref[2];
uint8_t* modes;
+ sse_t* distortion;
+ sse_t* ctuDistortion;
+ double* scaledDistortion;
+ double averageDistortion;
+ double sdDistortion;
};
}
diff -r 5bd8651df490 -r 14837a0999d7 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Wed Dec 21 17:34:09 2016 +0530
+++ b/source/encoder/encoder.cpp Wed Dec 07 19:44:52 2016 +0530
@@ -2302,6 +2302,12 @@
uint32_t numCUsInFrame = widthInCU * heightInCU;
CHECKED_MALLOC_ZERO(analysisFrameData, analysis2PassFrameData, 1);
CHECKED_MALLOC(analysisFrameData->depth, uint8_t, NUM_4x4_PARTITIONS * numCUsInFrame);
+ CHECKED_MALLOC_ZERO(analysisFrameData->distortion, sse_t, NUM_4x4_PARTITIONS * numCUsInFrame);
+ if (m_param->rc.bStatRead)
+ {
+ CHECKED_MALLOC_ZERO(analysisFrameData->ctuDistortion, sse_t, numCUsInFrame);
+ CHECKED_MALLOC(analysisFrameData->scaledDistortion, double, numCUsInFrame);
+ }
if (!IS_X265_TYPE_I(sliceType))
{
CHECKED_MALLOC_ZERO(analysisFrameData->m_mv[0], MV, NUM_4x4_PARTITIONS * numCUsInFrame);
@@ -2327,6 +2333,12 @@
if (analysis->analysisFramedata)
{
X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->depth);
+ X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->distortion);
+ if (m_param->rc.bStatRead)
+ {
+ X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->ctuDistortion);
+ X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->scaledDistortion);
+ }
if (!IS_X265_TYPE_I(sliceType))
{
X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->m_mv[0]);
@@ -2471,6 +2483,9 @@
static uint64_t consumedBytes = 0;
static uint64_t totalConsumedBytes = 0;
uint32_t depthBytes = 0;
+ uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >> g_maxLog2CUSize;
+ uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >> g_maxLog2CUSize;
+ uint32_t numCUsInFrame = widthInCU * heightInCU;
fseeko(m_analysisFile, totalConsumedBytes, SEEK_SET);
int poc; uint32_t frameRecordSize;
@@ -2503,20 +2518,35 @@
analysis2Pass->frameRecordSize = frameRecordSize;
uint8_t* tempBuf = NULL, *depthBuf = NULL;
-
+ sse_t *tempdistBuf = NULL, *distortionBuf = NULL;
tempBuf = X265_MALLOC(uint8_t, depthBytes);
X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes, m_analysisFile);
-
+ tempdistBuf = X265_MALLOC(sse_t, depthBytes);
+ X265_FREAD(tempdistBuf, sizeof(sse_t), depthBytes, m_analysisFile);
depthBuf = tempBuf;
-
+ distortionBuf = tempdistBuf;
+ analysis2PassFrameData* analysisFrameData = (analysis2PassFrameData*)analysis2Pass->analysisFramedata;
size_t count = 0;
+ uint32_t ctuCount = 0;
+ double sum = 0, sqrSum = 0;
for (uint32_t d = 0; d < depthBytes; d++)
{
int bytes = NUM_4x4_PARTITIONS >> (depthBuf[d] * 2);
- memset(&((analysis2PassFrameData *)analysis2Pass->analysisFramedata)->depth[count], depthBuf[d], bytes);
+ memset(&analysisFrameData->depth[count], depthBuf[d], bytes);
+ analysisFrameData->distortion[count] = distortionBuf[d];
+ analysisFrameData->ctuDistortion[ctuCount] += analysisFrameData->distortion[count];
count += bytes;
+ if ((count % (size_t)NUM_4x4_PARTITIONS) == 0)
+ {
+ analysisFrameData->scaledDistortion[ctuCount] = X265_LOG2(X265_MAX(analysisFrameData->ctuDistortion[ctuCount], 1));
+ sum += analysisFrameData->scaledDistortion[ctuCount];
+ sqrSum += analysisFrameData->scaledDistortion[ctuCount] * analysisFrameData->scaledDistortion[ctuCount];
+ ctuCount++;
+ }
}
-
+ double avg = sum / numCUsInFrame;
+ analysisFrameData->sdDistortion = pow(((sqrSum / numCUsInFrame) - (avg * avg)), 0.5);
+ analysisFrameData->averageDistortion = avg;
if (!IS_X265_TYPE_I(sliceType))
{
MV *tempMVBuf[2], *MVBuf[2];
@@ -2567,6 +2597,7 @@
X265_FREE(tempModeBuf);
}
X265_FREE(tempBuf);
+ X265_FREE(tempdistBuf);
consumedBytes += frameRecordSize;
if (!IS_X265_TYPE_I(sliceType))
{
@@ -2720,6 +2751,7 @@
{
depth = ctu->m_cuDepth[absPartIdx];
analysisFrameData->depth[depthBytes] = depth;
+ analysisFrameData->distortion[depthBytes] = ctu->m_distortion[absPartIdx];
absPartIdx += ctu->m_numPartitions >> (depth * 2);
}
}
@@ -2756,10 +2788,10 @@
}
/* calculate frameRecordSize */
- analysis2Pass->frameRecordSize = sizeof(analysis2Pass->frameRecordSize) + sizeof(depthBytes)+sizeof(analysis2Pass->poc);
+ analysis2Pass->frameRecordSize = sizeof(analysis2Pass->frameRecordSize) + sizeof(depthBytes) + sizeof(analysis2Pass->poc);
analysis2Pass->frameRecordSize += depthBytes * sizeof(uint8_t);
-
+ analysis2Pass->frameRecordSize += depthBytes * sizeof(sse_t);
if (curEncData.m_slice->m_sliceType != I_SLICE)
{
int numDir = (curEncData.m_slice->m_sliceType == P_SLICE) ? 1 : 2;
@@ -2773,7 +2805,7 @@
X265_FWRITE(&analysis2Pass->poc, sizeof(uint32_t), 1, m_analysisFile);
X265_FWRITE(analysisFrameData->depth, sizeof(uint8_t), depthBytes, m_analysisFile);
-
+ X265_FWRITE(analysisFrameData->distortion, sizeof(sse_t), depthBytes, m_analysisFile);
if (curEncData.m_slice->m_sliceType != I_SLICE)
{
int numDir = curEncData.m_slice->m_sliceType == P_SLICE ? 1 : 2;
diff -r 5bd8651df490 -r 14837a0999d7 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Dec 21 17:34:09 2016 +0530
+++ b/source/encoder/search.cpp Wed Dec 07 19:44:52 2016 +0530
@@ -1207,7 +1207,7 @@
}
else
intraMode.distortion += intraMode.lumaDistortion;
-
+ cu.m_distortion[0] = intraMode.distortion;
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
@@ -2624,6 +2624,7 @@
interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
interMode.distortion += interMode.chromaDistortion;
}
+ cu.m_distortion[0] = interMode.distortion;
m_entropyCoder.load(m_rqt[depth].cur);
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
@@ -2786,6 +2787,7 @@
interMode.lumaDistortion = bestLumaDist;
interMode.coeffBits = coeffBits;
interMode.mvBits = mvBits;
+ cu.m_distortion[0] = interMode.distortion;
updateModeCost(interMode);
checkDQP(interMode, cuGeom);
}
More information about the x265-devel mailing list