[x265] [PATCH 1 of 3] Add distortion to analysis-multi-pass

aruna at multicorewareinc.com aruna at multicorewareinc.com
Tue Dec 27 13:44:03 CET 2016


# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1481120092 -19800
#      Wed Dec 07 19:44:52 2016 +0530
# Node ID 14837a0999d7addfeb63daa5463716595d913c74
# Parent  5bd8651df490c7446d548661fe95079b8917b31f
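Add distortion to analysis-multi-pass

Track a per-CU distortion value (sse_t) in CUData, write it to the multi-pass
analysis file right after the CU depth data, and when reading the file back
derive the per-CTU distortion, its log-scaled value (X265_LOG2), and the
frame-level average and standard deviation of the scaled values.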

diff -r 5bd8651df490 -r 14837a0999d7 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Wed Dec 21 17:34:09 2016 +0530
+++ b/source/common/cudata.cpp	Wed Dec 07 19:44:52 2016 +0530
@@ -218,6 +218,8 @@
         m_mvd[0] = m_mv[1] +  m_numPartitions;
         m_mvd[1] = m_mvd[0] + m_numPartitions;
 
+        m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
+
         uint32_t cuSize = g_maxCUSize >> depth;
         m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize);
         m_trCoeff[1] = m_trCoeff[2] = 0;
@@ -257,6 +259,8 @@
         m_mvd[0] = m_mv[1] +  m_numPartitions;
         m_mvd[1] = m_mvd[0] + m_numPartitions;
 
+        m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
+
         uint32_t cuSize = g_maxCUSize >> depth;
         uint32_t sizeL = cuSize * cuSize;
         uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part
@@ -304,6 +308,7 @@
     m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
     m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
     m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
+    memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
 }
 
 // initialize Sub partition
@@ -337,6 +342,7 @@
 
     /* initialize the remaining CU data in one memset */
     memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
+    memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
 }
 
 /* Copy the results of a sub-part (split) CU to the parent CU */
@@ -372,6 +378,8 @@
     memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
     memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
 
+    memcpy(m_distortion + offset, subCU.m_distortion, childGeom.numPartitions * sizeof(sse_t));
+
     uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2);
     uint32_t tmp2 = subPartIdx * tmp;
     memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp);
@@ -421,6 +429,7 @@
     memcpy(m_mv[1],  cu.m_mv[1],  m_numPartitions * sizeof(MV));
     memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
     memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
+    memcpy(m_distortion, cu.m_distortion, m_numPartitions * sizeof(sse_t));
 
     /* force TQBypass to true */
     m_partSet(m_tqBypass, true);
@@ -468,6 +477,8 @@
     memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
     memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
 
+    memcpy(ctu.m_distortion + m_absIdxInCTU, m_distortion, m_numPartitions * sizeof(sse_t));
+
     uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
     uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
     memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
@@ -520,6 +531,8 @@
     memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
     memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
 
+    memcpy(m_distortion, ctu.m_distortion + m_absIdxInCTU, m_numPartitions * sizeof(sse_t));
+
     /* clear residual coding flags */
     m_partSet(m_tuDepth, 0);
     m_partSet(m_transformSkip[0], 0);
diff -r 5bd8651df490 -r 14837a0999d7 source/common/cudata.h
--- a/source/common/cudata.h	Wed Dec 21 17:34:09 2016 +0530
+++ b/source/common/cudata.h	Wed Dec 07 19:44:52 2016 +0530
@@ -205,6 +205,7 @@
     uint8_t*      m_chromaIntraDir;   // array of intra directions (chroma)
     enum { BytesPerPartition = 21 };  // combined sizeof() of all per-part data
 
+    sse_t*        m_distortion;
     coeff_t*      m_trCoeff[3];       // transformed coefficient buffer per plane
     int8_t        m_refTuDepth[NUM_TU_DEPTH];   // TU depth of CU at depths 0, 1 and 2
 
@@ -341,8 +342,9 @@
     uint8_t* charMemBlock;
     coeff_t* trCoeffMemBlock;
     MV*      mvMemBlock;
+    sse_t*   distortionMemBlock;
 
-    CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; }
+    CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL; mvMemBlock = NULL; distortionMemBlock = NULL; }
 
     bool create(uint32_t depth, uint32_t csp, uint32_t numInstances)
     {
@@ -360,6 +362,7 @@
         }
         CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances * CUData::BytesPerPartition);
         CHECKED_MALLOC_ZERO(mvMemBlock, MV, numPartition * 4 * numInstances);
+        CHECKED_MALLOC(distortionMemBlock, sse_t, numPartition * numInstances);
         return true;
     fail:
         return false;
@@ -370,6 +373,7 @@
         X265_FREE(trCoeffMemBlock);
         X265_FREE(mvMemBlock);
         X265_FREE(charMemBlock);
+        X265_FREE(distortionMemBlock);
     }
 };
 }
diff -r 5bd8651df490 -r 14837a0999d7 source/common/framedata.h
--- a/source/common/framedata.h	Wed Dec 21 17:34:09 2016 +0530
+++ b/source/common/framedata.h	Wed Dec 07 19:44:52 2016 +0530
@@ -189,6 +189,11 @@
     int*          mvpIdx[2];
     int32_t*      ref[2];
     uint8_t*      modes;
+    sse_t*        distortion;
+    sse_t*        ctuDistortion;
+    double*       scaledDistortion;
+    double        averageDistortion;
+    double        sdDistortion;
 };
 
 }
diff -r 5bd8651df490 -r 14837a0999d7 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Wed Dec 21 17:34:09 2016 +0530
+++ b/source/encoder/encoder.cpp	Wed Dec 07 19:44:52 2016 +0530
@@ -2302,6 +2302,12 @@
     uint32_t numCUsInFrame = widthInCU * heightInCU;
     CHECKED_MALLOC_ZERO(analysisFrameData, analysis2PassFrameData, 1);
     CHECKED_MALLOC(analysisFrameData->depth, uint8_t, NUM_4x4_PARTITIONS * numCUsInFrame);
+    CHECKED_MALLOC_ZERO(analysisFrameData->distortion, sse_t, NUM_4x4_PARTITIONS * numCUsInFrame);
+    if (m_param->rc.bStatRead)
+    {
+        CHECKED_MALLOC_ZERO(analysisFrameData->ctuDistortion, sse_t, numCUsInFrame);
+        CHECKED_MALLOC(analysisFrameData->scaledDistortion, double, numCUsInFrame);
+    }
     if (!IS_X265_TYPE_I(sliceType))
     {
         CHECKED_MALLOC_ZERO(analysisFrameData->m_mv[0], MV, NUM_4x4_PARTITIONS * numCUsInFrame);
@@ -2327,6 +2333,12 @@
     if (analysis->analysisFramedata)
     {
         X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->depth);
+        X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->distortion);
+        if (m_param->rc.bStatRead)
+        {
+            X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->ctuDistortion);
+            X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->scaledDistortion);
+        }
         if (!IS_X265_TYPE_I(sliceType))
         {
             X265_FREE(((analysis2PassFrameData*)analysis->analysisFramedata)->m_mv[0]);
@@ -2471,6 +2483,9 @@
     static uint64_t consumedBytes = 0;
     static uint64_t totalConsumedBytes = 0;
     uint32_t depthBytes = 0;
+    uint32_t widthInCU = (m_param->sourceWidth + g_maxCUSize - 1) >> g_maxLog2CUSize;
+    uint32_t heightInCU = (m_param->sourceHeight + g_maxCUSize - 1) >> g_maxLog2CUSize;
+    uint32_t numCUsInFrame = widthInCU * heightInCU;
     fseeko(m_analysisFile, totalConsumedBytes, SEEK_SET);
 
     int poc; uint32_t frameRecordSize;
@@ -2503,20 +2518,35 @@
     analysis2Pass->frameRecordSize = frameRecordSize;
 
     uint8_t* tempBuf = NULL, *depthBuf = NULL;
-
+    sse_t *tempdistBuf = NULL, *distortionBuf = NULL;
     tempBuf = X265_MALLOC(uint8_t, depthBytes);
     X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes, m_analysisFile);
-
+    tempdistBuf = X265_MALLOC(sse_t, depthBytes);
+    X265_FREAD(tempdistBuf, sizeof(sse_t), depthBytes, m_analysisFile);
     depthBuf = tempBuf;
-
+    distortionBuf = tempdistBuf;
+    analysis2PassFrameData* analysisFrameData = (analysis2PassFrameData*)analysis2Pass->analysisFramedata;
     size_t count = 0;
+    uint32_t ctuCount = 0;
+    double sum = 0, sqrSum = 0;
     for (uint32_t d = 0; d < depthBytes; d++)
     {
         int bytes = NUM_4x4_PARTITIONS >> (depthBuf[d] * 2);
-        memset(&((analysis2PassFrameData *)analysis2Pass->analysisFramedata)->depth[count], depthBuf[d], bytes);
+        memset(&analysisFrameData->depth[count], depthBuf[d], bytes);
+        analysisFrameData->distortion[count] = distortionBuf[d];
+        analysisFrameData->ctuDistortion[ctuCount] += analysisFrameData->distortion[count];
         count += bytes;
+        if ((count % (size_t)NUM_4x4_PARTITIONS) == 0)
+        {
+            analysisFrameData->scaledDistortion[ctuCount] = X265_LOG2(X265_MAX(analysisFrameData->ctuDistortion[ctuCount], 1));
+            sum += analysisFrameData->scaledDistortion[ctuCount];
+            sqrSum += analysisFrameData->scaledDistortion[ctuCount] * analysisFrameData->scaledDistortion[ctuCount];
+            ctuCount++;
+        }
     }
-
+    double avg = sum / numCUsInFrame;
+    analysisFrameData->sdDistortion = pow(((sqrSum / numCUsInFrame) - (avg * avg)), 0.5);
+    analysisFrameData->averageDistortion = avg;
     if (!IS_X265_TYPE_I(sliceType))
     {
         MV *tempMVBuf[2], *MVBuf[2];
@@ -2567,6 +2597,7 @@
         X265_FREE(tempModeBuf);
     }
     X265_FREE(tempBuf);
+    X265_FREE(tempdistBuf);
     consumedBytes += frameRecordSize;
     if (!IS_X265_TYPE_I(sliceType))
     {
@@ -2720,6 +2751,7 @@
         {
             depth = ctu->m_cuDepth[absPartIdx];
             analysisFrameData->depth[depthBytes] = depth;
+            analysisFrameData->distortion[depthBytes] = ctu->m_distortion[absPartIdx];
             absPartIdx += ctu->m_numPartitions >> (depth * 2);
         }
     }
@@ -2756,10 +2788,10 @@
     }
 
     /* calculate frameRecordSize */
-    analysis2Pass->frameRecordSize = sizeof(analysis2Pass->frameRecordSize) + sizeof(depthBytes)+sizeof(analysis2Pass->poc);
+    analysis2Pass->frameRecordSize = sizeof(analysis2Pass->frameRecordSize) + sizeof(depthBytes) + sizeof(analysis2Pass->poc);
 
     analysis2Pass->frameRecordSize += depthBytes * sizeof(uint8_t);
-
+    analysis2Pass->frameRecordSize += depthBytes * sizeof(sse_t);
     if (curEncData.m_slice->m_sliceType != I_SLICE)
     {
         int numDir = (curEncData.m_slice->m_sliceType == P_SLICE) ? 1 : 2;
@@ -2773,7 +2805,7 @@
     X265_FWRITE(&analysis2Pass->poc, sizeof(uint32_t), 1, m_analysisFile);
 
     X265_FWRITE(analysisFrameData->depth, sizeof(uint8_t), depthBytes, m_analysisFile);
-
+    X265_FWRITE(analysisFrameData->distortion, sizeof(sse_t), depthBytes, m_analysisFile);
     if (curEncData.m_slice->m_sliceType != I_SLICE)
     {
         int numDir = curEncData.m_slice->m_sliceType == P_SLICE ? 1 : 2;
diff -r 5bd8651df490 -r 14837a0999d7 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Wed Dec 21 17:34:09 2016 +0530
+++ b/source/encoder/search.cpp	Wed Dec 07 19:44:52 2016 +0530
@@ -1207,7 +1207,7 @@
     }
     else
         intraMode.distortion += intraMode.lumaDistortion;
-
+    cu.m_distortion[0] = intraMode.distortion;
     m_entropyCoder.resetBits();
     if (m_slice->m_pps->bTransquantBypassEnabled)
         m_entropyCoder.codeCUTransquantBypassFlag(cu.m_tqBypass[0]);
@@ -2624,6 +2624,7 @@
         interMode.chromaDistortion += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
         interMode.distortion += interMode.chromaDistortion;
     }
+    cu.m_distortion[0] = interMode.distortion;
     m_entropyCoder.load(m_rqt[depth].cur);
     m_entropyCoder.resetBits();
     if (m_slice->m_pps->bTransquantBypassEnabled)
@@ -2786,6 +2787,7 @@
     interMode.lumaDistortion = bestLumaDist;
     interMode.coeffBits = coeffBits;
     interMode.mvBits = mvBits;
+    cu.m_distortion[0] = interMode.distortion;
     updateModeCost(interMode);
     checkDQP(interMode, cuGeom);
 }


More information about the x265-devel mailing list