[x265-commits] [x265] frameencoder: recover vbvCost and intraVbvCost logic

Tue Apr 28 09:05:44 CEST 2015

details:   http://hg.videolan.org/x265/rev/2ae0ea21ec60
branches:  
changeset: 10306:2ae0ea21ec60
user:      Steve Borho <steve at borho.org>
date:      Tue Apr 28 01:58:23 2015 -0500
description:
frameencoder: recover vbvCost and intraVbvCost logic

These were inadvertently dropped when calcQpForCu() was removed by ed448198ce3d.
Subject: [x265] frameencoder: cleanups, no behavior change

details:   http://hg.videolan.org/x265/rev/b0431487e669
branches:  
changeset: 10307:b0431487e669
user:      Steve Borho <steve at borho.org>
date:      Tue Apr 28 02:03:59 2015 -0500
description:
frameencoder: cleanups, no behavior change
Subject: [x265] analysis: remove m_aqQP[], determine AQ QPs on demand

details:   http://hg.videolan.org/x265/rev/6e54b0be1491
branches:  
changeset: 10308:6e54b0be1491
user:      Steve Borho <steve at borho.org>
date:      Mon Apr 27 14:10:11 2015 -0500
description:
analysis: remove m_aqQP[], determine AQ QPs on demand

This has no effect on output, and should be optimally work-efficient.
Subject: [x265] cudata: cu index is no longer necessary again

details:   http://hg.videolan.org/x265/rev/13290abce292
branches:  
changeset: 10309:13290abce292
user:      Steve Borho <steve at borho.org>
date:      Mon Apr 27 14:15:28 2015 -0500
description:
cudata: cu index is no longer necessary again

diffstat:

 source/common/cudata.cpp        |   3 +-
 source/common/cudata.h          |   2 +-
 source/encoder/analysis.cpp     |  42 ++++++++++------------------------------
 source/encoder/analysis.h       |   3 --
 source/encoder/frameencoder.cpp |  25 ++++++++++++++++++++++-
 5 files changed, 36 insertions(+), 39 deletions(-)

diffs (192 lines):

diff -r ed448198ce3d -r 13290abce292 source/common/cudata.cpp

--- a/source/common/cudata.cpp	Mon Apr 27 13:59:08 2015 -0500
+++ b/source/common/cudata.cpp	Mon Apr 27 14:15:28 2015 -0500
@@ -2050,8 +2050,7 @@ void CUData::calcCTUGeoms(uint32_t ctuWi
                 cu->childOffset = childIdx - cuIdx;
                 cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4;
                 cu->numPartitions = (NUM_4x4_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2));
-                cu->depth = (uint16_t)(g_log2Size[maxCUSize] - log2CUSize);
-                cu->index = (uint16_t)cuIdx;
+                cu->depth = g_log2Size[maxCUSize] - log2CUSize;
 
                 cu->flags = 0;
                 CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
diff -r ed448198ce3d -r 13290abce292 source/common/cudata.h
--- a/source/common/cudata.h	Mon Apr 27 13:59:08 2015 -0500
+++ b/source/common/cudata.h	Mon Apr 27 14:15:28 2015 -0500
@@ -86,7 +86,7 @@ struct CUGeom
     uint32_t absPartIdx;    // Part index of this CU in terms of 4x4 blocks.
     uint32_t numPartitions; // Number of 4x4 blocks in the CU
     uint32_t flags;         // CU flags.
-    uint16_t depth, index;  // depth of this CU relative from CTU, absolute index
+    uint32_t depth;         // depth of this CU relative from CTU
 };
 
 struct MVField
diff -r ed448198ce3d -r 13290abce292 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon Apr 27 13:59:08 2015 -0500
+++ b/source/encoder/analysis.cpp	Mon Apr 27 14:15:28 2015 -0500
@@ -121,17 +121,6 @@ void Analysis::destroy()
     }
 }
 
-void Analysis::initAqQPs(uint32_t depth, const CUData& ctu, const CUGeom* rootGeom)
-{
-    for (int d0 = 0; d0 < 4; d0++)
-    {
-        m_aqQP[rootGeom->index + d0] = calculateQpforCuSize(ctu, rootGeom[d0]);
-
-        if (m_slice->m_pps->maxCuDQPDepth > depth)
-            initAqQPs(depth + 1, ctu, &rootGeom[d0] + rootGeom[d0].childOffset);
-    }
-}
-
 Mode& Analysis::compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext)
 {
     m_slice = ctu.m_slice;
@@ -144,18 +133,9 @@ Mode& Analysis::compressCTU(CUData& ctu,
     invalidateContexts(0);
 #endif
 
-    if (m_slice->m_pps->bUseDQP)
-    {
-        m_aqQP[0] = setLambdaFromQP(ctu, calculateQpforCuSize(ctu, cuGeom));
+    int qp = setLambdaFromQP(ctu, m_slice->m_pps->bUseDQP ? calculateQpforCuSize(ctu, cuGeom) : m_slice->m_sliceQp);
+    ctu.setQPSubParts((int8_t)qp, 0, 0);
 
-        if (m_slice->m_pps->maxCuDQPDepth)
-            initAqQPs(1, ctu, &cuGeom + 1);
-    }
-    else
-        /* adaptive quant disabled, CTU QP is always slice QP, and within spec range */
-        m_aqQP[0] = setLambdaFromQP(ctu, m_slice->m_sliceQp);
-
-    ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
     m_rqt[0].cur.load(initialContext);
     m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
 
@@ -178,7 +158,7 @@ Mode& Analysis::compressCTU(CUData& ctu,
     uint32_t zOrder = 0;
     if (m_slice->m_sliceType == I_SLICE)
     {
-        compressIntraCU(ctu, cuGeom, zOrder, m_aqQP[0]);
+        compressIntraCU(ctu, cuGeom, zOrder, qp);
         if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
         {
             CUData* bestCU = &m_modeDepth[0].bestMode->cu;
@@ -196,18 +176,18 @@ Mode& Analysis::compressCTU(CUData& ctu,
             * they are available for intra predictions */
             m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
 
-            compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0]);
+            compressInterCU_rd0_4(ctu, cuGeom, qp);
 
             /* generate residual for entire CTU at once and copy to reconPic */
             encodeResidue(ctu, cuGeom);
         }
         else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
-            compressInterCU_dist(ctu, cuGeom, m_aqQP[0]);
+            compressInterCU_dist(ctu, cuGeom, qp);
         else if (m_param->rdLevel <= 4)
-            compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0]);
+            compressInterCU_rd0_4(ctu, cuGeom, qp);
         else
         {
-            compressInterCU_rd5_6(ctu, cuGeom, zOrder, m_aqQP[0]);
+            compressInterCU_rd5_6(ctu, cuGeom, zOrder, qp);
             if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData)
             {
                 CUData* bestCU = &m_modeDepth[0].bestMode->cu;
@@ -327,7 +307,7 @@ void Analysis::compressIntraCU(const CUD
                 m_rqt[nextDepth].cur.load(*nextContext);
 
                 if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
-                    nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
+                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
 
                 compressIntraCU(parentCTU, childGeom, zOrder, nextQP);
 
@@ -725,7 +705,7 @@ void Analysis::compressInterCU_dist(cons
                 m_rqt[nextDepth].cur.load(*nextContext);
 
                 if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
-                    nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
+                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
 
                 compressInterCU_dist(parentCTU, childGeom, nextQP);
 
@@ -1000,7 +980,7 @@ void Analysis::compressInterCU_rd0_4(con
                 m_rqt[nextDepth].cur.load(*nextContext);
 
                 if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
-                    nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
+                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
 
                 compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
 
@@ -1204,7 +1184,7 @@ void Analysis::compressInterCU_rd5_6(con
                 m_rqt[nextDepth].cur.load(*nextContext);
 
                 if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
-                    nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
+                    nextQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, childGeom));
 
                 compressInterCU_rd5_6(parentCTU, childGeom, zOrder, nextQP);
 
diff -r ed448198ce3d -r 13290abce292 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Mon Apr 27 13:59:08 2015 -0500
+++ b/source/encoder/analysis.h	Mon Apr 27 14:15:28 2015 -0500
@@ -90,7 +90,6 @@ public:
     void processPmode(PMODE& pmode, Analysis& slave);
 
     ModeDepth m_modeDepth[NUM_CU_DEPTH];
-    int       m_aqQP[CUGeom::MAX_GEOMS];
     bool      m_bTryLossless;
     bool      m_bChromaSa8d;
 
@@ -109,8 +108,6 @@ protected:
     int32_t*             m_reuseRef;
     uint32_t*            m_reuseBestMergeCand;
 
-    void initAqQPs(uint32_t depth, const CUData& ctu, const CUGeom* rootGeom);
-
     /* full analysis for an I-slice CU */
     void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);
 
diff -r ed448198ce3d -r 13290abce292 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Apr 27 13:59:08 2015 -0500
+++ b/source/encoder/frameencoder.cpp	Mon Apr 27 14:15:28 2015 -0500
@@ -841,10 +841,31 @@ void FrameEncoder::processRowEncoder(int
                 curEncData.m_rowStat[row].diagQpScale = x265_qp2qScale(curEncData.m_avgQpRc);
             }
 
+            FrameData::RCStatCU& cuStat = curEncData.m_cuStat[cuAddr];
             if (row >= col && row && m_vbvResetTriggerRow != intRow)
-                curEncData.m_cuStat[cuAddr].baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
+                cuStat.baseQp = curEncData.m_cuStat[cuAddr - numCols + 1].baseQp;
             else
-                curEncData.m_cuStat[cuAddr].baseQp = curEncData.m_rowStat[row].diagQp;
+                cuStat.baseQp = curEncData.m_rowStat[row].diagQp;
+
+            /* TODO: use defines from slicetype.h for lowres block size */
+            uint32_t maxBlockCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16;
+            uint32_t maxBlockRows = (m_frame->m_fencPic->m_picHeight + (16 - 1)) / 16;
+            uint32_t noOfBlocks = g_maxCUSize / 16;
+            uint32_t block_y = (cuAddr / curEncData.m_slice->m_sps->numCuInWidth) * noOfBlocks;
+            uint32_t block_x = (cuAddr * noOfBlocks) - block_y * curEncData.m_slice->m_sps->numCuInWidth;
+            
+            cuStat.vbvCost = 0;
+            cuStat.intraVbvCost = 0;
+            for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)
+            {
+                uint32_t idx = block_x + (block_y * maxBlockCols);
+
+                for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++, idx++)
+                {
+                    cuStat.vbvCost += m_frame->m_lowres.lowresCostForRc[idx] & LOWRES_COST_MASK;
+                    cuStat.intraVbvCost += m_frame->m_lowres.intraCost[idx];
+                }
+            }
         }
         else
             curEncData.m_cuStat[cuAddr].baseQp = curEncData.m_avgQpRc;