[x265] [PATCH rfc] aq: implementation of Fine-grained Adaptive Quantization

gopu at multicorewareinc.com gopu at multicorewareinc.com
Mon Mar 16 12:28:28 CET 2015


# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1426504011 -19800
#      Mon Mar 16 16:36:51 2015 +0530
# Node ID 615b61dd2be5e8ef1a7fe2f22edcac6e437f300d
# Parent  6461985f33ac6fc5b205879bbb0f2a535226ca76
aq: implementation of Fine-grained Adaptive Quantization

Currently, adaptive quantization adjusts the QP values per 64x64 pixel coding
tree unit (CTU) across a video frame. The new param option --max-dqp-depth
allows the quantization parameter (QP) to be adjusted for individual
quantization groups (QGs) within a CTU.

Example:
--max-dqp-depth=0 for 64x64 blocks
--max-dqp-depth=1 for 32x32 blocks
--max-dqp-depth=2 for 16x16 blocks

Currently this feature is not supported for 8x8 blocks.

sample test results for each depth

clip - ducks_take_off_420_720p50.y4m
preset=medium
max-dqp-depth 0 - encoded 500 frames in 36.86s (13.56 fps), 4575.09 kb/s,
Global PSNR: 29.587, SSIM Mean Y: 0.8309761 ( 7.721 dB)
max-dqp-depth 1 - encoded 500 frames in 43.00s (11.63 fps), 4606.96 kb/s,
Global PSNR: 29.590, SSIM Mean Y: 0.8313855 ( 7.731 dB)
max-dqp-depth 2 - encoded 500 frames in 35.47s (14.10 fps), 4599.65 kb/s,
Global PSNR: 29.575, SSIM Mean Y: 0.8311820 ( 7.726 dB)

preset=veryslow
max-dqp-depth 0 - encoded 500 frames in 499.24s (1.00 fps), 4407.79 kb/s,
Global PSNR: 29.890, SSIM Mean Y: 0.8419664 ( 8.013 dB)
max-dqp-depth 1 - encoded 500 frames in 497.96s (1.00 fps), 4413.64 kb/s,
Global PSNR: 29.884, SSIM Mean Y: 0.8420085 ( 8.014 dB)
max-dqp-depth 2 - encoded 500 frames in 511.36s (0.98 fps), 4428.71 kb/s,
Global PSNR: 29.877, SSIM Mean Y: 0.8419621 ( 8.012 dB)

-----------------------------------------
clip - Cactus_1920x1080_50.y4m
preset=medium
max-dqp-depth 0 - encoded 100 frames in 13.61s (7.35 fps), 2588.25 kb/s,
Global PSNR: 34.890, SSIM Mean Y: 0.8685867 ( 8.814 dB)
max-dqp-depth 1 - encoded 100 frames in 12.15s (8.23 fps), 2629.22 kb/s,
Global PSNR: 34.901, SSIM Mean Y: 0.8689989 ( 8.827 dB)
max-dqp-depth 2 - encoded 100 frames in 12.26s (8.16 fps), 2624.31 kb/s,
Global PSNR: 34.864, SSIM Mean Y: 0.8688061 ( 8.821 dB)

preset=veryslow
max-dqp-depth 0 - encoded 100 frames in 138.68s (0.72 fps), 2277.00 kb/s,
Global PSNR: 35.118, SSIM Mean Y: 0.8725818 ( 8.948 dB)
max-dqp-depth 1 - encoded 100 frames in 137.21s (0.73 fps), 2293.83 kb/s,
Global PSNR: 35.117, SSIM Mean Y: 0.8725589 ( 8.947 dB)
max-dqp-depth 2 - encoded 100 frames in 134.96s (0.74 fps), 2299.79 kb/s,
Global PSNR: 35.109, SSIM Mean Y: 0.8727326 ( 8.953 dB)

diff -r 6461985f33ac -r 615b61dd2be5 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/cudata.cpp	Mon Mar 16 16:36:51 2015 +0530
@@ -298,7 +298,7 @@
 }
 
 // initialize Sub partition
-void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom)
+void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, const int qp)
 {
     m_absIdxInCTU   = cuGeom.absPartIdx;
     m_encData       = ctu.m_encData;
@@ -312,8 +312,11 @@
     m_cuAboveRight  = ctu.m_cuAboveRight;
     X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
 
-    /* sequential memsets */
-    m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
+    if (cuGeom.depth <= (uint32_t)m_encData->m_param->rc.maxCuDQPDepth)
+        m_partSet((uint8_t*)m_qp, (uint8_t)qp);
+    else
+        m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
+
     m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
     m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
     m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
diff -r 6461985f33ac -r 615b61dd2be5 source/common/cudata.h
--- a/source/common/cudata.h	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/cudata.h	Mon Mar 16 16:36:51 2015 +0530
@@ -182,7 +182,7 @@
     static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
 
     void     initCTU(const Frame& frame, uint32_t cuAddr, int qp);
-    void     initSubCU(const CUData& ctu, const CUGeom& cuGeom);
+    void     initSubCU(const CUData& ctu, const CUGeom& cuGeom, const int qp);
     void     initLosslessCU(const CUData& cu, const CUGeom& cuGeom);
 
     void     copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx);
diff -r 6461985f33ac -r 615b61dd2be5 source/common/param.cpp
--- a/source/common/param.cpp	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/param.cpp	Mon Mar 16 16:36:51 2015 +0530
@@ -210,6 +210,7 @@
     param->rc.zones = NULL;
     param->rc.bEnableSlowFirstPass = 0;
     param->rc.bStrictCbr = 0;
+    param->rc.maxCuDQPDepth = 0;
 
     /* Video Usability Information (VUI) */
     param->vui.aspectRatioIdc = 0;
@@ -839,6 +840,7 @@
     OPT2("pools", "numa-pools") p->numaPools = strdup(value);
     OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
     OPT("analysis-file") p->analysisFileName = strdup(value);
+    OPT("max-dqp-depth") p->rc.maxCuDQPDepth = atoi(value);
     else
         return X265_PARAM_BAD_NAME;
 #undef OPT
diff -r 6461985f33ac -r 615b61dd2be5 source/common/quant.cpp
--- a/source/common/quant.cpp	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/quant.cpp	Mon Mar 16 16:36:51 2015 +0530
@@ -225,13 +225,13 @@
     X265_FREE(m_fencShortBuf);
 }
 
-void Quant::setQPforQuant(const CUData& cu)
+void Quant::setQPforQuant(const CUData& cu, const int qp)
 {
     m_tqBypass = !!cu.m_tqBypass[0];
     if (m_tqBypass)
         return;
     m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] : NULL;
-    int qpy = cu.m_qp[0];
+    int qpy = qp ? qp : cu.m_qp[0];
     m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
     setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, cu.m_chromaFormat);
     setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, cu.m_chromaFormat);
diff -r 6461985f33ac -r 615b61dd2be5 source/common/quant.h
--- a/source/common/quant.h	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/common/quant.h	Mon Mar 16 16:36:51 2015 +0530
@@ -103,7 +103,7 @@
     bool allocNoiseReduction(const x265_param& param);
 
     /* CU setup */
-    void setQPforQuant(const CUData& cu);
+    void setQPforQuant(const CUData& cu, const int qp = 0);
 
     uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
                           uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
diff -r 6461985f33ac -r 615b61dd2be5 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/encoder/analysis.cpp	Mon Mar 16 16:36:51 2015 +0530
@@ -225,6 +225,10 @@
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
 
+    int32_t qp = 0;
+    if (depth <= (uint32_t)m_param->rc.maxCuDQPDepth)
+        qp = calculateQpforCuSize(parentCTU, cuGeom);
+
     if (m_param->analysisMode == X265_ANALYSIS_LOAD)
     {
         uint8_t* reuseDepth  = &m_reuseIntraDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
@@ -234,11 +238,11 @@
 
         if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.absPartIdx)
         {
-            m_quant.setQPforQuant(parentCTU);
+            m_quant.setQPforQuant(parentCTU, qp);
 
             PartSize size = (PartSize)reusePartSizes[zOrder];
             Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
-            mode.cu.initSubCU(parentCTU, cuGeom);
+            mode.cu.initSubCU(parentCTU, cuGeom, qp);
             checkIntra(mode, cuGeom, size, &reuseModes[zOrder], &reuseChromaModes[zOrder]);
             checkBestMode(mode, depth);
 
@@ -255,15 +259,15 @@
     }
     else if (mightNotSplit)
     {
-        m_quant.setQPforQuant(parentCTU);
+        m_quant.setQPforQuant(parentCTU, qp);
 
-        md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+        md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
         checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
         checkBestMode(md.pred[PRED_INTRA], depth);
 
         if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
         {
-            md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
+            md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
             checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
             checkBestMode(md.pred[PRED_INTRA_NxN], depth);
         }
@@ -280,7 +284,7 @@
         Mode* splitPred = &md.pred[PRED_SPLIT];
         splitPred->initCosts();
         CUData* splitCU = &splitPred->cu;
-        splitCU->initSubCU(parentCTU, cuGeom);
+        splitCU->initSubCU(parentCTU, cuGeom, qp);
 
         uint32_t nextDepth = depth + 1;
         ModeDepth& nd = m_modeDepth[nextDepth];
@@ -496,6 +500,10 @@
 
     X265_CHECK(m_param->rdLevel >= 2, "compressInterCU_dist does not support RD 0 or 1\n");
 
+    int32_t qp = 0;
+    if (depth <= (uint32_t)m_param->rc.maxCuDQPDepth)
+        qp = calculateQpforCuSize(parentCTU, cuGeom);
+
     if (mightNotSplit && depth >= minDepth)
     {
         int bTryAmp = m_slice->m_sps->maxAMPDepth > depth && (cuGeom.log2CUSize < 6 || m_param->rdLevel > 4);
@@ -504,28 +512,28 @@
         PMODE pmode(*this, cuGeom);
 
         /* Initialize all prediction CUs based on parentCTU */
-        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
-        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
         if (bTryIntra)
         {
-            md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+            md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
             if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3 && m_param->rdLevel >= 5)
-                md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
+                md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
             pmode.modes[pmode.m_jobTotal++] = PRED_INTRA;
         }
-        md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_2Nx2N;
-        md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+        md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2Nx2N;
+        md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
         if (m_param->bEnableRectInter)
         {
-            md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_2NxN;
-            md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_Nx2N;
+            md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxN;
+            md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_Nx2N;
         }
         if (bTryAmp)
         {
-            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnU;
-            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnD;
-            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_nLx2N;
-            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom); pmode.modes[pmode.m_jobTotal++] = PRED_nRx2N;
+            md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnU;
+            md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnD;
+            md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nLx2N;
+            md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nRx2N;
         }
 
         pmode.tryBondPeers(*m_frame->m_encData->m_jobProvider, pmode.m_jobTotal);
@@ -654,7 +662,7 @@
 
         if (md.bestMode->rdCost == MAX_INT64 && !bTryIntra)
         {
-            md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+            md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
             checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
             encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
             checkBestMode(md.pred[PRED_INTRA], depth);
@@ -680,7 +688,7 @@
         Mode* splitPred = &md.pred[PRED_SPLIT];
         splitPred->initCosts();
         CUData* splitCU = &splitPred->cu;
-        splitCU->initSubCU(parentCTU, cuGeom);
+        splitCU->initSubCU(parentCTU, cuGeom, qp);
 
         uint32_t nextDepth = depth + 1;
         ModeDepth& nd = m_modeDepth[nextDepth];
@@ -744,13 +752,17 @@
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
     uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
 
+    int32_t qp = 0;
+    if (depth <= (uint32_t)m_param->rc.maxCuDQPDepth)
+        qp = calculateQpforCuSize(parentCTU, cuGeom);
+
     if (mightNotSplit && depth >= minDepth)
     {
         bool bTryIntra = m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames;
 
         /* Compute Merge Cost */
-        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
-        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
+        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
         checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
 
         bool earlyskip = false;
@@ -759,24 +771,24 @@
 
         if (!earlyskip)
         {
-            md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+            md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
             checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N);
 
             if (m_slice->m_sliceType == B_SLICE)
             {
-                md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+                md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
                 checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
             }
 
             Mode *bestInter = &md.pred[PRED_2Nx2N];
             if (m_param->bEnableRectInter)
             {
-                md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
+                md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                 checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N);
                 if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
                     bestInter = &md.pred[PRED_Nx2N];
 
-                md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
+                md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
                 checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN);
                 if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
                     bestInter = &md.pred[PRED_2NxN];
@@ -798,24 +810,24 @@
 
                 if (bHor)
                 {
-                    md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU);
                     if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
                         bestInter = &md.pred[PRED_2NxnU];
 
-                    md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD);
                     if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
                         bestInter = &md.pred[PRED_2NxnD];
                 }
                 if (bVer)
                 {
-                    md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N);
                     if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
                         bestInter = &md.pred[PRED_nLx2N];
 
-                    md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N);
                     if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
                         bestInter = &md.pred[PRED_nRx2N];
@@ -847,7 +859,7 @@
                 if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
                     md.bestMode->sa8dCost == MAX_INT64)
                 {
-                    md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
                     encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
                     checkBestMode(md.pred[PRED_INTRA], depth);
@@ -865,7 +877,7 @@
 
                 if (bTryIntra || md.bestMode->sa8dCost == MAX_INT64)
                 {
-                    md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
                     if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
                         md.bestMode = &md.pred[PRED_INTRA];
@@ -893,7 +905,7 @@
                     {
                         /* generate recon pixels with no rate distortion considerations */
                         CUData& cu = md.bestMode->cu;
-                        m_quant.setQPforQuant(cu);
+                        m_quant.setQPforQuant(cu, qp);
 
                         uint32_t tuDepthRange[2];
                         cu.getInterTUQtDepthRange(tuDepthRange, 0);
@@ -918,7 +930,7 @@
                     {
                         /* generate recon pixels with no rate distortion considerations */
                         CUData& cu = md.bestMode->cu;
-                        m_quant.setQPforQuant(cu);
+                        m_quant.setQPforQuant(cu, qp);
 
                         uint32_t tuDepthRange[2];
                         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
@@ -952,7 +964,7 @@
         Mode* splitPred = &md.pred[PRED_SPLIT];
         splitPred->initCosts();
         CUData* splitCU = &splitPred->cu;
-        splitCU->initSubCU(parentCTU, cuGeom);
+        splitCU->initSubCU(parentCTU, cuGeom, qp);
 
         uint32_t nextDepth = depth + 1;
         ModeDepth& nd = m_modeDepth[nextDepth];
@@ -1025,14 +1037,18 @@
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
 
+    int32_t qp = 0;
+    if (depth <= (uint32_t)m_param->rc.maxCuDQPDepth)
+        qp = calculateQpforCuSize(parentCTU, cuGeom);
+
     if (m_param->analysisMode == X265_ANALYSIS_LOAD)
     {
         uint8_t* reuseDepth  = &m_reuseInterDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
         uint8_t* reuseModes  = &m_reuseInterDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
         if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.absPartIdx && reuseModes[zOrder] == MODE_SKIP)
         {
-            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
-            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
+            md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+            md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
             checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom, true);
 
             if (m_bTryLossless)
@@ -1051,20 +1067,20 @@
 
     if (mightNotSplit)
     {
-        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom);
-        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom);
+        md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+        md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
         checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom, false);
         bool earlySkip = m_param->bEnableEarlySkip && md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
 
         if (!earlySkip)
         {
-            md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom);
+            md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
             checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, false);
             checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
 
             if (m_slice->m_sliceType == B_SLICE)
             {
-                md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom);
+                md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
                 checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
                 if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
                 {
@@ -1075,11 +1091,11 @@
 
             if (m_param->bEnableRectInter)
             {
-                md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom);
+                md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                 checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, false);
                 checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
 
-                md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom);
+                md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
                 checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, false);
                 checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
             }
@@ -1102,21 +1118,21 @@
 
                 if (bHor)
                 {
-                    md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, bMergeOnly);
                     checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
 
-                    md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, bMergeOnly);
                     checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
                 }
                 if (bVer)
                 {
-                    md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, bMergeOnly);
                     checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
 
-                    md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, bMergeOnly);
                     checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
                 }
@@ -1124,13 +1140,13 @@
 
             if (m_slice->m_sliceType != B_SLICE || m_param->bIntraInBFrames)
             {
-                md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
+                md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
                 checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
                 checkBestMode(md.pred[PRED_INTRA], depth);
 
                 if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
                 {
-                    md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
+                    md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
                     checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
                     checkBestMode(md.pred[PRED_INTRA_NxN], depth);
                 }
@@ -1150,7 +1166,7 @@
         Mode* splitPred = &md.pred[PRED_SPLIT];
         splitPred->initCosts();
         CUData* splitCU = &splitPred->cu;
-        splitCU->initSubCU(parentCTU, cuGeom);
+        splitCU->initSubCU(parentCTU, cuGeom, qp);
 
         uint32_t nextDepth = depth + 1;
         ModeDepth& nd = m_modeDepth[nextDepth];
@@ -1896,7 +1912,7 @@
     return false;
 }
 
-int Analysis::calculateQpforCuSize(CUData& ctu, const CUGeom& cuGeom)
+int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom)
 {
     uint32_t ctuAddr = ctu.m_cuAddr;
     FrameData& curEncData = *m_frame->m_encData;
diff -r 6461985f33ac -r 615b61dd2be5 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/encoder/analysis.h	Mon Mar 16 16:36:51 2015 +0530
@@ -139,7 +139,7 @@
     /* generate residual and recon pixels for an entire CTU recursively (RD0) */
     void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);
 
-    int calculateQpforCuSize(CUData& ctu, const CUGeom& cuGeom);
+    int calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom);
 
     /* check whether current mode is the new best */
     inline void checkBestMode(Mode& mode, uint32_t depth)
diff -r 6461985f33ac -r 615b61dd2be5 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/encoder/encoder.cpp	Mon Mar 16 16:36:51 2015 +0530
@@ -1551,15 +1551,11 @@
     bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
 
     if (!m_param->bLossless && (m_param->rc.aqMode || bIsVbv))
-    {
         pps->bUseDQP = true;
-        pps->maxCuDQPDepth = 0; /* TODO: make configurable? */
-    }
     else
-    {
         pps->bUseDQP = false;
-        pps->maxCuDQPDepth = 0;
-    }
+
+    pps->maxCuDQPDepth = m_param->rc.maxCuDQPDepth;
 
     pps->chromaQpOffset[0] = m_param->cbQpOffset;
     pps->chromaQpOffset[1] = m_param->crQpOffset;
@@ -1778,6 +1774,17 @@
         p->analysisMode = X265_ANALYSIS_OFF;
         x265_log(p, X265_LOG_WARNING, "Analysis save and load mode not supported for distributed mode analysis\n");
     }
+    bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
+    if (!m_param->bLossless && (m_param->rc.aqMode || bIsVbv))
+    {
+        if (p->rc.maxCuDQPDepth > (NUM_CU_DEPTH - 2))
+        {
+            p->rc.maxCuDQPDepth = 0;
+            x265_log(p, X265_LOG_WARNING, "The maxCUDQPDepth should be less than maxCUDepth - 1(0, 1 or 2) setting maxCUDQPDepth = %d \n", 0);
+        }
+    }
+    else
+        p->rc.maxCuDQPDepth = 0;
 }
 
 void Encoder::allocAnalysis(x265_analysis_data* analysis)
diff -r 6461985f33ac -r 615b61dd2be5 source/x265.h
--- a/source/x265.h	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/x265.h	Mon Mar 16 16:36:51 2015 +0530
@@ -977,6 +977,13 @@
         /* Enable stricter conditions to check bitrate deviations in CBR mode. May compromise 
          * quality to maintain bitrate adherence */
         int bStrictCbr;
+
+        /* Maximum CU depth at which a delta QP (CuDQP) is signaled. With the
+         * default of 0 the CuDQP is signaled once per CTU; larger values also
+         * allow it to be signaled at sub-CTU level. The CuDQP is signaled whenever
+         * the current depth is less than or equal to maxCuDQPDepth. Valid values
+         * are 0, 1 or 2; the encoder resets larger values to 0 with a warning */
+        int maxCuDQPDepth;
     } rc;
 
     /*== Video Usability Information ==*/
diff -r 6461985f33ac -r 615b61dd2be5 source/x265cli.h
--- a/source/x265cli.h	Sun Mar 15 11:58:32 2015 -0500
+++ b/source/x265cli.h	Mon Mar 16 16:36:51 2015 +0530
@@ -202,6 +202,7 @@
     { "strict-cbr",           no_argument, NULL, 0 },
     { "temporal-layers",      no_argument, NULL, 0 },
     { "no-temporal-layers",   no_argument, NULL, 0 },
+    { "max-dqp-depth",  required_argument, NULL, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },


More information about the x265-devel mailing list