[x265] [PATCH 1 of 2] aq: implementation of fine-grained adaptive quantization

Mon Apr 6 23:34:40 CEST 2015

On 04/06, deepthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Deepthi Nandakumar <deepthi at multicorewareinc.com>
> # Date 1427100822 -19800
> #      Mon Mar 23 14:23:42 2015 +0530
> # Node ID 30e209c9bab6acc8028f922c652d80dd51ac263f
> # Parent  ebe5e57c4b45b45338035a1009b64585f21d66d5
> aq: implementation of fine-grained adaptive quantization
> 
> Currently adaptive quantization adjusts the QP values on 64x64 pixel CodingTree
> units (CTUs) across a video frame. The new param option --qg-size will
> enable QP to be adjusted to individual quantization groups (QGs) of size 64/32/16
> 
> diff -r ebe5e57c4b45 -r 30e209c9bab6 doc/reST/cli.rst
> --- a/doc/reST/cli.rst	Sat Apr 04 15:11:39 2015 -0500
> +++ b/doc/reST/cli.rst	Mon Mar 23 14:23:42 2015 +0530
> @@ -1111,6 +1111,13 @@
>  
>  	**Range of values:** 0.0 to 3.0
>  
> +.. option:: --qg-size <64|32|16>
> +	Enable adaptive quantization for sub-CTUs. This parameter specifies 
> +	the minimum CU size at which QP can be adjusted, ie. Quantization Group
> +	size. Allowed range of values are 64, 32, 16 provided this falls within 
> +	the inclusive range [maxCUSize, minCUSize]. Experimental.
> +	Default: same as maxCUSize
> +
>  .. option:: --cutree, --no-cutree
>  
>  	Enable the use of lookahead's lowres motion vector fields to
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/CMakeLists.txt
> --- a/source/CMakeLists.txt	Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/CMakeLists.txt	Mon Mar 23 14:23:42 2015 +0530
> @@ -30,7 +30,7 @@
>  mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>  
>  # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 52)
> +set(X265_BUILD 53)
>  configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
>                 "${PROJECT_BINARY_DIR}/x265.def")
>  configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/common/cudata.cpp
> --- a/source/common/cudata.cpp	Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/common/cudata.cpp	Mon Mar 23 14:23:42 2015 +0530
> @@ -298,7 +298,7 @@
>  }
>  
>  // initialize Sub partition
> -void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom)
> +void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
>  {
>      m_absIdxInCTU   = cuGeom.absPartIdx;
>      m_encData       = ctu.m_encData;
> @@ -312,8 +312,8 @@
>      m_cuAboveRight  = ctu.m_cuAboveRight;
>      X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
>  
> -    /* sequential memsets */
> -    m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
> +    m_partSet((uint8_t*)m_qp, (uint8_t)qp);
> +
>      m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
>      m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
>      m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/common/cudata.h
> --- a/source/common/cudata.h	Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/common/cudata.h	Mon Mar 23 14:23:42 2015 +0530
> @@ -182,7 +182,7 @@
>      static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
>  
>      void     initCTU(const Frame& frame, uint32_t cuAddr, int qp);
> -    void     initSubCU(const CUData& ctu, const CUGeom& cuGeom);
> +    void     initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp);
>      void     initLosslessCU(const CUData& cu, const CUGeom& cuGeom);
>  
>      void     copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx);
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/common/param.cpp
> --- a/source/common/param.cpp	Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/common/param.cpp	Mon Mar 23 14:23:42 2015 +0530
> @@ -209,6 +209,7 @@
>      param->rc.zones = NULL;
>      param->rc.bEnableSlowFirstPass = 0;
>      param->rc.bStrictCbr = 0;
> +    param->rc.qgSize = 64; /* Same as maxCUSize */
>  
>      /* Video Usability Information (VUI) */
>      param->vui.aspectRatioIdc = 0;
> @@ -263,6 +264,7 @@
>              param->rc.aqStrength = 0.0;
>              param->rc.aqMode = X265_AQ_NONE;
>              param->rc.cuTree = 0;
> +            param->rc.qgSize = 32;
>              param->bEnableFastIntra = 1;
>          }
>          else if (!strcmp(preset, "superfast"))
> @@ -279,6 +281,7 @@
>              param->rc.aqStrength = 0.0;
>              param->rc.aqMode = X265_AQ_NONE;
>              param->rc.cuTree = 0;
> +            param->rc.qgSize = 32;
>              param->bEnableSAO = 0;
>              param->bEnableFastIntra = 1;
>          }
> @@ -292,6 +295,7 @@
>              param->rdLevel = 2;
>              param->maxNumReferences = 1;
>              param->rc.cuTree = 0;
> +            param->rc.qgSize = 32;
>              param->bEnableFastIntra = 1;
>          }
>          else if (!strcmp(preset, "faster"))
> @@ -843,6 +847,7 @@
>      OPT2("pools", "numa-pools") p->numaPools = strdup(value);
>      OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
>      OPT("analysis-file") p->analysisFileName = strdup(value);
> +    OPT("qg-size") p->rc.qgSize = atoi(value);
>      else
>          return X265_PARAM_BAD_NAME;
>  #undef OPT
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp	Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/encoder/analysis.cpp	Mon Mar 23 14:23:42 2015 +0530
> @@ -75,6 +75,8 @@
>      m_reuseInterDataCTU = NULL;
>      m_reuseRef = NULL;
>      m_reuseBestMergeCand = NULL;
> +    for (int i = 0; i < NUM_CU_DEPTH; i++)
> +        m_qp[i] = NULL;
>  }
>  
>  bool Analysis::create(ThreadLocalData *tld)
> @@ -101,9 +103,12 @@
>              ok &= md.pred[j].reconYuv.create(cuSize, csp);
>              md.pred[j].fencYuv = &md.fencYuv;
>          }
> +        CHECKED_MALLOC(m_qp[depth], int, 1i64 << (depth << 1));

This notation is not very portable:

/Users/steve/repos/x265-sborho/source/encoder/analysis.cpp:106:42:
error: invalid suffix "i64" on integer constant
         CHECKED_MALLOC(m_qp[depth], int, 1i64 << (depth << 1));

Max depth is 5, so this only needs to be 10 bits. Either leave it as a
plain 1 or use an explicit typecast to uint64_t.

Also, while we are updating AQ, I think now would be a good time to
revisit x264's optimization where they discard small QP changes from AQ
in the interest of not signaling DQP for minimal visual effect.

-- 
Steve Borho