[x265] [PATCH 1 of 2] aq: implementation of fine-grained adaptive quantization
Steve Borho
steve at borho.org
Mon Apr 6 23:34:40 CEST 2015
On 04/06, deepthi at multicorewareinc.com wrote:
> # HG changeset patch
> # User Deepthi Nandakumar <deepthi at multicorewareinc.com>
> # Date 1427100822 -19800
> # Mon Mar 23 14:23:42 2015 +0530
> # Node ID 30e209c9bab6acc8028f922c652d80dd51ac263f
> # Parent ebe5e57c4b45b45338035a1009b64585f21d66d5
> aq: implementation of fine-grained adaptive quantization
>
> Currently, adaptive quantization adjusts the QP values on 64x64 pixel Coding Tree
> Units (CTUs) across a video frame. The new param option --qg-size will
> enable QP to be adjusted for individual quantization groups (QGs) of size 64, 32 or 16.
>
> diff -r ebe5e57c4b45 -r 30e209c9bab6 doc/reST/cli.rst
> --- a/doc/reST/cli.rst Sat Apr 04 15:11:39 2015 -0500
> +++ b/doc/reST/cli.rst Mon Mar 23 14:23:42 2015 +0530
> @@ -1111,6 +1111,13 @@
>
> **Range of values:** 0.0 to 3.0
>
> +.. option:: --qg-size <64|32|16>
> + Enable adaptive quantization for sub-CTUs. This parameter specifies
> + the minimum CU size at which QP can be adjusted, i.e. the Quantization Group
> + size. Allowed values are 64, 32 and 16, provided the value falls within
> + the inclusive range [maxCUSize, minCUSize]. Experimental.
> + Default: same as maxCUSize
> +
> .. option:: --cutree, --no-cutree
>
> Enable the use of lookahead's lowres motion vector fields to
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/CMakeLists.txt
> --- a/source/CMakeLists.txt Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/CMakeLists.txt Mon Mar 23 14:23:42 2015 +0530
> @@ -30,7 +30,7 @@
> mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>
> # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 52)
> +set(X265_BUILD 53)
> configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
> "${PROJECT_BINARY_DIR}/x265.def")
> configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/common/cudata.cpp
> --- a/source/common/cudata.cpp Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/common/cudata.cpp Mon Mar 23 14:23:42 2015 +0530
> @@ -298,7 +298,7 @@
> }
>
> // initialize Sub partition
> -void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom)
> +void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
> {
> m_absIdxInCTU = cuGeom.absPartIdx;
> m_encData = ctu.m_encData;
> @@ -312,8 +312,8 @@
> m_cuAboveRight = ctu.m_cuAboveRight;
> X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
>
> - /* sequential memsets */
> - m_partSet((uint8_t*)m_qp, (uint8_t)ctu.m_qp[0]);
> + m_partSet((uint8_t*)m_qp, (uint8_t)qp);
> +
> m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize);
> m_partSet(m_lumaIntraDir, (uint8_t)DC_IDX);
> m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless);
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/common/cudata.h
> --- a/source/common/cudata.h Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/common/cudata.h Mon Mar 23 14:23:42 2015 +0530
> @@ -182,7 +182,7 @@
> static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
>
> void initCTU(const Frame& frame, uint32_t cuAddr, int qp);
> - void initSubCU(const CUData& ctu, const CUGeom& cuGeom);
> + void initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp);
> void initLosslessCU(const CUData& cu, const CUGeom& cuGeom);
>
> void copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx);
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/common/param.cpp
> --- a/source/common/param.cpp Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/common/param.cpp Mon Mar 23 14:23:42 2015 +0530
> @@ -209,6 +209,7 @@
> param->rc.zones = NULL;
> param->rc.bEnableSlowFirstPass = 0;
> param->rc.bStrictCbr = 0;
> + param->rc.qgSize = 64; /* Same as maxCUSize */
>
> /* Video Usability Information (VUI) */
> param->vui.aspectRatioIdc = 0;
> @@ -263,6 +264,7 @@
> param->rc.aqStrength = 0.0;
> param->rc.aqMode = X265_AQ_NONE;
> param->rc.cuTree = 0;
> + param->rc.qgSize = 32;
> param->bEnableFastIntra = 1;
> }
> else if (!strcmp(preset, "superfast"))
> @@ -279,6 +281,7 @@
> param->rc.aqStrength = 0.0;
> param->rc.aqMode = X265_AQ_NONE;
> param->rc.cuTree = 0;
> + param->rc.qgSize = 32;
> param->bEnableSAO = 0;
> param->bEnableFastIntra = 1;
> }
> @@ -292,6 +295,7 @@
> param->rdLevel = 2;
> param->maxNumReferences = 1;
> param->rc.cuTree = 0;
> + param->rc.qgSize = 32;
> param->bEnableFastIntra = 1;
> }
> else if (!strcmp(preset, "faster"))
> @@ -843,6 +847,7 @@
> OPT2("pools", "numa-pools") p->numaPools = strdup(value);
> OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
> OPT("analysis-file") p->analysisFileName = strdup(value);
> + OPT("qg-size") p->rc.qgSize = atoi(value);
> else
> return X265_PARAM_BAD_NAME;
> #undef OPT
> diff -r ebe5e57c4b45 -r 30e209c9bab6 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Sat Apr 04 15:11:39 2015 -0500
> +++ b/source/encoder/analysis.cpp Mon Mar 23 14:23:42 2015 +0530
> @@ -75,6 +75,8 @@
> m_reuseInterDataCTU = NULL;
> m_reuseRef = NULL;
> m_reuseBestMergeCand = NULL;
> + for (int i = 0; i < NUM_CU_DEPTH; i++)
> + m_qp[i] = NULL;
> }
>
> bool Analysis::create(ThreadLocalData *tld)
> @@ -101,9 +103,12 @@
> ok &= md.pred[j].reconYuv.create(cuSize, csp);
> md.pred[j].fencYuv = &md.fencYuv;
> }
> + CHECKED_MALLOC(m_qp[depth], int, 1i64 << (depth << 1));
This notation (the "i64" integer-literal suffix) is not very portable:
/Users/steve/repos/x265-sborho/source/encoder/analysis.cpp:106:42:
error: invalid suffix "i64" on integer constant
CHECKED_MALLOC(m_qp[depth], int, 1i64 << (depth << 1));
Max depth is 5, so this only needs to be 10 bits. Either leave it as a plain 1 or
use an explicit typecast to uint64_t.
Also, while we are updating AQ, I think now would be a good time to
revisit x264's optimization where they discard small QP changes from AQ
in the interest of not signaling DQP for minimal visual effect.
--
Steve Borho
More information about the x265-devel
mailing list