[x265] [PATCH 3 of 6 REV2] analysis: keep per-CU AQ QPs in cuGeom index order, simplify arguments

Mon Apr 27 10:19:19 CEST 2015

Thanks, I like initAqQPs since it gets rid of the messy section, but I was
also hoping we could avoid passing qp around for the compress* functions.

Since cuGeom now has cuIdx (stored as cuGeom.index), can't the analysis
functions just look up the appropriate QP in m_aqQP themselves? In fact, with
some interesting calculations on cuGeom->absPartIdx and depth, do we even
need cuIdx?

On Sun, Apr 26, 2015 at 10:51 PM, Steve Borho <steve at borho.org> wrote:

> # HG changeset patch
> # User Steve Borho <steve at borho.org>
> # Date 1429909512 18000
> #      Fri Apr 24 16:05:12 2015 -0500
> # Node ID 5644bbd23e71996651f4ed558e0260201a91f70d
> # Parent  bfd57a0c0875e219d902ff3af6f4a0ddaa16b125
> analysis: keep per-CU AQ QPs in cuGeom index order, simplify arguments
>
> diff -r bfd57a0c0875 -r 5644bbd23e71 source/common/cudata.cpp
> --- a/source/common/cudata.cpp  Fri Apr 24 15:14:54 2015 -0500
> +++ b/source/common/cudata.cpp  Fri Apr 24 16:05:12 2015 -0500
> @@ -2027,6 +2027,7 @@
>          uint32_t blockSize = 1 << log2CUSize;
>          uint32_t sbWidth   = 1 << (g_log2Size[maxCUSize] - log2CUSize);
>          int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize];
> +
>          for (uint32_t sbY = 0; sbY < sbWidth; sbY++)
>          {
>              for (uint32_t sbX = 0; sbX < sbWidth; sbX++)
> @@ -2049,7 +2050,8 @@
>                  cu->childOffset = childIdx - cuIdx;
>                  cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4;
>                  cu->numPartitions = (NUM_4x4_PARTITIONS >>
> ((g_maxLog2CUSize - cu->log2CUSize) * 2));
> -                cu->depth = g_log2Size[maxCUSize] - log2CUSize;
> +                cu->depth = (uint16_t)(g_log2Size[maxCUSize] -
> log2CUSize);
> +                cu->index = (uint16_t)cuIdx;
>
>                  cu->flags = 0;
>                  CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
> diff -r bfd57a0c0875 -r 5644bbd23e71 source/common/cudata.h
> --- a/source/common/cudata.h    Fri Apr 24 15:14:54 2015 -0500
> +++ b/source/common/cudata.h    Fri Apr 24 16:05:12 2015 -0500
> @@ -85,8 +85,8 @@
>      uint32_t childOffset;   // offset of the first child CU from current
> CU
>      uint32_t absPartIdx;    // Part index of this CU in terms of 4x4
> blocks.
>      uint32_t numPartitions; // Number of 4x4 blocks in the CU
> -    uint32_t depth;         // depth of this CU relative from CTU
>      uint32_t flags;         // CU flags.
> +    uint16_t depth, index;  // depth of this CU relative from CTU,
> absolute index
>  };
>
>  struct MVField
> diff -r bfd57a0c0875 -r 5644bbd23e71 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Fri Apr 24 15:14:54 2015 -0500
> +++ b/source/encoder/analysis.cpp       Fri Apr 24 16:05:12 2015 -0500
> @@ -75,8 +75,6 @@
>      m_reuseInterDataCTU = NULL;
>      m_reuseRef = NULL;
>      m_reuseBestMergeCand = NULL;
> -    for (int i = 0; i < NUM_CU_DEPTH; i++)
> -        m_aqQP[i] = NULL;
>  }
>
>  bool Analysis::create(ThreadLocalData *tld)
> @@ -103,12 +101,9 @@
>              ok &= md.pred[j].reconYuv.create(cuSize, csp);
>              md.pred[j].fencYuv = &md.fencYuv;
>          }
> -        CHECKED_MALLOC(m_aqQP[depth], int, (size_t)1 << (depth << 1));
>      }
>
>      return ok;
> -fail:
> -    return false;
>  }
>
>  void Analysis::destroy()
> @@ -123,7 +118,17 @@
>              m_modeDepth[i].pred[j].predYuv.destroy();
>              m_modeDepth[i].pred[j].reconYuv.destroy();
>          }
> -        X265_FREE(m_aqQP[i]);
> +    }
> +}
> +
> +void Analysis::initAqQPs(uint32_t depth, const CUData& ctu, const CUGeom*
> rootGeom)
> +{
> +    for (int d0 = 0; d0 < 4; d0++)
> +    {
> +        m_aqQP[rootGeom->index + d0] = calculateQpforCuSize(ctu,
> rootGeom[d0]);
> +
> +        if (m_slice->m_pps->maxCuDQPDepth > depth)
> +            initAqQPs(depth + 1, ctu, &rootGeom[d0] +
> rootGeom[d0].childOffset);
>      }
>  }
>
> @@ -141,32 +146,16 @@
>
>      if (m_slice->m_pps->bUseDQP)
>      {
> -        /* TODO: In future, we could extend this to 8x8 QGs as well,
> since that's the minimum size
> -         * allowed by the HEVC standard. The AQ offset calculation will
> need to be at 8x8 granularity.
> -         * And this messy section will need to be reworked */
> -        m_aqQP[0][0] = calculateQpforCuSize(ctu, cuGeom);
> +        m_aqQP[0] = calculateQpforCuSize(ctu, cuGeom);
> +        setLambdaFromQP(*m_slice, m_aqQP[0]);
> +        m_aqQP[0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0]);
> +        ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
>
> -        const CUGeom* rootGeom = &cuGeom + 1;
> -        if (m_slice->m_pps->maxCuDQPDepth >= 1)
> -        {
> -            for (int d0 = 0; d0 < 4; d0++)
> -            {
> -                m_aqQP[1][d0] = calculateQpforCuSize(ctu, rootGeom[d0]);
> -                if (m_slice->m_pps->maxCuDQPDepth == 2)
> -                {
> -                    const CUGeom* curGeom = &rootGeom[d0] +
> rootGeom[d0].childOffset;
> -                    for (int d1 = 0; d1 < 4; d1++)
> -                        m_aqQP[2][d0 * 4 + d1] =
> calculateQpforCuSize(ctu, curGeom[d1]);
> -                }
> -            }
> -        }
> -
> -        setLambdaFromQP(*m_slice, m_aqQP[0][0]);
> -        m_aqQP[0][0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0][0]);
> -        ctu.setQPSubParts((int8_t)m_aqQP[0][0], 0, 0);
> +        if (m_slice->m_pps->maxCuDQPDepth)
> +            initAqQPs(1, ctu, &cuGeom + 1);
>      }
>      else
> -        m_aqQP[0][0] = m_slice->m_sliceQp;
> +        m_aqQP[0] = m_slice->m_sliceQp;
>
>      m_quant.setQPforQuant(ctu);
>      m_rqt[0].cur.load(initialContext);
> @@ -191,7 +180,7 @@
>      uint32_t zOrder = 0;
>      if (m_slice->m_sliceType == I_SLICE)
>      {
> -        compressIntraCU(ctu, cuGeom, zOrder, m_aqQP[0][0], 0);
> +        compressIntraCU(ctu, cuGeom, zOrder, m_aqQP[0]);
>          if (m_param->analysisMode == X265_ANALYSIS_SAVE &&
> m_frame->m_analysisData.intraData)
>          {
>              CUData* bestCU = &m_modeDepth[0].bestMode->cu;
> @@ -209,18 +198,18 @@
>              * they are available for intra predictions */
>              m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic,
> ctu.m_cuAddr, 0);
>
> -            compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0][0], 0);
> +            compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0]);
>
>              /* generate residual for entire CTU at once and copy to
> reconPic */
>              encodeResidue(ctu, cuGeom);
>          }
>          else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >=
> 2)
> -            compressInterCU_dist(ctu, cuGeom, m_aqQP[0][0], 0);
> +            compressInterCU_dist(ctu, cuGeom, m_aqQP[0]);
>          else if (m_param->rdLevel <= 4)
> -            compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0][0], 0);
> +            compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0]);
>          else
>          {
> -            compressInterCU_rd5_6(ctu, cuGeom, zOrder, m_aqQP[0][0], 0);
> +            compressInterCU_rd5_6(ctu, cuGeom, zOrder, m_aqQP[0]);
>              if (m_param->analysisMode == X265_ANALYSIS_SAVE &&
> m_frame->m_analysisData.interData)
>              {
>                  CUData* bestCU = &m_modeDepth[0].bestMode->cu;
> @@ -259,7 +248,7 @@
>      }
>  }
>
> -void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom&
> cuGeom, uint32_t& zOrder, int32_t qp, uint32_t partIdx)
> +void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom&
> cuGeom, uint32_t& zOrder, int32_t qp)
>  {
>      uint32_t depth = cuGeom.depth;
>      ModeDepth& md = m_modeDepth[depth];
> @@ -270,7 +259,6 @@
>
>      if (m_slice->m_pps->bUseDQP && depth && depth <=
> m_slice->m_pps->maxCuDQPDepth)
>      {
> -        qp = m_aqQP[depth][partIdx];
>          setLambdaFromQP(*m_slice, qp);
>          qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
>      }
> @@ -342,7 +330,10 @@
>              {
>                  m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
>                  m_rqt[nextDepth].cur.load(*nextContext);
> -                compressIntraCU(parentCTU, childGeom, zOrder, qp, partIdx
> * 4 + subPartIdx);
> +
> +                int32_t nextQP = m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth ?
> +                                 m_aqQP[childGeom.index] : qp;
> +                compressIntraCU(parentCTU, childGeom, zOrder, nextQP);
>
>                  // Save best CU and pred data for this sub CU
>                  splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> @@ -530,7 +521,7 @@
>      while (task >= 0);
>  }
>
> -void Analysis::compressInterCU_dist(const CUData& parentCTU, const
> CUGeom& cuGeom, int32_t qp, uint32_t partIdx)
> +void Analysis::compressInterCU_dist(const CUData& parentCTU, const
> CUGeom& cuGeom, int32_t qp)
>  {
>      uint32_t depth = cuGeom.depth;
>      uint32_t cuAddr = parentCTU.m_cuAddr;
> @@ -545,7 +536,6 @@
>
>      if (m_slice->m_pps->bUseDQP && depth && depth <=
> m_slice->m_pps->maxCuDQPDepth)
>      {
> -        qp = m_aqQP[depth][partIdx];
>          setLambdaFromQP(*m_slice, qp);
>          qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
>      }
> @@ -749,7 +739,9 @@
>              {
>                  m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
>                  m_rqt[nextDepth].cur.load(*nextContext);
> -                compressInterCU_dist(parentCTU, childGeom, qp, partIdx *
> 4 + subPartIdx);
> +                int32_t nextQP = m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth ?
> +                                 m_aqQP[childGeom.index] : qp;
> +                compressInterCU_dist(parentCTU, childGeom, nextQP);
>
>                  // Save best CU and pred data for this sub CU
>                  splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> @@ -788,7 +780,7 @@
>          md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.absPartIdx);
>  }
>
> -void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const
> CUGeom& cuGeom, int32_t qp, uint32_t partIdx)
> +void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const
> CUGeom& cuGeom, int32_t qp)
>  {
>      uint32_t depth = cuGeom.depth;
>      uint32_t cuAddr = parentCTU.m_cuAddr;
> @@ -801,7 +793,6 @@
>
>      if (m_slice->m_pps->bUseDQP && depth && depth <=
> m_slice->m_pps->maxCuDQPDepth)
>      {
> -        qp = m_aqQP[depth][partIdx];
>          setLambdaFromQP(*m_slice, qp);
>          qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
>      }
> @@ -1028,7 +1019,9 @@
>              {
>                  m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
>                  m_rqt[nextDepth].cur.load(*nextContext);
> -                compressInterCU_rd0_4(parentCTU, childGeom, qp, partIdx *
> 4 + subPartIdx);
> +                int32_t nextQP = m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth ?
> +                                 m_aqQP[childGeom.index] : qp;
> +                compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
>
>                  // Save best CU and pred data for this sub CU
>                  splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> @@ -1079,7 +1072,7 @@
>          md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr,
> cuGeom.absPartIdx);
>  }
>
> -void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
> CUGeom& cuGeom, uint32_t &zOrder, int32_t qp, uint32_t partIdx)
> +void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const
> CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)
>  {
>      uint32_t depth = cuGeom.depth;
>      ModeDepth& md = m_modeDepth[depth];
> @@ -1090,7 +1083,6 @@
>
>      if (m_slice->m_pps->bUseDQP && depth && depth <=
> m_slice->m_pps->maxCuDQPDepth)
>      {
> -        qp = m_aqQP[depth][partIdx];
>          setLambdaFromQP(*m_slice, qp);
>          qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
>      }
> @@ -1234,7 +1226,9 @@
>              {
>                  m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
>                  m_rqt[nextDepth].cur.load(*nextContext);
> -                compressInterCU_rd5_6(parentCTU, childGeom, zOrder, qp,
> partIdx * 4 + subPartIdx);
> +                int32_t nextQP = m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth ?
> +                                 m_aqQP[childGeom.index] : qp;
> +                compressInterCU_rd5_6(parentCTU, childGeom, zOrder,
> nextQP);
>
>                  // Save best CU and pred data for this sub CU
>                  splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> diff -r bfd57a0c0875 -r 5644bbd23e71 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Apr 24 15:14:54 2015 -0500
> +++ b/source/encoder/analysis.h Fri Apr 24 16:05:12 2015 -0500
> @@ -90,7 +90,7 @@
>      void processPmode(PMODE& pmode, Analysis& slave);
>
>      ModeDepth m_modeDepth[NUM_CU_DEPTH];
> -    int*      m_aqQP[NUM_CU_DEPTH];
> +    int       m_aqQP[CUGeom::MAX_GEOMS];
>      bool      m_bTryLossless;
>      bool      m_bChromaSa8d;
>
> @@ -109,13 +109,15 @@
>      int32_t*             m_reuseRef;
>      uint32_t*            m_reuseBestMergeCand;
>
> +    void initAqQPs(uint32_t depth, const CUData& ctu, const CUGeom*
> rootGeom);
> +
>      /* full analysis for an I-slice CU */
> -    void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom,
> uint32_t &zOrder, int32_t qpDepth, uint32_t partIdx);
> +    void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom,
> uint32_t &zOrder, int32_t qp);
>
>      /* full analysis for a P or B slice CU */
> -    void compressInterCU_dist(const CUData& parentCTU, const CUGeom&
> cuGeom, int32_t qpDepth, uint32_t partIdx);
> -    void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom&
> cuGeom, int32_t qpDepth, uint32_t partIdx);
> -    void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&
> cuGeom, uint32_t &zOrder, int32_t qpDepth, uint32_t partIdx);
> +    void compressInterCU_dist(const CUData& parentCTU, const CUGeom&
> cuGeom, int32_t qp);
> +    void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom&
> cuGeom, int32_t qp);
> +    void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&
> cuGeom, uint32_t &zOrder, int32_t qp);
>
>      /* measure merge and skip */
>      void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom&
> cuGeom);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150427/a3778dce/attachment-0001.html>