[x265] [PATCH 3 of 3] implement QP based RD refinement [CHANGES OUTPUT]

Wed Nov 4 08:18:16 CET 2015

Great job on this feature, Kavitha!

On Tue, Oct 27, 2015 at 11:21 AM, <kavitha at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Kavitha Sampath <kavitha at multicorewareinc.com>
> # Date 1445612726 -19800
> #      Fri Oct 23 20:35:26 2015 +0530
> # Node ID 067f831bfc9a6c023cbf642e8c9742deff631ea5
> # Parent  8f08f346dd675a638df2ca8aa030b5be61ab06d7
> implement QP based RD refinement [CHANGES OUTPUT]
>
> After CU analysis, calculate R-D cost on the best partition mode
> for a range of QP values to find the optimal rounding effect.
>
> diff -r 8f08f346dd67 -r 067f831bfc9a source/common/cudata.cpp
> --- a/source/common/cudata.cpp  Fri Oct 23 19:32:21 2015 +0530
> +++ b/source/common/cudata.cpp  Fri Oct 23 20:35:26 2015 +0530
> @@ -430,7 +430,7 @@
>  }
>
>  /* The reverse of copyToPic, called only by encodeResidue */
> -void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
> +void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, bool
> copyQp)
>  {
>      m_encData       = ctu.m_encData;
>      m_slice         = ctu.m_slice;
> @@ -441,7 +441,8 @@
>      m_numPartitions = cuGeom.numPartitions;
>
>      /* copy out all prediction info for this part */
> -    m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
> +    if (copyQp) m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp +
> m_absIdxInCTU);
> +
>      m_partCopy(m_log2CUSize,   ctu.m_log2CUSize + m_absIdxInCTU);
>      m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
>      m_partCopy(m_tqBypass,     ctu.m_tqBypass + m_absIdxInCTU);
> diff -r 8f08f346dd67 -r 067f831bfc9a source/common/cudata.h
> --- a/source/common/cudata.h    Fri Oct 23 19:32:21 2015 +0530
> +++ b/source/common/cudata.h    Fri Oct 23 20:35:26 2015 +0530
> @@ -222,7 +222,7 @@
>      void     copyToPic(uint32_t depth) const;
>
>      /* RD-0 methods called only from encodeResidue */
> -    void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom);
> +    void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom, bool
> copyQp = true);
>      void     updatePic(uint32_t depth) const;
>
>      void     setPartSizeSubParts(PartSize size)    {
> m_partSet(m_partSize, (uint8_t)size); }
> diff -r 8f08f346dd67 -r 067f831bfc9a source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Fri Oct 23 19:32:21 2015 +0530
> +++ b/source/encoder/analysis.cpp       Fri Oct 23 20:35:26 2015 +0530
> @@ -201,6 +201,9 @@
>          }
>      }
>
> +    if (m_param->bEnableRdRefine)
> +        qpRdRefine(ctu, cuGeom);
> +
>      return *m_modeDepth[0].bestMode;
>  }
>
> @@ -229,6 +232,53 @@
>      }
>  }
>
> +void Analysis::qpRdRefine(const CUData& parentCTU, const CUGeom& cuGeom)
> +{
> +    uint64_t origCost, bestCost, cost, prevCost;
> +    int failure, nQP, lambdaQP;
> +    double bestQp, origQP;
> +    bestQp = origQP =
> m_frame->m_encData->m_cuStat[parentCTU.m_cuAddr].baseQp;
> +    bestCost = origCost = m_modeDepth[0].bestMode->rdCost;
> +    lambdaQP = calculateQpforCuSize(parentCTU, cuGeom);
> +
> +    for (int16_t dir = 1; dir >= -1; dir -= 2)
> +    {
> +        int threshold = !!m_param->psyRd;
> +
> +        failure = 0;
> +        prevCost = origCost;
> +        double modQP = origQP + dir;
> +
> +        while (modQP >= QP_MIN && modQP <= QP_MAX_SPEC)
> +        {
> +             /* set modified QP for quant, maintain constant lambda for
> all QPs
> +              * use lambda of QP used for CU analysis for cost
> calculation */
> +             nQP = setLambdaFromQP(parentCTU,
> calculateQpforCuSize(parentCTU, cuGeom, modQP), lambdaQP);
> +
> +            recodeCU(parentCTU, cuGeom, nQP, modQP);
> +            cost = m_modeDepth[0].bestMode->rdCost;
> +            COPY2_IF_LT(bestCost, cost, bestQp, modQP);
> +
> +            if (cost < prevCost)
> +                failure = 0;
> +            else
> +                failure++;
> +
> +            if (failure > threshold)
> +                break;
> +
> +            prevCost = cost;
> +            modQP += dir;
> +       }
> +    }
> +
> +    /* TODO: Try last CU's QP to decide the bestQP before re-encode */
> +
> +    /* Re-encode CU for best chosen QP */
> +    nQP = setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU,
> cuGeom, bestQp));
> +    recodeCU(parentCTU, cuGeom, nQP, bestQp);
> +}
> +
>  void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom&
> cuGeom, int32_t qp)
>  {
>      uint32_t depth = cuGeom.depth;
> @@ -1422,6 +1472,110 @@
>      return refMask;
>  }
>
> +void Analysis::recodeCU(const CUData& parentCTU, const CUGeom& cuGeom,
> int32_t qp, double modQP)
> +{
> +    uint32_t depth = cuGeom.depth;
> +    ModeDepth& md = m_modeDepth[depth];
> +    md.bestMode = NULL;
> +
> +    bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> +    bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
> +    bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
> +
> +    if (bDecidedDepth)
> +    {
> +        Mode& mode = md.pred[0];
> +        md.bestMode = &mode;
> +        mode.cu.initSubCU(parentCTU, cuGeom, qp);
> +        PartSize size = (PartSize)parentCTU.m_partSize[cuGeom.absPartIdx];
> +        if (parentCTU.isIntra(cuGeom.absPartIdx))
> +        {
> +            memcpy(mode.cu.m_lumaIntraDir, parentCTU.m_lumaIntraDir +
> cuGeom.absPartIdx, cuGeom.numPartitions);
> +            memcpy(mode.cu.m_chromaIntraDir, parentCTU.m_chromaIntraDir +
> cuGeom.absPartIdx, cuGeom.numPartitions);
> +            checkIntra(mode, cuGeom, size);
> +        }
> +        else
> +        {
> +            mode.cu.copyFromPic(parentCTU, cuGeom, false);
> +            for (int part = 0; part <
> (int)parentCTU.getNumPartInter(cuGeom.absPartIdx); part++)
> +            {
> +                PredictionUnit pu(mode.cu, cuGeom, part);
> +                motionCompensation(mode.cu, pu, mode.predYuv, true,
> true);
> +            }
> +
> +            if (parentCTU.isSkipped(cuGeom.absPartIdx))
> +                encodeResAndCalcRdSkipCU(mode);
> +            else
> +                encodeResAndCalcRdInterCU(mode, cuGeom);
> +
>

Since the first pass decided this CU was skipped (CBF=0), the skip branch
above never gives it an opportunity to convert to CBF=1 at the modified QP.
Let's try removing the if-else and calling encodeResAndCalcRdInterCU
unconditionally?

> +            /* checkMerge2Nx2N function performs checkDQP after encoding
> residual, do the same */
> +            bool mergeInter2Nx2N = size == SIZE_2Nx2N &&
> parentCTU.m_mergeFlag[cuGeom.absPartIdx];
> +            if (parentCTU.isSkipped(cuGeom.absPartIdx) || mergeInter2Nx2N)
> +                checkDQP(mode, cuGeom);
> +        }
> +
> +        if (m_bTryLossless)
> +            tryLossless(cuGeom);
> +
> +        if (mightSplit)
> +            addSplitFlagCost(*md.bestMode, cuGeom.depth);
> +    }
> +    else
> +    {
> +        Mode* splitPred = &md.pred[PRED_SPLIT];
> +        md.bestMode = splitPred;
> +        splitPred->initCosts();
> +        CUData* splitCU = &splitPred->cu;
> +        splitCU->initSubCU(parentCTU, cuGeom, qp);
> +
> +        uint32_t nextDepth = depth + 1;
> +        ModeDepth& nd = m_modeDepth[nextDepth];
> +        invalidateContexts(nextDepth);
> +        Entropy* nextContext = &m_rqt[depth].cur;
> +        int nextQP = qp;
> +
> +        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
> +        {
> +            const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset +
> subPartIdx);
> +            if (childGeom.flags & CUGeom::PRESENT)
> +            {
> +                m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv,
> childGeom.absPartIdx);
> +                m_rqt[nextDepth].cur.load(*nextContext);
> +
> +                if (m_slice->m_pps->bUseDQP && nextDepth <=
> m_slice->m_pps->maxCuDQPDepth)
> +                {
> +                    int constLambdaQp = calculateQpforCuSize(parentCTU,
> childGeom);
> +                    nextQP = setLambdaFromQP(parentCTU,
> calculateQpforCuSize(parentCTU, childGeom, modQP), constLambdaQp);
> +                }
> +                recodeCU(parentCTU, childGeom, nextQP, modQP);
> +
> +                // Save best CU and pred data for this sub CU
> +                splitCU->copyPartFrom(nd.bestMode->cu, childGeom,
> subPartIdx);
> +                splitPred->addSubCosts(*nd.bestMode);
> +                nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv,
> childGeom.numPartitions * subPartIdx);
> +                nextContext = &nd.bestMode->contexts;
> +            }
> +            else
> +            {
> +                splitCU->setEmptyPart(childGeom, subPartIdx);
> +                // Set depth of non-present CU to 0 to ensure that
> correct CU is fetched as reference to code deltaQP
> +                memset(parentCTU.m_cuDepth + childGeom.absPartIdx, 0,
> childGeom.numPartitions);
> +            }
> +        }
> +        nextContext->store(splitPred->contexts);
> +        if (mightNotSplit)
> +            addSplitFlagCost(*splitPred, cuGeom.depth);
> +        else
> +            updateModeCost(*splitPred);
> +
> +        checkDQPForSplitPred(*splitPred, cuGeom);
> +    }
> +
> +    /* Copy best data to encData CTU and recon */
> +    md.bestMode->cu.copyToPic(depth);
> +    md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic,
> parentCTU.m_cuAddr, cuGeom.absPartIdx);
> +}
> +
>  /* sets md.bestMode if a valid merge candidate is found, else leaves it
> NULL */
>  void Analysis::checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const
> CUGeom& cuGeom)
>  {
> diff -r 8f08f346dd67 -r 067f831bfc9a source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Oct 23 19:32:21 2015 +0530
> +++ b/source/encoder/analysis.h Fri Oct 23 20:35:26 2015 +0530
> @@ -110,6 +110,9 @@
>
>      uint32_t m_splitRefIdx[4];
>
> +    /* refine RD based on QP for rd-levels 5 and 6 */
> +    void qpRdRefine(const CUData& ctu, const CUGeom& cuGeom);
> +
>      /* full analysis for an I-slice CU */
>      void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom,
> int32_t qp);
>
> @@ -118,6 +121,8 @@
>      uint32_t compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom&
> cuGeom, int32_t qp);
>      uint32_t compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom&
> cuGeom, uint32_t &zOrder, int32_t qp);
>
> +    void recodeCU(const CUData& parentCTU, const CUGeom& cuGeom, int32_t
> qp, double modQP);
> +
>      /* measure merge and skip */
>      void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom&
> cuGeom);
>      void checkMerge2Nx2N_rd5_6(Mode& skip, Mode& merge, const CUGeom&
> cuGeom, bool isShareMergeCand);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>

-- 
Deepthi Nandakumar
Engineering Manager, x265
Multicoreware, Inc
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20151104/5624938d/attachment-0001.html>