[x265] [PATCH] analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6

santhoshini at multicorewareinc.com santhoshini at multicorewareinc.com
Tue May 19 14:34:44 CEST 2015


# HG changeset patch
# User Santhoshini Sekar<santhoshini at multicorewareinc.com>
# Date 1432028003 -19800
#      Tue May 19 15:03:23 2015 +0530
# Node ID 904ac8808858baaeaaa333b5a105af50c1107db0
# Parent  d7b100e51e828833eee006f1da93e499ac161d28
analysis: add an additional round of sub-pel refinement for inter 2Nx2N in rd 5 and 6

diff -r d7b100e51e82 -r 904ac8808858 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Mon May 18 18:24:08 2015 -0500
+++ b/source/common/cudata.cpp	Tue May 19 15:03:23 2015 +0530
@@ -456,6 +456,41 @@
     memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
 }
 
+void CUData::copyToCU(CUData& ctu) const /* copy this CU's coding data into 'ctu'; no offset is added to any destination array */
+{
+    m_partCopy((uint8_t*)ctu.m_qp, (uint8_t*)m_qp); /* m_partCopy(dst, src); m_qp and m_refIdx are cast, presumably to match its uint8_t* arguments */
+    m_partCopy(ctu.m_log2CUSize, m_log2CUSize);
+    m_partCopy(ctu.m_lumaIntraDir, m_lumaIntraDir);
+    m_partCopy(ctu.m_tqBypass, m_tqBypass);
+    m_partCopy((uint8_t*)ctu.m_refIdx[0], (uint8_t*)m_refIdx[0]);
+    m_partCopy((uint8_t*)ctu.m_refIdx[1], (uint8_t*)m_refIdx[1]);
+    m_partCopy(ctu.m_cuDepth, m_cuDepth);
+    m_partCopy(ctu.m_predMode, m_predMode);
+    m_partCopy(ctu.m_partSize, m_partSize);
+    m_partCopy(ctu.m_mergeFlag, m_mergeFlag);
+    m_partCopy(ctu.m_interDir, m_interDir);
+    m_partCopy(ctu.m_mvpIdx[0], m_mvpIdx[0]);
+    m_partCopy(ctu.m_mvpIdx[1], m_mvpIdx[1]);
+    m_partCopy(ctu.m_tuDepth, m_tuDepth);
+    m_partCopy(ctu.m_transformSkip[0], m_transformSkip[0]);
+    m_partCopy(ctu.m_transformSkip[1], m_transformSkip[1]);
+    m_partCopy(ctu.m_transformSkip[2], m_transformSkip[2]);
+    m_partCopy(ctu.m_cbf[0], m_cbf[0]);
+    m_partCopy(ctu.m_cbf[1], m_cbf[1]);
+    m_partCopy(ctu.m_cbf[2], m_cbf[2]);
+    m_partCopy(ctu.m_chromaIntraDir, m_chromaIntraDir);
+    /* motion vectors and MV differences of both lists: m_numPartitions MV entries each */
+    memcpy(ctu.m_mv[0],  m_mv[0],  m_numPartitions * sizeof(MV));
+    memcpy(ctu.m_mv[1],  m_mv[1],  m_numPartitions * sizeof(MV));
+    memcpy(ctu.m_mvd[0], m_mvd[0], m_numPartitions * sizeof(MV));
+    memcpy(ctu.m_mvd[1], m_mvd[1], m_numPartitions * sizeof(MV));
+    /* NOTE(review): each m_trCoeff copy below moves sizeof(coeff_t) bytes, i.e. a single coefficient - confirm this is intended */
+    memcpy(ctu.m_trCoeff[0], m_trCoeff[0], sizeof(coeff_t));
+    /* planes 1 and 2 are copied the same way as plane 0 */
+    memcpy(ctu.m_trCoeff[1], m_trCoeff[1], sizeof(coeff_t));
+    memcpy(ctu.m_trCoeff[2], m_trCoeff[2], sizeof(coeff_t));
+}
+
 /* The reverse of copyToPic, called only by encodeResidue */
 void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom)
 {
diff -r d7b100e51e82 -r 904ac8808858 source/common/cudata.h
--- a/source/common/cudata.h	Mon May 18 18:24:08 2015 -0500
+++ b/source/common/cudata.h	Tue May 19 15:03:23 2015 +0530
@@ -188,6 +188,7 @@
     void     copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx);
     void     setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx);
     void     copyToPic(uint32_t depth) const;
+    void     copyToCU(CUData& ctu) const;
 
     /* RD-0 methods called only from encodeResidue */
     void     copyFromPic(const CUData& ctu, const CUGeom& cuGeom);
diff -r d7b100e51e82 -r 904ac8808858 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Mon May 18 18:24:08 2015 -0500
+++ b/source/encoder/analysis.cpp	Tue May 19 15:03:23 2015 +0530
@@ -739,7 +739,31 @@
         cuStat.count[depth] += 1;
         cuStat.avgCost[depth] = (temp + md.bestMode->rdCost) / cuStat.count[depth];
     }
+    /* Extra sub-pel refinement is only tried when the best mode is a non-merge inter 2Nx2N CU at this depth */
+    bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER) && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);
+    if (subpelRefine && m_param->rdLevel > 4)
+    {
+        int hpelDirs = MotionEstimate::hpelDirCount(m_param->subpelRefine);
 
+        Mode* rdRefine = &md.pred[PRED_RD_REFINE];
+        rdRefine->initCosts();
+        rdRefine->cu.initSubCU(parentCTU, cuGeom, qp);
+        memcpy(rdRefine->bestME[0], md.bestMode->bestME[0], sizeof(MotionData));
+        if (m_slice->m_sliceType == B_SLICE)
+            memcpy(&rdRefine->bestME[0][1], &md.bestMode->bestME[0][1], sizeof(MotionData));
+        md.bestMode->cu.copyToCU(rdRefine->cu);
+        rdRefine->reconYuv.copyFromYuv(md.bestMode->reconYuv);
+        rdRefine->predYuv.copyFromYuv(md.bestMode->predYuv);
+
+        for (int i = 1; i <= hpelDirs; i++)
+        {
+            qPelRefine(*rdRefine, cuGeom, true, i);
+            if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth)
+                setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom));
+            encodeResAndCalcRdInterCU(*rdRefine, cuGeom);
+            checkBestMode(*rdRefine, depth);
+        }
+    }
     /* Copy best data to encData CTU and recon */
     md.bestMode->cu.copyToPic(depth);
     if (md.bestMode != &md.pred[PRED_SPLIT])
@@ -1207,7 +1231,31 @@
         checkDQPForSplitPred(*splitPred, cuGeom);
         checkBestMode(*splitPred, depth);
     }
+    /* Extra sub-pel refinement is only tried when the best mode is a non-merge inter 2Nx2N CU at this depth */
+    bool subpelRefine = !!(md.bestMode->cu.m_predMode[0] & MODE_INTER) && !(md.bestMode->cu.m_mergeFlag[0]) && (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N) && (md.bestMode->cu.m_cuDepth[0] == depth);
+    if (subpelRefine)
+    {
+        int hpelDirs = MotionEstimate::hpelDirCount(m_param->subpelRefine);
 
+        Mode* rdRefine = &md.pred[PRED_RD_REFINE];
+        rdRefine->initCosts();
+        rdRefine->cu.initSubCU(parentCTU, cuGeom, qp);
+        memcpy(rdRefine->bestME[0], md.bestMode->bestME[0], sizeof(MotionData));
+        if (m_slice->m_sliceType == B_SLICE)
+            memcpy(&rdRefine->bestME[0][1], &md.bestMode->bestME[0][1], sizeof(MotionData));
+        md.bestMode->cu.copyToCU(rdRefine->cu);
+        rdRefine->reconYuv.copyFromYuv(md.bestMode->reconYuv);
+        rdRefine->predYuv.copyFromYuv(md.bestMode->predYuv);
+
+        for (int i = 1; i <= hpelDirs; i++)
+        {
+            qPelRefine(*rdRefine, cuGeom, true, i);
+            if (m_slice->m_pps->bUseDQP && depth <= m_slice->m_pps->maxCuDQPDepth)
+                setLambdaFromQP(parentCTU, calculateQpforCuSize(parentCTU, cuGeom));
+            encodeResAndCalcRdInterCU(*rdRefine, cuGeom);
+            checkBestMode(*rdRefine, depth);
+        }
+    }
     /* Copy best data to encData CTU and recon */
     md.bestMode->cu.copyToPic(depth);
     if (md.bestMode != &md.pred[PRED_SPLIT])
diff -r d7b100e51e82 -r 904ac8808858 source/encoder/analysis.h
--- a/source/encoder/analysis.h	Mon May 18 18:24:08 2015 -0500
+++ b/source/encoder/analysis.h	Tue May 19 15:03:23 2015 +0530
@@ -59,6 +59,7 @@
         PRED_nRx2N,
         PRED_INTRA_NxN, /* 4x4 intra PU blocks for 8x8 CU */
         PRED_LOSSLESS,  /* lossless encode of best mode */
+        PRED_RD_REFINE, /* scratch mode for the extra sub-pel refinement of the best inter 2Nx2N mode */
         MAX_PRED_TYPES
     };
 
diff -r d7b100e51e82 -r 904ac8808858 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Mon May 18 18:24:08 2015 -0500
+++ b/source/encoder/motion.cpp	Tue May 19 15:03:23 2015 +0530
@@ -155,6 +155,11 @@
            workload[subme].qpel_iters / 2;
 }
 
+int MotionEstimate::hpelDirCount(int subme)
+{
+    return workload[subme].hpel_dirs; /* hpel_dirs of the workload[] entry indexed by subme; the index is not range-checked here */
+}
+
 MotionEstimate::~MotionEstimate()
 {
     fencPUYuv.destroy();
@@ -1205,6 +1210,49 @@
     return bcost;
 }
 
+/* Quarter-pel refinement around one half-pel neighbour of 'mv' (a qpel MV).  mvmin/mvmax are
+ * converted with toQPel() before clipping, mvp is installed via setMVP() for mvcost(), and
+ * halfPelIdx selects the square1[] offset used for the half-pel start point.  The best MV
+ * is written to outQMv and its satd + mvcost is returned. */
+int MotionEstimate::qPelCompare(ReferencePlanes *ref, const MV& mvmin, const MV& mvmax, const MV& mvp,
+                                const MV& mv, MV& outQMv, int halfPelIdx)
+{
+    setMVP(mvp);
+
+    MV qmvmin = mvmin.toQPel();
+    MV qmvmax = mvmax.toQPel();
+    const SubpelWorkload& wl = workload[this->subpelRefine];
+
+    /* Clip in qpel units (the bounds above are qpel), snap to the nearest full-pel position
+     * and promote the result back to qpel: subpelCompare() and the square1[] offsets below
+     * work on qpel MVs, so a raw roundToFPel() value must not be mixed with them. */
+    MV fmv = mv;
+    fmv = fmv.clipped(qmvmin, qmvmax);
+    fmv = fmv.roundToFPel().toQPel();
+
+    /* half-pel start point: the selected square1[] offset, doubled */
+    MV bmv = fmv + square1[halfPelIdx] * 2;
+    int bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+
+    for (int iter = 0; iter < wl.qpel_iters; iter++)
+    {
+        int bdir = 0;
+        for (int i = 1; i <= wl.qpel_dirs; i++)
+        {
+            MV qmv = bmv + square1[i]; /* centre on the running best, not the start point */
+            int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
+            COPY2_IF_LT(bcost, cost, bdir, i);
+        }
+        if (!bdir)
+            break; /* no neighbour improved on the current best */
+        bmv += square1[bdir];
+    }
+
+    x265_emms();
+    outQMv = bmv;
+    return bcost;
+}
+
 int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
 {
     intptr_t refStride = ref->lumaStride;
diff -r d7b100e51e82 -r 904ac8808858 source/encoder/motion.h
--- a/source/encoder/motion.h	Mon May 18 18:24:08 2015 -0500
+++ b/source/encoder/motion.h	Tue May 19 15:03:23 2015 +0530
@@ -69,6 +69,7 @@
 
     static void initScales();
     static int hpelIterationCount(int subme);
+    static int hpelDirCount(int subme);
     void init(int method, int refine, int csp);
 
     /* Methods called at slice setup */
@@ -90,6 +91,7 @@
     }
 
     int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv);
+    int qPelCompare(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & mvp, const MV & mv, MV & outQMv, int halfPelIdx);
 
     int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
 
diff -r d7b100e51e82 -r 904ac8808858 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Mon May 18 18:24:08 2015 -0500
+++ b/source/encoder/search.cpp	Tue May 19 15:03:23 2015 +0530
@@ -2299,6 +2299,54 @@
     interMode.sa8dBits += totalmebits;
 }
 
+/* Extra sub-pel refinement of an inter mode; only PU 0 is handled.  For each reference list
+ * set in the CU's interDir, qPelCompare() is run from the half-pel neighbour chosen by
+ * halfpelIdx and bestME[list] (mv, cost, bits) is replaced when the new cost is lower.
+ * predYuv is then rebuilt by motionCompensation(), which also receives bChromaSA8D. */
+void Search::qPelRefine(Mode& interMode, const CUGeom& cuGeom, bool bChromaSA8D, int halfpelIdx)
+{
+    CUData& cu = interMode.cu;
+    const Slice* slice = m_slice;
+    const int* numRefIdx = slice->m_numRefIdx;
+    const uint32_t interDir = cu.m_interDir[0];
+    MotionData* bestME = interMode.bestME[0];
+    PredictionUnit pu(cu, cuGeom, 0);
+
+    for (uint32_t list = 0; list < 2; list++)
+    {
+        if (!(interDir & (1 << list)))
+            continue; /* list not referenced by this CU */
+
+        int ref = bestME[list].ref;
+        uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
+        bits += getTUBits(ref, numRefIdx[list]);
+
+        /* Refine this list's own predictor and MV; reading bestME[0] for both lists would
+         * start the list 1 refinement from list 0's motion */
+        MV mvmin, mvmax, outmv;
+        MV mvp = bestME[list].mvp;
+        MV mv = bestME[list].mv;
+        int merange = m_param->searchRange;
+
+        setSearchRange(cu, mv, merange, mvmin, mvmax);
+        int satdCost = m_me.qPelCompare(&slice->m_mref[list][ref], mvmin, mvmax, mvp, mv, outmv, halfpelIdx);
+
+        /* Get total cost of partition, but only include MV bit cost once */
+        bits += m_me.bitcost(outmv);
+        uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost.getCost(bits);
+        if (cost < bestME[list].cost)
+        {
+            bestME[list].mv = outmv;
+            bestME[list].cost = cost;
+            bestME[list].bits = bits;
+        }
+    }
+
+    /* NOTE(review): the refined MV is stored in bestME[] only; nothing here writes it to cu before the prediction is rebuilt - confirm */
+    motionCompensation(cu, pu, interMode.predYuv, true, bChromaSA8D);
+    X265_CHECK(interMode.ok(), "inter mode is not ok");
+}
+
 void Search::getBlkBits(PartSize cuMode, bool bPSlice, int partIdx, uint32_t lastMode, uint32_t blockBit[3])
 {
     if (cuMode == SIZE_2Nx2N)
diff -r d7b100e51e82 -r 904ac8808858 source/encoder/search.h
--- a/source/encoder/search.h	Mon May 18 18:24:08 2015 -0500
+++ b/source/encoder/search.h	Tue May 19 15:03:23 2015 +0530
@@ -302,6 +302,7 @@
 
     // estimation inter prediction (non-skip)
     void     predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC);
+    void     qPelRefine(Mode& interMode, const CUGeom& cuGeom, bool bChroma, int halfpelIdx);
 
     // encode residual and compute rd-cost for inter mode
     void     encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);


More information about the x265-devel mailing list