[x265] [PATCH] [OUTPUT CHANGED] 4% speedup via sa8dCost-threshold-based early-out check with very small compression loss

ashok at multicorewareinc.com ashok at multicorewareinc.com
Sat Jan 31 15:10:10 CET 2015


# HG changeset patch
# User Ashok Kumar Mishra <ashok at multicorewareinc.com>
# Date 1421415068 -19800
#      Fri Jan 16 19:01:08 2015 +0530
# Node ID 3216d4b11fa4d5bb90a1598dbe6065897a5fa4e0
# Parent  6c5156500d6d4fa655acaf7a8b77f2ba3a0f794b
[OUTPUT CHANGED] 4% speedup via sa8dCost-threshold-based early-out check with very small compression loss

diff -r 6c5156500d6d -r 3216d4b11fa4 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Jan 30 11:54:22 2015 -0600
+++ b/source/encoder/analysis.cpp	Fri Jan 16 19:01:08 2015 +0530
@@ -1187,13 +1187,13 @@
                 (!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
             {
                 md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
-                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
+                checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, md.bestMode->sa8dCost);
                 checkBestMode(md.pred[PRED_INTRA], depth);
 
                 if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
                 {
                     md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
-                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL);
+                    checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, md.bestMode->sa8dCost >> 2);
                     checkBestMode(md.pred[PRED_INTRA_NxN], depth);
                 }
             }
@@ -1422,6 +1422,13 @@
         prepMotionCompensation(tempPred->cu, cuGeom, 0);
         motionCompensation(tempPred->predYuv, true, true);
 
+        const Yuv& fencYuv = *tempPred->fencYuv;
+        Yuv& predYuv = tempPred->predYuv;
+        int part = partitionFromLog2Size(cuGeom.log2CUSize);
+        tempPred->distortion = primitives.cu[part].sa8d(fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
+        tempPred->sa8dBits = getTUBits(i, maxNumMergeCand);
+        tempPred->sa8dCost = m_rdCost.calcRdSADCost(tempPred->distortion, tempPred->sa8dBits);
+
         uint8_t hasCbf = true;
         bool swapped = false;
         if (!foundCbf0Merge)
@@ -1451,6 +1458,7 @@
                 tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
                 tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
                 tempPred->cu.setPredModeSubParts(MODE_INTER);
+                tempPred->sa8dCost = bestPred->sa8dCost;
                 tempPred->predYuv.copyFromYuv(bestPred->predYuv);
             }
             
@@ -1549,6 +1557,12 @@
     }
     if (predInterSearch(interMode, cuGeom, bMergeOnly, true))
     {
+        const Yuv& fencYuv = *interMode.fencYuv;
+        Yuv& predYuv = interMode.predYuv;
+        int part = partitionFromLog2Size(cuGeom.log2CUSize);
+        interMode.distortion = primitives.cu[part].sa8d(fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
+        interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
+
         /* predInterSearch sets interMode.sa8dBits, but this is ignored */
         encodeResAndCalcRdInterCU(interMode, cuGeom);
 
diff -r 6c5156500d6d -r 3216d4b11fa4 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Jan 30 11:54:22 2015 -0600
+++ b/source/encoder/search.cpp	Fri Jan 16 19:01:08 2015 +0530
@@ -1131,7 +1131,7 @@
     }
 }
 
-void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes)
+void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint64_t sa8dThreshold)
 {
     CUData& cu = intraMode.cu;
 
@@ -1143,7 +1143,13 @@
     cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
     intraMode.initCosts();
-    intraMode.distortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes);
+    intraMode.distortion = estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes, sa8dThreshold);
+
+    if (intraMode.distortion == MAX_INT)
+    {
+        intraMode.rdCost = MAX_INT64;
+        return;
+    }
     intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom);
 
     m_entropyCoder.resetBits();
@@ -1391,7 +1397,7 @@
     updateModeCost(intraMode);
 }
 
-uint32_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes)
+uint32_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes, uint64_t sa8dThreshold)
 {
     CUData& cu = intraMode.cu;
     Yuv* reconYuv = &intraMode.reconYuv;
@@ -1533,6 +1539,7 @@
                 candCostList[i] = MAX_INT64;
 
             uint64_t paddedBcost = bcost + (bcost >> 3); // 1.12%
+            paddedBcost = X265_MIN(paddedBcost, sa8dThreshold);
             for (int mode = 0; mode < 35; mode++)
                 if (modeCosts[mode] < paddedBcost || (mpms & ((uint64_t)1 << mode)))
                     updateCandList(mode, modeCosts[mode], maxCandCount, rdModeList, candCostList);
@@ -1551,8 +1558,11 @@
                     codeIntraLumaTSkip(intraMode, cuGeom, initTuDepth, absPartIdx, icosts);
                 else
                     codeIntraLumaQT(intraMode, cuGeom, initTuDepth, absPartIdx, false, icosts, depthRange);
-                COPY2_IF_LT(bcost, icosts.rdcost, bmode, rdModeList[i]);
+                COPY3_IF_LT(bcost, icosts.rdcost, bmode, rdModeList[i], intraMode.sa8dCost, candCostList[i]);
             }
+
+            if (bcost == MAX_INT64)
+                return MAX_INT;
         }
 
         /* remeasure best mode, allowing TU splits */
diff -r 6c5156500d6d -r 3216d4b11fa4 source/encoder/search.h
--- a/source/encoder/search.h	Fri Jan 30 11:54:22 2015 -0600
+++ b/source/encoder/search.h	Fri Jan 16 19:01:08 2015 +0530
@@ -162,7 +162,7 @@
     void     invalidateContexts(int fromDepth);
 
     // full RD search of intra modes. if sharedModes is not NULL, it directly uses them
-    void     checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes);
+    void     checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint64_t sa8dThreshold = MAX_INT64);
 
     // select best intra mode using only sa8d costs, cannot measure NxN intra
     void     checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
@@ -203,7 +203,7 @@
     void     saveResidualQTData(CUData& cu, ShortYuv& resiYuv, uint32_t absPartIdx, uint32_t tuDepth);
 
     // RDO search of luma intra modes; result is fully encoded luma. luma distortion is returned
-    uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
+    uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes, uint64_t sa8dThreshold);
 
     // RDO select best chroma mode from luma; result is fully encode chroma. chroma distortion is returned
     uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);


More information about the x265-devel mailing list