[x265] [PATCH] weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse

kavitha at multicorewareinc.com kavitha at multicorewareinc.com
Thu Feb 20 13:27:32 CET 2014


# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1392899156 -19800
#      Thu Feb 20 17:55:56 2014 +0530
# Node ID a359217a5a5dbd2c8183c41dbf3f7193a09e0269
# Parent  3389061b75a486e004409ab628c46fed39d03b72
weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse

diff -r 3389061b75a4 -r a359217a5a5d source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Wed Feb 19 17:03:21 2014 -0600
+++ b/source/encoder/weightPrediction.cpp	Thu Feb 20 17:55:56 2014 +0530
@@ -32,6 +32,13 @@
 using namespace x265;
 
 namespace weightp {
+
+struct cache
+{
+    pixel    *mcRef[2][MAX_NUM_REF][3];
+    uint32_t weightcost[2][MAX_NUM_REF][3];
+};
+
 /* make a motion compensated copy of lowres ref into mcout with the same stride.
  * The borders of mcout are not extended */
 void mcLuma(pixel *    mcout,
@@ -213,7 +220,7 @@
 bool tryCommonDenom(TComSlice&     slice,
                     x265_param&    param,
                     wpScalingParam wp[2][MAX_NUM_REF][3],
-                    pixel *        temp,
+                    cache&         cacheData,
                     int            indenom)
 {
     TComPic *pic = slice.getPic();
@@ -221,12 +228,6 @@
     Lowres& fenc = pic->m_lowres;
     int curPoc = slice.getPOC();
 
-    /* caller provides temp space for two full-pel planes. Split it
-     * in half for motion compensation of the reference and then the
-     * weighting */
-    pixel *mcTemp = temp;
-    pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
-
     int log2denom[3] = { indenom };
     int csp = picorig->m_picCsp;
     int hshift = CHROMA_H_SHIFT(csp);
@@ -327,6 +328,8 @@
                 pixel *fref;
                 int    origstride, frefstride;
                 int    width, height;
+                pixel* &buf = cacheData.mcRef[list][ref][yuv];
+                uint32_t& origscore = cacheData.weightcost[list][ref][yuv];
                 switch (yuv)
                 {
                 case 0:
@@ -338,8 +341,12 @@
 
                     if (bMotionCompensate)
                     {
-                        mcLuma(mcTemp, refLowres, mvCosts, fenc.intraCost, mvs);
-                        fref = mcTemp;
+                        if (!buf)
+                        {
+                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+                            mcLuma(buf, refLowres, mvCosts, fenc.intraCost, mvs);
+                        }
+                        fref = buf;
                     }
                     break;
 
@@ -359,8 +366,12 @@
 
                     if (bMotionCompensate)
                     {
-                        mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
-                        fref = mcTemp;
+                        if (!buf)
+                        {
+                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+                            mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
+                        }
+                        fref = buf;
                     }
                     break;
 
@@ -373,8 +384,12 @@
 
                     if (bMotionCompensate)
                     {
-                        mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
-                        fref = mcTemp;
+                        if (!buf)
+                        {
+                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+                            mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
+                        }
+                        fref = buf;
                     }
                     break;
 
@@ -388,10 +403,14 @@
                 int mindenom = w.log2WeightDenom;
                 int minscale = w.inputWeight;
                 int minoff = 0;
+                pixel *weightTemp = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
 
-                uint32_t origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
-                if (!origscore)
-                    continue;
+                if (origscore == 0)
+                {
+                    origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
+                    if (!origscore)
+                        continue;
+                }
 
                 uint32_t minscore = origscore;
                 bool bFound = false;
@@ -437,6 +456,7 @@
                             break;
                     }
                 }
+                X265_FREE(weightTemp);
 
                 // if chroma denoms diverged, we must start over
                 if (mindenom < log2denom[yuv])
@@ -481,48 +501,38 @@
     wpScalingParam wp[2][MAX_NUM_REF][3];
     int numPredDir = slice.isInterP() ? 1 : 2;
 
-    /* TODO: perf - collect some of this data into a struct which is passed to
-     * tryCommonDenom() to avoid recalculating some data.  Motion compensated
-     * reference planes can be cached this way */
-
-    TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
-    pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
-    if (temp)
+    weightp::cache cacheData;
+    memset(&cacheData, 0, sizeof(cacheData));
+    int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
+    do
     {
-        int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
-        do
-        {
-            /* reset weight states */
-            for (int list = 0; list < numPredDir; list++)
-            {
-                for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
-                {
-                    SET_WEIGHT(wp[list][ref][0], false, 1 << denom, denom, 0);
-                    SET_WEIGHT(wp[list][ref][1], false, 1 << denom, denom, 0);
-                    SET_WEIGHT(wp[list][ref][2], false, 1 << denom, denom, 0);
-                }
-            }
-
-            if (weightp::tryCommonDenom(slice, param, wp, temp, denom))
-                break;
-            denom--; // decrement to satisfy the range limitation
-        }
-        while (denom > 0);
-
-        X265_FREE(temp);
-    }
-
-    if (param.logLevel >= X265_LOG_DEBUG)
-    {
-        char buf[1024];
-        int p = 0;
-        bool bWeighted = false;
-
-        p = sprintf(buf, "poc: %d weights:", slice.getPOC());
+        /* reset weight states */
         for (int list = 0; list < numPredDir; list++)
         {
             for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
             {
+                SET_WEIGHT(wp[list][ref][0], false, 1 << denom, denom, 0);
+                SET_WEIGHT(wp[list][ref][1], false, 1 << denom, denom, 0);
+                SET_WEIGHT(wp[list][ref][2], false, 1 << denom, denom, 0);
+            }
+        }
+
+        if (weightp::tryCommonDenom(slice, param, wp, cacheData, denom))
+            break;
+        denom--; // decrement to satisfy the range limitation
+    }
+    while (denom > 0);
+
+    char buf[1024];
+    int  p = 0;
+    bool bWeighted = false;
+    p = sprintf(buf, "poc: %d weights:", slice.getPOC());
+    for (int list = 0; list < numPredDir; list++)
+    {
+        for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+        {
+            if (param.logLevel >= X265_LOG_DEBUG)
+            {
                 wpScalingParam* w = &wp[list][ref][0];
                 if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
                 {
@@ -537,13 +547,18 @@
                     p += sprintf(buf + p, "]");
                 }
             }
+            for (int yuv = 0; yuv < 3; yuv++)
+            {
+                if(cacheData.mcRef[list][ref][yuv])
+                    X265_FREE(cacheData.mcRef[list][ref][yuv]);
+            }
         }
-        if (bWeighted)
-        {
-            if (p < 80) // pad with spaces to ensure progress line overwritten
-                sprintf(buf + p, "%*s", 80-p, " ");
-            x265_log(&param, X265_LOG_DEBUG, "%s\n", buf);
-        }
+    }
+    if (param.logLevel >= X265_LOG_DEBUG && bWeighted)
+    {
+        if (p < 80) // pad with spaces to ensure progress line overwritten
+            sprintf(buf + p, "%*s", 80-p, " ");
+        x265_log(&param, X265_LOG_DEBUG, "%s\n", buf);
     }
     slice.setWpScaling(wp);
 }


More information about the x265-devel mailing list