[x265] [PATCH] weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse

kavitha at multicorewareinc.com kavitha at multicorewareinc.com
Fri Feb 21 19:50:24 CET 2014


# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1393008408 -19800
#      Sat Feb 22 00:16:48 2014 +0530
# Node ID e0f5813915e6b96078eada45f352c8e247d23b32
# Parent  5e2043f89aa11363dffe33a0ff06550a7d862326
weightp: pass struct to tryCommonDenom() to cache motion compensated reference planes for reuse

diff -r 5e2043f89aa1 -r e0f5813915e6 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Fri Feb 21 03:05:48 2014 -0600
+++ b/source/encoder/weightPrediction.cpp	Sat Feb 22 00:16:48 2014 +0530
@@ -32,6 +32,14 @@
 using namespace x265;
 
 namespace weightp {
+
+struct cache // state handed to tryCommonDenom() so results can be reused when it is retried with a smaller denom
+{
+    pixel    *mcRef[2][MAX_NUM_REF][3]; // [list][ref][yuv] motion compensated reference planes; NULL until allocated inside tryCommonDenom(), freed by its caller
+    uint32_t unweightedCost[2][MAX_NUM_REF][3]; // [list][ref][yuv] weightCost() result with a NULL weight; 0 is treated as "not computed yet"
+    pixel    *weightTemp; // scratch plane passed to weightCost(); allocated and freed by the caller of tryCommonDenom()
+};
+
 /* make a motion compensated copy of lowres ref into mcout with the same stride.
  * The borders of mcout are not extended */
 void mcLuma(pixel *    mcout,
@@ -213,7 +221,7 @@
 bool tryCommonDenom(TComSlice&     slice,
                     x265_param&    param,
                     wpScalingParam wp[2][MAX_NUM_REF][3],
-                    pixel *        temp,
+                    cache&         cacheData,
                     int            indenom)
 {
     TComPic *pic = slice.getPic();
@@ -221,12 +229,6 @@
     Lowres& fenc = pic->m_lowres;
     int curPoc = slice.getPOC();
 
-    /* caller provides temp space for two full-pel planes. Split it
-     * in half for motion compensation of the reference and then the
-     * weighting */
-    pixel *mcTemp = temp;
-    pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
-
     int log2denom[3] = { indenom };
     int csp = picorig->m_picCsp;
     int hshift = CHROMA_H_SHIFT(csp);
@@ -327,6 +329,8 @@
                 pixel *fref;
                 int    origstride, frefstride;
                 int    width, height;
+                pixel* &buf = cacheData.mcRef[list][ref][yuv];
+                uint32_t& origscore = cacheData.unweightedCost[list][ref][yuv];
                 switch (yuv)
                 {
                 case 0:
@@ -338,8 +342,12 @@
 
                     if (bMotionCompensate)
                     {
-                        mcLuma(mcTemp, refLowres, mvCosts, fenc.intraCost, mvs);
-                        fref = mcTemp;
+                        if (!buf)
+                        {
+                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+                            mcLuma(buf, refLowres, mvCosts, fenc.intraCost, mvs);
+                        }
+                        fref = buf;
                     }
                     break;
 
@@ -359,8 +367,12 @@
 
                     if (bMotionCompensate)
                     {
-                        mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
-                        fref = mcTemp;
+                        if (!buf)
+                        {
+                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+                            mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
+                        }
+                        fref = buf;
                     }
                     break;
 
@@ -373,8 +385,12 @@
 
                     if (bMotionCompensate)
                     {
-                        mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
-                        fref = mcTemp;
+                        if (!buf)
+                        {
+                            buf = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
+                            mcChroma(buf, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
+                        }
+                        fref = buf;
                     }
                     break;
 
@@ -389,9 +405,12 @@
                 int minscale = w.inputWeight;
                 int minoff = 0;
 
-                uint32_t origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
-                if (!origscore)
-                    continue;
+                if (origscore == 0)
+                {
+                    origscore = weightCost(orig, origstride, fref, frefstride, cacheData.weightTemp, width, height, NULL);
+                    if (!origscore)
+                        continue;
+                }
 
                 uint32_t minscore = origscore;
                 bool bFound = false;
@@ -431,7 +450,7 @@
                         }
 
                         SET_WEIGHT(w, true, curScale, mindenom, ioff);
-                        uint32_t s = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, &w);
+                        uint32_t s = weightCost(orig, origstride, fref, frefstride, cacheData.weightTemp, width, height, &w);
                         COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
                         if (minoff == curOffset - oD && ioff != curOffset - oD)
                             break;
@@ -481,13 +500,11 @@
     wpScalingParam wp[2][MAX_NUM_REF][3];
     int numPredDir = slice.isInterP() ? 1 : 2;
 
-    /* TODO: perf - collect some of this data into a struct which is passed to
-     * tryCommonDenom() to avoid recalculating some data.  Motion compensated
-     * reference planes can be cached this way */
-
+    weightp::cache cacheData;
+    memset(&cacheData, 0, sizeof(cacheData));
     TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
-    pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
-    if (temp)
+    cacheData.weightTemp = X265_MALLOC(pixel, orig->getStride() * orig->getHeight());
+    if (cacheData.weightTemp)
     {
         int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
         do
@@ -503,13 +520,23 @@
                 }
             }
 
-            if (weightp::tryCommonDenom(slice, param, wp, temp, denom))
+            if (weightp::tryCommonDenom(slice, param, wp, cacheData, denom))
                 break;
             denom--; // decrement to satisfy the range limitation
         }
         while (denom > 0);
 
-        X265_FREE(temp);
+        X265_FREE(cacheData.weightTemp);
+        for (int list = 0; list < numPredDir; list++)
+        {
+            for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+            {
+                for (int yuv = 0; yuv < 3; yuv++)
+                {
+                    X265_FREE(cacheData.mcRef[list][ref][yuv]);
+                }
+            }
+        }
     }
 
     if (param.logLevel >= X265_LOG_DEBUG)


More information about the x265-devel mailing list