[x265] [PATCH] weightp: use struct to cache data for reuse, refactor MC of reference planes

kavitha at multicorewareinc.com kavitha at multicorewareinc.com
Fri Feb 28 08:01:16 CET 2014


# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1393570702 -19800
#      Fri Feb 28 12:28:22 2014 +0530
# Node ID 4f86ef761116e6973d4b96168d095c9bdd5dbcf3
# Parent  8189f9e9a39f135eb1a8b6c76833503f0b85c6f2
weightp: use struct to cache data for reuse, refactor MC of reference planes

diff -r 8189f9e9a39f -r 4f86ef761116 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Thu Feb 27 19:05:54 2014 -0600
+++ b/source/encoder/weightPrediction.cpp	Fri Feb 28 12:28:22 2014 +0530
@@ -30,8 +30,39 @@
 #include <cmath>
 
 using namespace x265;
+namespace weightp {
 
-namespace weightp {
+struct RefData
+{
+    pixel    *mcbuf;
+    uint32_t unweightedCost;
+    pixel    *fref;
+};
+
+struct ChannelData
+{
+    pixel    *orig;
+    int      stride;
+    int      width;
+    int      height;
+};
+
+struct CspData
+{
+    int csp;
+    int hshift;
+    int vshift;
+};
+
+struct cache
+{
+    RefData     ref[2][MAX_NUM_REF][3];
+    ChannelData paramset[3];
+    pixel       *weightTemp;
+    CspData     colorFormat;
+    int         numPredDir;
+};
+
 /* make a motion compensated copy of lowres ref into mcout with the same stride.
  * The borders of mcout are not extended */
 void mcLuma(pixel *    mcout,
@@ -90,14 +121,13 @@
               const MV * mvs,
               int        height,
               int        width,
-              int        csp)
+              CspData  * c)
 {
     /* the motion vectors correspond to 8x8 lowres luma blocks, or 16x16 fullres
      * luma blocks. We have to adapt block size to chroma csp */
-    int hShift = CHROMA_H_SHIFT(csp);
-    int vShift = CHROMA_V_SHIFT(csp);
-    int bw = 16 >> hShift;
-    int bh = 16 >> vShift;
+    int csp = c->csp;
+    int bw = 16 >> c->hshift;
+    int bh = 16 >> c->vshift;
     MV mvmin, mvmax;
 
     int lowresWidthInCU = fenc.width >> 3;
@@ -119,8 +149,8 @@
             {
                 MV mv = mvs[cu]; // lowres MV
                 mv <<= 1;        // fullres MV
-                mv.x >>= hShift;
-                mv.y >>= vShift;
+                mv.x >>= c->hshift;
+                mv.y >>= c->vshift;
 
                 /* clip MV to available pixels */
                 mvmin.x = (int16_t)((-x - 8) << 2);
@@ -211,26 +241,15 @@
 const float epsilon = 1.f / 128.f;
 
 bool tryCommonDenom(TComSlice&     slice,
-                    x265_param&    param,
                     wpScalingParam wp[2][MAX_NUM_REF][3],
-                    pixel *        temp,
+                    cache&         cacheData,
                     int            indenom)
 {
     TComPic *pic = slice.getPic();
     TComPicYuv *picorig = pic->getPicYuvOrg();
     Lowres& fenc = pic->m_lowres;
-    int curPoc = slice.getPOC();
-
-    /* caller provides temp space for two full-pel planes. Split it
-     * in half for motion compensation of the reference and then the
-     * weighting */
-    pixel *mcTemp = temp;
-    pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
-
     int log2denom[3] = { indenom };
-    int csp = picorig->m_picCsp;
-    int hshift = CHROMA_H_SHIFT(csp);
-    int vshift = CHROMA_V_SHIFT(csp);
+    CspData *c = &cacheData.colorFormat;
 
     /* Round dimensions to 16, calculate pixel counts in luma and chroma */
     int numpixels[3];
@@ -239,54 +258,21 @@
         int h = ((picorig->getHeight() + 15) >> 4) << 4;
         numpixels[0] = w * h;
 
-        w >>= hshift;
-        h >>= vshift;
+        w >>= c->hshift;
+        h >>= c->vshift;
         numpixels[1] = numpixels[2] = w * h;
     }
 
     int numWeighted = 0;
-    int numPredDir = slice.isInterP() ? 1 : 2;
 
-    for (int list = 0; list < numPredDir; list++)
+    for (int list = 0; list < cacheData.numPredDir; list++)
     {
         for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
         {
             wpScalingParam *fw = wp[list][ref];
             TComPic *refPic = slice.getRefPic(list, ref);
             Lowres& refLowres = refPic->m_lowres;
-
-            MV *mvs = NULL;
-            int32_t *mvCosts = NULL;
             bool bWeightRef = false;
-            bool bMotionCompensate = false;
-
-            /* test whether POC distance is within range for lookahead structures */
-            int diffPoc = abs(curPoc - refPic->getPOC());
-            if (diffPoc <= param.bframes + 1)
-            {
-                mvs = fenc.lowresMvs[list][diffPoc - 1];
-                mvCosts = fenc.lowresMvCosts[list][diffPoc - 1];
-                /* test whether this motion search was performed by lookahead */
-                if (mvs[0].x != 0x7FFF)
-                {
-                    bMotionCompensate = true;
-
-                    /* reference chroma planes must be extended prior to being
-                     * used as motion compensation sources */
-                    if (!refPic->m_bChromaPlanesExtended)
-                    {
-                        refPic->m_bChromaPlanesExtended = true;
-                        TComPicYuv *refyuv = refPic->getPicYuvOrg();
-                        int stride = refyuv->getCStride();
-                        int width = refyuv->getWidth() >> hshift;
-                        int height = refyuv->getHeight() >> vshift;
-                        int marginX = refyuv->getChromaMarginX();
-                        int marginY = refyuv->getChromaMarginY();
-                        extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
-                        extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
-                    }
-                }
-            }
 
             /* prepare estimates */
             float guessScale[3], fencMean[3], refMean[3];
@@ -322,66 +308,12 @@
                 if (meanDiff < 0.5f && guessVal < epsilon)
                     continue;
 
-                /* prepare inputs to weight analysis */
-                pixel *orig;
-                pixel *fref;
-                int    origstride, frefstride;
-                int    width, height;
-                switch (yuv)
-                {
-                case 0:
-                    orig = fenc.lowresPlane[0];
-                    fref = refLowres.lowresPlane[0];
-                    origstride = frefstride = fenc.lumaStride;
-                    width = fenc.width;
-                    height = fenc.lines;
+                RefData *rd = &cacheData.ref[list][ref][yuv];
+                ChannelData *p = &cacheData.paramset[yuv];
 
-                    if (bMotionCompensate)
-                    {
-                        mcLuma(mcTemp, refLowres, mvCosts, fenc.intraCost, mvs);
-                        fref = mcTemp;
-                    }
-                    break;
-
-                case 1:
-                    orig = picorig->getCbAddr();
-                    fref = refPic->getPicYuvOrg()->getCbAddr();
-                    origstride = frefstride = picorig->getCStride();
-
-                    /* Clamp the chroma dimensions to the nearest multiple of
-                     * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
-                     * blocks and weightCost measures 8x8 blocks. This
-                     * potentially ignores some edge pixels, but simplifies the
-                     * logic and prevents reading uninitialized pixels. Lowres
-                     * planes are border extended and require no clamping. */
-                    width =  ((picorig->getWidth()  >> 4) << 4) >> hshift;
-                    height = ((picorig->getHeight() >> 4) << 4) >> vshift;
-
-                    if (bMotionCompensate)
-                    {
-                        mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
-                        fref = mcTemp;
-                    }
-                    break;
-
-                case 2:
-                    fref = refPic->getPicYuvOrg()->getCrAddr();
-                    orig = picorig->getCrAddr();
-                    origstride = frefstride = picorig->getCStride();
-                    width =  ((picorig->getWidth()  >> 4) << 4) >> hshift;
-                    height = ((picorig->getHeight() >> 4) << 4) >> vshift;
-
-                    if (bMotionCompensate)
-                    {
-                        mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
-                        fref = mcTemp;
-                    }
-                    break;
-
-                default:
-                    // idiotic compilers must die
-                    return false;
-                }
+                uint32_t origscore = rd->unweightedCost;
+                if (!origscore)
+                    continue;
 
                 wpScalingParam w;
                 w.setFromWeightAndOffset((int)(guessScale[yuv] * (1 << log2denom[yuv]) + 0.5), 0, log2denom[yuv]);
@@ -389,10 +321,6 @@
                 int minscale = w.inputWeight;
                 int minoff = 0;
 
-                uint32_t origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
-                if (!origscore)
-                    continue;
-
                 uint32_t minscore = origscore;
                 bool bFound = false;
                 static const int sD = 4; // scale distance
@@ -431,7 +359,7 @@
                         }
 
                         SET_WEIGHT(w, true, curScale, mindenom, ioff);
-                        uint32_t s = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, &w);
+                        uint32_t s = weightCost(p->orig, p->stride, rd->fref, p->stride, cacheData.weightTemp, p->width, p->height, &w);
                         COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
                         if (minoff == curOffset - oD && ioff != curOffset - oD)
                             break;
@@ -473,28 +401,171 @@
 
     return true;
 }
+
+bool prepareRef(cache& cacheData, TComSlice& slice, x265_param& param)
+{
+    TComPic *pic = slice.getPic();
+    TComPicYuv *picorig = pic->getPicYuvOrg();
+    Lowres& fenc = pic->m_lowres;
+    int curPoc = slice.getPOC();
+    CspData *c = &cacheData.colorFormat;
+    c->csp = picorig->m_picCsp;
+    c->hshift = CHROMA_H_SHIFT(c->csp);
+    c->vshift = CHROMA_V_SHIFT(c->csp);
+
+    for (int list = 0; list < cacheData.numPredDir; list++)
+    {
+        for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+        {
+            TComPic *refPic = slice.getRefPic(list, ref);
+            Lowres& refLowres = refPic->m_lowres;
+
+            MV *mvs = NULL;
+            int32_t *mvCosts = NULL;
+            bool bMotionCompensate = false;
+
+            /* test whether POC distance is within range for lookahead structures */
+            int diffPoc = abs(curPoc - refPic->getPOC());
+            if (diffPoc <= param.bframes + 1)
+            {
+                mvs = fenc.lowresMvs[list][diffPoc - 1];
+                mvCosts = fenc.lowresMvCosts[list][diffPoc - 1];
+                /* test whether this motion search was performed by lookahead */
+                if (mvs[0].x != 0x7FFF)
+                {
+                    bMotionCompensate = true;
+
+                    /* reference chroma planes must be extended prior to being
+                     * used as motion compensation sources */
+                    if (!refPic->m_bChromaPlanesExtended)
+                    {
+                        refPic->m_bChromaPlanesExtended = true;
+                        TComPicYuv *refyuv = refPic->getPicYuvOrg();
+                        int stride = refyuv->getCStride();
+                        int width = refyuv->getWidth() >> c->hshift;
+                        int height = refyuv->getHeight() >> c->vshift;
+                        int marginX = refyuv->getChromaMarginX();
+                        int marginY = refyuv->getChromaMarginY();
+                        extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
+                        extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
+                    }
+                }
+            }
+            for (int yuv = 0; yuv < 3; yuv++)
+            {
+                /* prepare inputs to weight analysis */
+                RefData *rd = &cacheData.ref[list][ref][yuv];
+                ChannelData *p = &cacheData.paramset[yuv];
+                pixel* &buf = rd->mcbuf;
+
+                switch (yuv)
+                {
+                case 0:
+                    p->orig = fenc.lowresPlane[0];
+                    p->stride = fenc.lumaStride;
+                    p->width = fenc.width;
+                    p->height = fenc.lines;
+                    rd->fref = refLowres.lowresPlane[0];
+                    if (bMotionCompensate)
+                    {
+                        buf = X265_MALLOC(pixel, p->stride * p->height);
+                        if (buf)
+                        {
+                            mcLuma(buf, refLowres, mvCosts, fenc.intraCost, mvs);
+                            rd->fref = buf;
+                        }
+                    }
+                    break;
+
+                case 1:
+                    p->orig = picorig->getCbAddr();
+                    p->stride = picorig->getCStride();
+                    rd->fref = refPic->getPicYuvOrg()->getCbAddr();
+
+                    /* Clamp the chroma dimensions to the nearest multiple of
+                     * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
+                     * blocks and weightCost measures 8x8 blocks. This
+                     * potentially ignores some edge pixels, but simplifies the
+                     * logic and prevents reading uninitialized pixels. Lowres
+                     * planes are border extended and require no clamping. */
+                    p->width =  ((picorig->getWidth()  >> 4) << 4) >> c->hshift;
+                    p->height = ((picorig->getHeight() >> 4) << 4) >> c->vshift;
+                    if (bMotionCompensate)
+                    {
+                        buf = X265_MALLOC(pixel, p->stride * p->height);
+                        if (buf)
+                        {
+                            mcChroma(buf, rd->fref, fenc, p->stride, mvCosts, fenc.intraCost, mvs, p->height, p->width, c);
+                            rd->fref = buf;
+                        }
+                    }
+                    break;
+
+                case 2:
+                    rd->fref = refPic->getPicYuvOrg()->getCrAddr();
+                    p->orig = picorig->getCrAddr();
+                    p->stride = picorig->getCStride();
+                    p->width =  ((picorig->getWidth()  >> 4) << 4) >> c->hshift;
+                    p->height = ((picorig->getHeight() >> 4) << 4) >> c->vshift;
+                    if (bMotionCompensate)
+                    {
+                        buf = X265_MALLOC(pixel, p->stride * p->height);
+                        if (buf)
+                        {
+                            mcChroma(buf, rd->fref, fenc, p->stride, mvCosts, fenc.intraCost, mvs, p->height, p->width, c);
+                            rd->fref = buf;
+                        }
+                    }
+                    break;
+
+                default:
+                    return false;
+                }
+                rd->unweightedCost = weightCost(p->orig, p->stride, rd->fref, p->stride, cacheData.weightTemp, p->width, p->height, NULL);
+            }
+        }
+    }
+    return true;
+}
+
+void tearDown(cache& cacheData, TComSlice& slice)
+{
+    X265_FREE(cacheData.weightTemp);
+    for (int list = 0; list < cacheData.numPredDir; list++)
+    {
+        for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+        {
+            for (int yuv = 0; yuv < 3; yuv++)
+            {
+                X265_FREE(cacheData.ref[list][ref][yuv].mcbuf);
+            }
+        }
+    }
+}
 }
 
 namespace x265 {
 void weightAnalyse(TComSlice& slice, x265_param& param)
 {
     wpScalingParam wp[2][MAX_NUM_REF][3];
-    int numPredDir = slice.isInterP() ? 1 : 2;
 
     /* TODO: perf - collect some of this data into a struct which is passed to
      * tryCommonDenom() to avoid recalculating some data.  Motion compensated
      * reference planes can be cached this way */
 
+    weightp::cache cacheData;
+    memset(&cacheData, 0, sizeof(cacheData));
     TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
-    pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
-
-    if (temp)
+    cacheData.numPredDir = slice.isInterP() ? 1 : 2;
+    cacheData.weightTemp = X265_MALLOC(pixel, orig->getStride() * orig->getHeight());
+    if (cacheData.weightTemp)
     {
+        prepareRef(cacheData, slice, param);
         int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
         do
         {
             /* reset weight states */
-            for (int list = 0; list < numPredDir; list++)
+            for (int list = 0; list < cacheData.numPredDir; list++)
             {
                 for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
                 {
@@ -503,14 +574,12 @@
                     SET_WEIGHT(wp[list][ref][2], false, 1 << denom, denom, 0);
                 }
             }
-
-            if (weightp::tryCommonDenom(slice, param, wp, temp, denom))
+            if (weightp::tryCommonDenom(slice, wp, cacheData, denom))
                 break;
             denom--; // decrement to satisfy the range limitation
         }
         while (denom > 0);
-
-        X265_FREE(temp);
+        tearDown(cacheData, slice);
     }
 
     if (param.logLevel >= 4)
@@ -520,7 +589,7 @@
         bool bWeighted = false;
 
         p = sprintf(buf, "poc: %d weights:", slice.getPOC());
-        for (int list = 0; list < numPredDir; list++)
+        for (int list = 0; list < cacheData.numPredDir; list++)
         {
             for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
             {


More information about the x265-devel mailing list