[x265] [PATCH] weightp: weight only one reference; donot cache mc refs anymore

Steve Borho steve at borho.org
Mon Mar 10 03:31:04 CET 2014


On Sun, Mar 9, 2014 at 7:02 AM,  <kavitha at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Kavitha Sampath <kavitha at multicorewareinc.com>
> # Date 1394365741 -19800
> #      Sun Mar 09 17:19:01 2014 +0530
> # Node ID efb0ff5f607b70cb6c728bec3f61709b87626606
> # Parent  93861c42b879798134bb200ff633f6492a7ff376
> weightp: weight only one reference; donot cache mc refs anymore

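This causes shadow warnings with GCC: the newly added local `wpScalingParam w` shadows the existing `int w` declared earlier in the same function.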

/Users/steve/repos/x265/source/encoder/weightPrediction.cpp:403:28: warning: declaration shadows a local variable [-Wshadow]
            wpScalingParam w;
                           ^
/Users/steve/repos/x265/source/encoder/weightPrediction.cpp:249:9: note: previous declaration is here
    int w = ((picorig->getWidth()  + 15) >> 4) << 4;


>
> diff -r 93861c42b879 -r efb0ff5f607b source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Fri Mar 07 22:54:00 2014 -0600
> +++ b/source/encoder/frameencoder.cpp   Sun Mar 09 17:19:01 2014 +0530
> @@ -453,7 +453,9 @@
>      //------------------------------------------------------------------------------
>      //  Weighted Prediction parameters estimation.
>      //------------------------------------------------------------------------------
> -    if ((slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP()) || (slice->getSliceType() == B_SLICE && slice->getPPS()->getWPBiPred()))
> +    bool weightpSet = slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP();
> +    bool weightbSet = slice->getSliceType() == B_SLICE && slice->getPPS()->getWPBiPred();
> +    if (weightpSet || weightbSet)
>      {
>          assert(slice->getPPS()->getUseWP());
>          weightAnalyse(*slice, *m_cfg->param);
> @@ -466,7 +468,7 @@
>          for (int ref = 0; ref < slice->getNumRefIdx(l); ref++)
>          {
>              wpScalingParam *w = NULL;
> -            if ((slice->isInterP() && slice->getPPS()->getUseWP() && slice->m_weightPredTable[l][ref][0].bPresentFlag))
> +            if (weightpSet && !ref && slice->m_weightPredTable[l][ref][0].bPresentFlag)
>              {
>                  w = slice->m_weightPredTable[l][ref];
>                  slice->m_numWPRefs++;
> diff -r 93861c42b879 -r efb0ff5f607b source/encoder/weightPrediction.cpp
> --- a/source/encoder/weightPrediction.cpp       Fri Mar 07 22:54:00 2014 -0600
> +++ b/source/encoder/weightPrediction.cpp       Sun Mar 09 17:19:01 2014 +0530
> @@ -33,34 +33,10 @@
>  using namespace x265;
>  namespace weightp {
>
> -struct RefData
> -{
> -    pixel *  mcbuf;
> -    pixel *  fref;
> -    float    guessScale;
> -    float    fencMean;
> -    float    refMean;
> -    uint32_t unweightedCost;
> -};
> -
> -struct ChannelData
> -{
> -    pixel* orig;
> -    int    stride;
> -    int    width;
> -    int    height;
> -};
> -
>  struct Cache
>  {
> -    wpScalingParam wp[2][MAX_NUM_REF][3];
> -    RefData        ref[2][MAX_NUM_REF][3];
> -    ChannelData    paramset[3];
> -
>      const int *    intraCost;
> -    pixel*         weightTemp;
>      int            numPredDir;
> -    int            lambda;
>      int            csp;
>      int            hshift;
>      int            vshift;
> @@ -191,6 +167,7 @@
>   * pixels have unreliable availability */
>  uint32_t weightCost(pixel *         fenc,
>                      pixel *         ref,
> +                    pixel *         weightTemp,
>                      int             stride,
>                      const Cache &   cache,
>                      int             width,
> @@ -208,9 +185,9 @@
>          int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
>          int pwidth = ((width + 15) >> 4) << 4;
>
> -        primitives.weight_pp(ref, cache.weightTemp, stride, stride, pwidth, height,
> +        primitives.weight_pp(ref, weightTemp, stride, stride, pwidth, height,
>                               weight, round << correction, denom + correction, offset);
> -        ref = cache.weightTemp;
> +        ref = weightTemp;
>      }
>
>      uint32_t cost = 0;
> @@ -241,154 +218,24 @@
>      return cost;
>  }
>
> -bool tryCommonDenom(TComSlice& slice, Cache& cache, int indenom)
> -{
> -    int log2denom[3] = { indenom };
> -    const float epsilon = 1.f / 128.f;
> -
> -    /* reset weight states */
> -    for (int list = 0; list < cache.numPredDir; list++)
> -    {
> -        for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> -        {
> -            SET_WEIGHT(cache.wp[list][ref][0], false, 1 << indenom, indenom, 0);
> -            SET_WEIGHT(cache.wp[list][ref][1], false, 1 << indenom, indenom, 0);
> -            SET_WEIGHT(cache.wp[list][ref][2], false, 1 << indenom, indenom, 0);
> -        }
> -    }
> -
> -    int numWeighted = 0;
> -    for (int list = 0; list < cache.numPredDir; list++)
> -    {
> -        for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> -        {
> -            wpScalingParam *fw = cache.wp[list][ref];
> -
> -            for (int yuv = 1; yuv < 3; yuv++)
> -            {
> -                /* Ensure that the denominators of cb and cr are same */
> -                RefData *rd = &cache.ref[list][ref][yuv];
> -                fw[yuv].setFromWeightAndOffset((int)(rd->guessScale * (1 << log2denom[1]) + 0.5), 0, log2denom[1]);
> -                log2denom[1] = X265_MIN(log2denom[1], (int)fw[yuv].log2WeightDenom);
> -            }
> -            log2denom[2] = log2denom[1];
> -
> -            bool bWeightRef = false;
> -            for (int yuv = 0; yuv < 3; yuv++)
> -            {
> -                RefData *rd = &cache.ref[list][ref][yuv];
> -                ChannelData *p = &cache.paramset[yuv];
> -                if (yuv && !fw[0].bPresentFlag)
> -                {
> -                    fw[1].inputWeight = 1 << fw[1].log2WeightDenom;
> -                    fw[2].inputWeight = 1 << fw[2].log2WeightDenom;
> -                    break;
> -                }
> -
> -                x265_emms();
> -                /* Early termination */
> -                float meanDiff = rd->refMean < rd->fencMean ? rd->fencMean - rd->refMean : rd->refMean - rd->fencMean;
> -                float guessVal = rd->guessScale > 1.f ? rd->guessScale - 1.f : 1.f - rd->guessScale;
> -                if ((meanDiff < 0.5f && guessVal < epsilon) || !rd->unweightedCost)
> -                    continue;
> -
> -                wpScalingParam w;
> -                w.setFromWeightAndOffset((int)(rd->guessScale * (1 << log2denom[yuv]) + 0.5), 0, log2denom[yuv]);
> -                int mindenom = w.log2WeightDenom;
> -                int minscale = w.inputWeight;
> -                int minoff = 0;
> -
> -                uint32_t origscore = rd->unweightedCost;
> -                uint32_t minscore = origscore;
> -                bool bFound = false;
> -                static const int sD = 4; // scale distance
> -                static const int oD = 2; // offset distance
> -                for (int is = minscale - sD; is <= minscale + sD; is++)
> -                {
> -                    int deltaWeight = is - (1 << mindenom);
> -                    if (deltaWeight > 127 || deltaWeight <= -128)
> -                        continue;
> -
> -                    int curScale = is;
> -                    int curOffset = (int)(rd->fencMean - rd->refMean * curScale / (1 << mindenom) + 0.5f);
> -                    if (curOffset < -128 || curOffset > 127)
> -                    {
> -                        /* Rescale considering the constraints on curOffset. We do it in this order
> -                         * because scale has a much wider range than offset (because of denom), so
> -                         * it should almost never need to be clamped. */
> -                        curOffset = Clip3(-128, 127, curOffset);
> -                        curScale = (int)((1 << mindenom) * (rd->fencMean - curOffset) / rd->refMean + 0.5f);
> -                        curScale = Clip3(0, 127, curScale);
> -                    }
> -
> -                    for (int ioff = curOffset - oD; (ioff <= (curOffset + oD)) && (ioff < 127); ioff++)
> -                    {
> -                        if (yuv)
> -                        {
> -                            int pred = (128 - ((128 * curScale) >> (mindenom)));
> -                            int deltaOffset = ioff - pred; // signed 10bit
> -                            if (deltaOffset < -512 || deltaOffset > 511)
> -                                continue;
> -                            ioff = Clip3(-128, 127, (deltaOffset + pred)); // signed 8bit
> -                        }
> -                        else
> -                        {
> -                            ioff = Clip3(-128, 127, ioff);
> -                        }
> -
> -                        SET_WEIGHT(w, true, curScale, mindenom, ioff);
> -                        uint32_t s = weightCost(p->orig, rd->fref, p->stride, cache, p->width, p->height, &w, !yuv) +
> -                                     sliceHeaderCost(&w, cache.lambda, !!yuv);
> -                        COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
> -                        if (minoff == curOffset - oD && ioff != curOffset - oD)
> -                            break;
> -                    }
> -                }
> -
> -                // if chroma denoms diverged, we must start over
> -                if (mindenom < log2denom[yuv])
> -                    return false;
> -
> -                if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f)
> -                {
> -                    fw[yuv].bPresentFlag = false;
> -                    fw[yuv].inputWeight = 1 << fw[yuv].log2WeightDenom;
> -                }
> -                else
> -                {
> -                    SET_WEIGHT(fw[yuv], true, minscale, mindenom, minoff);
> -                    bWeightRef = true;
> -                }
> -            }
> -
> -            if (bWeightRef)
> -            {
> -                // Make sure both chroma channels match
> -                if (fw[1].bPresentFlag != fw[2].bPresentFlag)
> -                {
> -                    if (fw[1].bPresentFlag)
> -                        fw[2] = fw[1];
> -                    else
> -                        fw[1] = fw[2];
> -                }
> -
> -                if (++numWeighted >= 8)
> -                    return true;
> -            }
> -        }
> -    }
> -
> -    return true;
> -}
> -
> -void prepareRef(Cache& cache, TComSlice& slice, x265_param& param)
> +void tryCommonDenom(TComSlice&     slice,
> +                    x265_param&    param,
> +                    wpScalingParam wp[2][MAX_NUM_REF][3],
> +                    pixel *        temp,
> +                    int            indenom)
>  {
>      TComPic *pic = slice.getPic();
>      TComPicYuv *picorig = pic->getPicYuvOrg();
>      Lowres& fenc = pic->m_lowres;
>
> -    cache.weightTemp = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
> -    cache.lambda = (int) x265_lambda2_non_I[slice.getSliceQp()];
> +    /* caller provides temp space for two full-pel planes. Split it
> +     * in half for motion compensation of the reference and then the
> +     * weighting */
> +    pixel *mcbuf = temp;
> +    pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
> +
> +    weightp::Cache cache;
> +    memset(&cache, 0, sizeof(cache));
>      cache.intraCost = fenc.intraCost;
>      cache.lowresWidthInCU = fenc.width >> 3;
>      cache.lowresHeightInCU = fenc.lines >> 3;
> @@ -396,6 +243,7 @@
>      cache.hshift = CHROMA_H_SHIFT(cache.csp);
>      cache.vshift = CHROMA_V_SHIFT(cache.csp);
>
> +    int lambda = (int) x265_lambda2_non_I[slice.getSliceQp()];
>      int curPoc = slice.getPOC();
>      int numpixels[3];
>      int w = ((picorig->getWidth()  + 15) >> 4) << 4;
> @@ -407,137 +255,228 @@
>      numpixels[1] = numpixels[2] = w * h;
>
>      cache.numPredDir = slice.isInterP() ? 1 : 2;
> +    int chromadenom = indenom;
> +    const float epsilon = 1.f / 128.f;
> +
> +    /* reset weight states */
>      for (int list = 0; list < cache.numPredDir; list++)
>      {
>          for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
>          {
> -            TComPic *refPic = slice.getRefPic(list, ref);
> -            Lowres& refLowres = refPic->m_lowres;
> +            SET_WEIGHT(wp[list][ref][0], false, 1 << indenom, indenom, 0);
> +            SET_WEIGHT(wp[list][ref][1], false, 1 << indenom, indenom, 0);
> +            SET_WEIGHT(wp[list][ref][2], false, 1 << indenom, indenom, 0);
> +        }
> +    }
>
> -            MV *mvs = NULL;
> -            bool bMotionCompensate = false;
> +    for (int list = 0; list < cache.numPredDir; list++)
> +    {
> +        wpScalingParam *fw = wp[list][0];
> +        TComPic *refPic = slice.getRefPic(list, 0);
> +        Lowres& refLowres = refPic->m_lowres;
>
> -            /* test whether POC distance is within range for lookahead structures */
> -            int diffPoc = abs(curPoc - refPic->getPOC());
> -            if (diffPoc <= param.bframes + 1)
> +        MV *mvs = NULL;
> +        bool bMotionCompensate = false;
> +
> +        /* test whether POC distance is within range for lookahead structures */
> +        int diffPoc = abs(curPoc - refPic->getPOC());
> +        if (diffPoc <= param.bframes + 1)
> +        {
> +            mvs = fenc.lowresMvs[list][diffPoc - 1];
> +            /* test whether this motion search was performed by lookahead */
> +            if (mvs[0].x != 0x7FFF)
>              {
> -                mvs = fenc.lowresMvs[list][diffPoc - 1];
> -                /* test whether this motion search was performed by lookahead */
> -                if (mvs[0].x != 0x7FFF)
> +                bMotionCompensate = true;
> +
> +                /* reference chroma planes must be extended prior to being
> +                    * used as motion compensation sources */
> +                if (!refPic->m_bChromaPlanesExtended)
>                  {
> -                    bMotionCompensate = true;
> -
> -                    /* reference chroma planes must be extended prior to being
> -                     * used as motion compensation sources */
> -                    if (!refPic->m_bChromaPlanesExtended)
> -                    {
> -                        refPic->m_bChromaPlanesExtended = true;
> -                        TComPicYuv *refyuv = refPic->getPicYuvOrg();
> -                        int stride = refyuv->getCStride();
> -                        int width = refyuv->getWidth() >> cache.hshift;
> -                        int height = refyuv->getHeight() >> cache.vshift;
> -                        int marginX = refyuv->getChromaMarginX();
> -                        int marginY = refyuv->getChromaMarginY();
> -                        extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
> -                        extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
> -                    }
> +                    refPic->m_bChromaPlanesExtended = true;
> +                    TComPicYuv *refyuv = refPic->getPicYuvOrg();
> +                    int stride = refyuv->getCStride();
> +                    int width = refyuv->getWidth() >> cache.hshift;
> +                    int height = refyuv->getHeight() >> cache.vshift;
> +                    int marginX = refyuv->getChromaMarginX();
> +                    int marginY = refyuv->getChromaMarginY();
> +                    extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
> +                    extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
>                  }
>              }
> -            for (int yuv = 0; yuv < 3; yuv++)
> +        }
> +
> +        /* prepare estimates */
> +        float guessScale[3], fencMean[3], refMean[3];
> +        for (int yuv = 0; yuv < 3; yuv++)
> +        {
> +            uint64_t fencVar = fenc.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
> +            uint64_t refVar  = refLowres.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
> +            if (fencVar && refVar)
> +                guessScale[yuv] = Clip3(-2.f, 1.8f, std::sqrt((float)fencVar / refVar));
> +            else
> +                guessScale[yuv] = 1.8f;
> +            fencMean[yuv] = (float)fenc.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
> +            refMean[yuv]  = (float)refLowres.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
> +
> +            /* Ensure that the denominators of cb and cr are same */
> +            if (yuv)
>              {
> -                /* prepare inputs to weight analysis */
> -                RefData *rd = &cache.ref[list][ref][yuv];
> -                ChannelData *p = &cache.paramset[yuv];
> -
> -                x265_emms();
> -                uint64_t fencVar = fenc.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
> -                uint64_t refVar  = refLowres.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
> -                if (fencVar && refVar)
> -                    rd->guessScale = Clip3(-2.f, 1.8f, std::sqrt((float)fencVar / refVar));
> -                else
> -                    rd->guessScale = 1.8f;
> -                rd->fencMean = (float)fenc.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
> -                rd->refMean  = (float)refLowres.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
> -
> -                switch (yuv)
> -                {
> -                case 0:
> -                    p->orig = fenc.lowresPlane[0];
> -                    p->stride = fenc.lumaStride;
> -                    p->width = fenc.width;
> -                    p->height = fenc.lines;
> -                    rd->fref = refLowres.lowresPlane[0];
> -                    if (bMotionCompensate)
> -                    {
> -                        rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
> -                        if (rd->mcbuf)
> -                        {
> -                            mcLuma(rd->mcbuf, refLowres, mvs);
> -                            rd->fref = rd->mcbuf;
> -                        }
> -                    }
> -                    break;
> -
> -                case 1:
> -                    p->orig = picorig->getCbAddr();
> -                    p->stride = picorig->getCStride();
> -                    rd->fref = refPic->getPicYuvOrg()->getCbAddr();
> -
> -                    /* Clamp the chroma dimensions to the nearest multiple of
> -                     * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
> -                     * blocks and weightCost measures 8x8 blocks. This
> -                     * potentially ignores some edge pixels, but simplifies the
> -                     * logic and prevents reading uninitialized pixels. Lowres
> -                     * planes are border extended and require no clamping. */
> -                    p->width =  ((picorig->getWidth()  >> 4) << 4) >> cache.hshift;
> -                    p->height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
> -                    if (bMotionCompensate)
> -                    {
> -                        rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
> -                        if (rd->mcbuf)
> -                        {
> -                            mcChroma(rd->mcbuf, rd->fref, p->stride, mvs, cache, p->height, p->width);
> -                            rd->fref = rd->mcbuf;
> -                        }
> -                    }
> -                    break;
> -
> -                case 2:
> -                    rd->fref = refPic->getPicYuvOrg()->getCrAddr();
> -                    p->orig = picorig->getCrAddr();
> -                    p->stride = picorig->getCStride();
> -                    p->width =  ((picorig->getWidth()  >> 4) << 4) >> cache.hshift;
> -                    p->height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
> -                    if (bMotionCompensate)
> -                    {
> -                        rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
> -                        if (rd->mcbuf)
> -                        {
> -                            mcChroma(rd->mcbuf, rd->fref, p->stride, mvs, cache, p->height, p->width);
> -                            rd->fref = rd->mcbuf;
> -                        }
> -                    }
> -                    break;
> -
> -                default:
> -                    return;
> -                }
> -                rd->unweightedCost = weightCost(p->orig, rd->fref, p->stride, cache, p->width, p->height, NULL, !yuv);
> +                fw[yuv].setFromWeightAndOffset((int)(guessScale[yuv] * (1 << chromadenom) + 0.5), 0, chromadenom);
> +                chromadenom = X265_MIN(chromadenom, (int)fw[yuv].log2WeightDenom);
>              }
>          }
> -    }
> -}
>
> -void tearDown(Cache& cache, TComSlice& slice)
> -{
> -    X265_FREE(cache.weightTemp);
> -    for (int list = 0; list < cache.numPredDir; list++)
> -    {
> -        for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> +        bool bWeightRef = false;
> +        for (int yuv = 0; yuv < 3; yuv++)
>          {
> -            for (int yuv = 0; yuv < 3; yuv++)
> +            if (yuv && !fw[0].bPresentFlag)
>              {
> -                X265_FREE(cache.ref[list][ref][yuv].mcbuf);
> +                fw[1].inputWeight = 1 << fw[1].log2WeightDenom;
> +                fw[2].inputWeight = 1 << fw[2].log2WeightDenom;
> +                break;
> +            }
> +
> +            x265_emms();
> +            /* Early termination */
> +            float meanDiff = refMean[yuv] < fencMean[yuv] ? fencMean[yuv] - refMean[yuv] : refMean[yuv] - fencMean[yuv];
> +            float guessVal = guessScale[yuv] > 1.f ? guessScale[yuv] - 1.f : 1.f - guessScale[yuv];
> +            if (meanDiff < 0.5f && guessVal < epsilon)
> +                continue;
> +
> +            /* prepare inputs to weight analysis */
> +            pixel *orig;
> +            pixel *fref;
> +            int    stride;
> +            int    width, height;
> +            switch (yuv)
> +            {
> +            case 0:
> +                orig = fenc.lowresPlane[0];
> +                stride = fenc.lumaStride;
> +                width = fenc.width;
> +                height = fenc.lines;
> +                fref = refLowres.lowresPlane[0];
> +                if (bMotionCompensate)
> +                {
> +                    mcLuma(mcbuf, refLowres, mvs);
> +                    fref = mcbuf;
> +                }
> +                break;
> +
> +            case 1:
> +                orig = picorig->getCbAddr();
> +                stride = picorig->getCStride();
> +                fref = refPic->getPicYuvOrg()->getCbAddr();
> +
> +                /* Clamp the chroma dimensions to the nearest multiple of
> +                    * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
> +                    * blocks and weightCost measures 8x8 blocks. This
> +                    * potentially ignores some edge pixels, but simplifies the
> +                    * logic and prevents reading uninitialized pixels. Lowres
> +                    * planes are border extended and require no clamping. */
> +                width =  ((picorig->getWidth()  >> 4) << 4) >> cache.hshift;
> +                height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
> +                if (bMotionCompensate)
> +                {
> +                    mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
> +                    fref = mcbuf;
> +                }
> +                break;
> +
> +            case 2:
> +                fref = refPic->getPicYuvOrg()->getCrAddr();
> +                orig = picorig->getCrAddr();
> +                stride = picorig->getCStride();
> +                width =  ((picorig->getWidth()  >> 4) << 4) >> cache.hshift;
> +                height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
> +                if (bMotionCompensate)
> +                {
> +                    mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
> +                    fref = mcbuf;
> +                }
> +                break;
> +
> +            default:
> +                return;
> +            }
> +
> +            wpScalingParam w;
> +            w.setFromWeightAndOffset((int)(guessScale[yuv] * (1 << fw[yuv].log2WeightDenom) + 0.5), 0, fw[yuv].log2WeightDenom);
> +            int mindenom = w.log2WeightDenom;
> +            int minscale = w.inputWeight;
> +            int minoff = 0;
> +
> +            uint32_t origscore = weightCost(orig, fref, weightTemp, stride, cache, width, height, NULL, !yuv);
> +            if (!origscore)
> +                continue;
> +
> +            uint32_t minscore = origscore;
> +            bool bFound = false;
> +            static const int sD = 4; // scale distance
> +            static const int oD = 2; // offset distance
> +            for (int is = minscale - sD; is <= minscale + sD; is++)
> +            {
> +                int deltaWeight = is - (1 << mindenom);
> +                if (deltaWeight > 127 || deltaWeight <= -128)
> +                    continue;
> +
> +                int curScale = is;
> +                int curOffset = (int)(fencMean[yuv] - refMean[yuv] * curScale / (1 << mindenom) + 0.5f);
> +                if (curOffset < -128 || curOffset > 127)
> +                {
> +                    /* Rescale considering the constraints on curOffset. We do it in this order
> +                        * because scale has a much wider range than offset (because of denom), so
> +                        * it should almost never need to be clamped. */
> +                    curOffset = Clip3(-128, 127, curOffset);
> +                    curScale = (int)((1 << mindenom) * (fencMean[yuv] - curOffset) / refMean[yuv] + 0.5f);
> +                    curScale = Clip3(0, 127, curScale);
> +                }
> +
> +                for (int ioff = curOffset - oD; (ioff <= (curOffset + oD)) && (ioff < 127); ioff++)
> +                {
> +                    if (yuv)
> +                    {
> +                        int pred = (128 - ((128 * curScale) >> (mindenom)));
> +                        int deltaOffset = ioff - pred; // signed 10bit
> +                        if (deltaOffset < -512 || deltaOffset > 511)
> +                            continue;
> +                        ioff = Clip3(-128, 127, (deltaOffset + pred)); // signed 8bit
> +                    }
> +                    else
> +                    {
> +                        ioff = Clip3(-128, 127, ioff);
> +                    }
> +
> +                    SET_WEIGHT(w, true, curScale, mindenom, ioff);
> +                    uint32_t s = weightCost(orig, fref, weightTemp, stride, cache, width, height, &w, !yuv) +
> +                                    sliceHeaderCost(&w, lambda, !!yuv);
> +                    COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
> +                    if (minoff == curOffset - oD && ioff != curOffset - oD)
> +                        break;
> +                }
> +            }
> +
> +            if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f)
> +            {
> +                fw[yuv].bPresentFlag = false;
> +                fw[yuv].inputWeight = 1 << fw[yuv].log2WeightDenom;
> +            }
> +            else
> +            {
> +                SET_WEIGHT(fw[yuv], true, minscale, mindenom, minoff);
> +                bWeightRef = true;
> +            }
> +        }
> +
> +        if (bWeightRef)
> +        {
> +            // Make sure both chroma channels match
> +            if (fw[1].bPresentFlag != fw[2].bPresentFlag)
> +            {
> +                if (fw[1].bPresentFlag)
> +                    fw[2] = fw[1];
> +                else
> +                    fw[1] = fw[2];
>              }
>          }
>      }
> @@ -547,35 +486,30 @@
>  namespace x265 {
>  void weightAnalyse(TComSlice& slice, x265_param& param)
>  {
> -    weightp::Cache cache;
> -    memset(&cache, 0, sizeof(cache));
> +    wpScalingParam wp[2][MAX_NUM_REF][3];
> +    int numPredDir = slice.isInterP() ? 1 : 2;
> +    TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
> +    pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
>
> -    prepareRef(cache, slice, param);
> -    if (cache.weightTemp)
> +    if (temp)
>      {
>          int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
> -        do
> -        {
> -            if (weightp::tryCommonDenom(slice, cache, denom))
> -                break;
> -            denom--; // decrement to satisfy the range limitation
> -        }
> -        while (denom > 0);
> +        weightp::tryCommonDenom(slice, param, wp, temp, denom);
> +        X265_FREE(temp);
>      }
>      else
>      {
> -        for (int list = 0; list < cache.numPredDir; list++)
> +        for (int list = 0; list < numPredDir; list++)
>          {
>              for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
>              {
> -                SET_WEIGHT(cache.wp[list][ref][0], false, 1, 0, 0);
> -                SET_WEIGHT(cache.wp[list][ref][1], false, 1, 0, 0);
> -                SET_WEIGHT(cache.wp[list][ref][2], false, 1, 0, 0);
> +                SET_WEIGHT(wp[list][ref][0], false, 1, 0, 0);
> +                SET_WEIGHT(wp[list][ref][1], false, 1, 0, 0);
> +                SET_WEIGHT(wp[list][ref][2], false, 1, 0, 0);
>              }
>          }
>      }
> -    tearDown(cache, slice);
> -    slice.setWpScaling(cache.wp);
> +    slice.setWpScaling(wp);
>
>      if (param.logLevel >= X265_LOG_FULL)
>      {
> @@ -584,23 +518,20 @@
>          bool bWeighted = false;
>
>          p = sprintf(buf, "poc: %d weights:", slice.getPOC());
> -        for (int list = 0; list < cache.numPredDir; list++)
> +        for (int list = 0; list < numPredDir; list++)
>          {
> -            for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> +            wpScalingParam* w = &wp[list][0][0];
> +            if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
>              {
> -                wpScalingParam* w = &cache.wp[list][ref][0];
> -                if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
> -                {
> -                    bWeighted = true;
> -                    p += sprintf(buf + p, " [L%d:R%d ", list, ref);
> -                    if (w[0].bPresentFlag)
> -                        p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset);
> -                    if (w[1].bPresentFlag)
> -                        p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset);
> -                    if (w[2].bPresentFlag)
> -                        p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset);
> -                    p += sprintf(buf + p, "]");
> -                }
> +                bWeighted = true;
> +                p += sprintf(buf + p, " [L%d:R0 ", list);
> +                if (w[0].bPresentFlag)
> +                    p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset);
> +                if (w[1].bPresentFlag)
> +                    p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset);
> +                if (w[2].bPresentFlag)
> +                    p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset);
> +                p += sprintf(buf + p, "]");
>              }
>          }
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



-- 
Steve Borho


More information about the x265-devel mailing list