[x265] [PATCH] weightp: weight only one reference; donot cache mc refs anymore
Steve Borho
steve at borho.org
Mon Mar 10 03:31:04 CET 2014
On Sun, Mar 9, 2014 at 7:02 AM, <kavitha at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Kavitha Sampath <kavitha at multicorewareinc.com>
> # Date 1394365741 -19800
> # Sun Mar 09 17:19:01 2014 +0530
> # Node ID efb0ff5f607b70cb6c728bec3f61709b87626606
> # Parent 93861c42b879798134bb200ff633f6492a7ff376
> weightp: weight only one reference; donot cache mc refs anymore
This causes shadow warnings with GCC:

/Users/steve/repos/x265/source/encoder/weightPrediction.cpp:403:28: warning: declaration shadows a local variable [-Wshadow]
            wpScalingParam w;
                           ^
/Users/steve/repos/x265/source/encoder/weightPrediction.cpp:249:9: note: previous declaration is here
    int w = ((picorig->getWidth() + 15) >> 4) << 4;
        ^
>
> diff -r 93861c42b879 -r efb0ff5f607b source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Fri Mar 07 22:54:00 2014 -0600
> +++ b/source/encoder/frameencoder.cpp Sun Mar 09 17:19:01 2014 +0530
> @@ -453,7 +453,9 @@
> //------------------------------------------------------------------------------
> // Weighted Prediction parameters estimation.
> //------------------------------------------------------------------------------
> - if ((slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP()) || (slice->getSliceType() == B_SLICE && slice->getPPS()->getWPBiPred()))
> + bool weightpSet = slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP();
> + bool weightbSet = slice->getSliceType() == B_SLICE && slice->getPPS()->getWPBiPred();
> + if (weightpSet || weightbSet)
> {
> assert(slice->getPPS()->getUseWP());
> weightAnalyse(*slice, *m_cfg->param);
> @@ -466,7 +468,7 @@
> for (int ref = 0; ref < slice->getNumRefIdx(l); ref++)
> {
> wpScalingParam *w = NULL;
> - if ((slice->isInterP() && slice->getPPS()->getUseWP() && slice->m_weightPredTable[l][ref][0].bPresentFlag))
> + if (weightpSet && !ref && slice->m_weightPredTable[l][ref][0].bPresentFlag)
> {
> w = slice->m_weightPredTable[l][ref];
> slice->m_numWPRefs++;
> diff -r 93861c42b879 -r efb0ff5f607b source/encoder/weightPrediction.cpp
> --- a/source/encoder/weightPrediction.cpp Fri Mar 07 22:54:00 2014 -0600
> +++ b/source/encoder/weightPrediction.cpp Sun Mar 09 17:19:01 2014 +0530
> @@ -33,34 +33,10 @@
> using namespace x265;
> namespace weightp {
>
> -struct RefData
> -{
> - pixel * mcbuf;
> - pixel * fref;
> - float guessScale;
> - float fencMean;
> - float refMean;
> - uint32_t unweightedCost;
> -};
> -
> -struct ChannelData
> -{
> - pixel* orig;
> - int stride;
> - int width;
> - int height;
> -};
> -
> struct Cache
> {
> - wpScalingParam wp[2][MAX_NUM_REF][3];
> - RefData ref[2][MAX_NUM_REF][3];
> - ChannelData paramset[3];
> -
> const int * intraCost;
> - pixel* weightTemp;
> int numPredDir;
> - int lambda;
> int csp;
> int hshift;
> int vshift;
> @@ -191,6 +167,7 @@
> * pixels have unreliable availability */
> uint32_t weightCost(pixel * fenc,
> pixel * ref,
> + pixel * weightTemp,
> int stride,
> const Cache & cache,
> int width,
> @@ -208,9 +185,9 @@
> int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
> int pwidth = ((width + 15) >> 4) << 4;
>
> - primitives.weight_pp(ref, cache.weightTemp, stride, stride, pwidth, height,
> + primitives.weight_pp(ref, weightTemp, stride, stride, pwidth, height,
> weight, round << correction, denom + correction, offset);
> - ref = cache.weightTemp;
> + ref = weightTemp;
> }
>
> uint32_t cost = 0;
> @@ -241,154 +218,24 @@
> return cost;
> }
>
> -bool tryCommonDenom(TComSlice& slice, Cache& cache, int indenom)
> -{
> - int log2denom[3] = { indenom };
> - const float epsilon = 1.f / 128.f;
> -
> - /* reset weight states */
> - for (int list = 0; list < cache.numPredDir; list++)
> - {
> - for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> - {
> - SET_WEIGHT(cache.wp[list][ref][0], false, 1 << indenom, indenom, 0);
> - SET_WEIGHT(cache.wp[list][ref][1], false, 1 << indenom, indenom, 0);
> - SET_WEIGHT(cache.wp[list][ref][2], false, 1 << indenom, indenom, 0);
> - }
> - }
> -
> - int numWeighted = 0;
> - for (int list = 0; list < cache.numPredDir; list++)
> - {
> - for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> - {
> - wpScalingParam *fw = cache.wp[list][ref];
> -
> - for (int yuv = 1; yuv < 3; yuv++)
> - {
> - /* Ensure that the denominators of cb and cr are same */
> - RefData *rd = &cache.ref[list][ref][yuv];
> - fw[yuv].setFromWeightAndOffset((int)(rd->guessScale * (1 << log2denom[1]) + 0.5), 0, log2denom[1]);
> - log2denom[1] = X265_MIN(log2denom[1], (int)fw[yuv].log2WeightDenom);
> - }
> - log2denom[2] = log2denom[1];
> -
> - bool bWeightRef = false;
> - for (int yuv = 0; yuv < 3; yuv++)
> - {
> - RefData *rd = &cache.ref[list][ref][yuv];
> - ChannelData *p = &cache.paramset[yuv];
> - if (yuv && !fw[0].bPresentFlag)
> - {
> - fw[1].inputWeight = 1 << fw[1].log2WeightDenom;
> - fw[2].inputWeight = 1 << fw[2].log2WeightDenom;
> - break;
> - }
> -
> - x265_emms();
> - /* Early termination */
> - float meanDiff = rd->refMean < rd->fencMean ? rd->fencMean - rd->refMean : rd->refMean - rd->fencMean;
> - float guessVal = rd->guessScale > 1.f ? rd->guessScale - 1.f : 1.f - rd->guessScale;
> - if ((meanDiff < 0.5f && guessVal < epsilon) || !rd->unweightedCost)
> - continue;
> -
> - wpScalingParam w;
> - w.setFromWeightAndOffset((int)(rd->guessScale * (1 << log2denom[yuv]) + 0.5), 0, log2denom[yuv]);
> - int mindenom = w.log2WeightDenom;
> - int minscale = w.inputWeight;
> - int minoff = 0;
> -
> - uint32_t origscore = rd->unweightedCost;
> - uint32_t minscore = origscore;
> - bool bFound = false;
> - static const int sD = 4; // scale distance
> - static const int oD = 2; // offset distance
> - for (int is = minscale - sD; is <= minscale + sD; is++)
> - {
> - int deltaWeight = is - (1 << mindenom);
> - if (deltaWeight > 127 || deltaWeight <= -128)
> - continue;
> -
> - int curScale = is;
> - int curOffset = (int)(rd->fencMean - rd->refMean * curScale / (1 << mindenom) + 0.5f);
> - if (curOffset < -128 || curOffset > 127)
> - {
> - /* Rescale considering the constraints on curOffset. We do it in this order
> - * because scale has a much wider range than offset (because of denom), so
> - * it should almost never need to be clamped. */
> - curOffset = Clip3(-128, 127, curOffset);
> - curScale = (int)((1 << mindenom) * (rd->fencMean - curOffset) / rd->refMean + 0.5f);
> - curScale = Clip3(0, 127, curScale);
> - }
> -
> - for (int ioff = curOffset - oD; (ioff <= (curOffset + oD)) && (ioff < 127); ioff++)
> - {
> - if (yuv)
> - {
> - int pred = (128 - ((128 * curScale) >> (mindenom)));
> - int deltaOffset = ioff - pred; // signed 10bit
> - if (deltaOffset < -512 || deltaOffset > 511)
> - continue;
> - ioff = Clip3(-128, 127, (deltaOffset + pred)); // signed 8bit
> - }
> - else
> - {
> - ioff = Clip3(-128, 127, ioff);
> - }
> -
> - SET_WEIGHT(w, true, curScale, mindenom, ioff);
> - uint32_t s = weightCost(p->orig, rd->fref, p->stride, cache, p->width, p->height, &w, !yuv) +
> - sliceHeaderCost(&w, cache.lambda, !!yuv);
> - COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
> - if (minoff == curOffset - oD && ioff != curOffset - oD)
> - break;
> - }
> - }
> -
> - // if chroma denoms diverged, we must start over
> - if (mindenom < log2denom[yuv])
> - return false;
> -
> - if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f)
> - {
> - fw[yuv].bPresentFlag = false;
> - fw[yuv].inputWeight = 1 << fw[yuv].log2WeightDenom;
> - }
> - else
> - {
> - SET_WEIGHT(fw[yuv], true, minscale, mindenom, minoff);
> - bWeightRef = true;
> - }
> - }
> -
> - if (bWeightRef)
> - {
> - // Make sure both chroma channels match
> - if (fw[1].bPresentFlag != fw[2].bPresentFlag)
> - {
> - if (fw[1].bPresentFlag)
> - fw[2] = fw[1];
> - else
> - fw[1] = fw[2];
> - }
> -
> - if (++numWeighted >= 8)
> - return true;
> - }
> - }
> - }
> -
> - return true;
> -}
> -
> -void prepareRef(Cache& cache, TComSlice& slice, x265_param& param)
> +void tryCommonDenom(TComSlice& slice,
> + x265_param& param,
> + wpScalingParam wp[2][MAX_NUM_REF][3],
> + pixel * temp,
> + int indenom)
> {
> TComPic *pic = slice.getPic();
> TComPicYuv *picorig = pic->getPicYuvOrg();
> Lowres& fenc = pic->m_lowres;
>
> - cache.weightTemp = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
> - cache.lambda = (int) x265_lambda2_non_I[slice.getSliceQp()];
> + /* caller provides temp space for two full-pel planes. Split it
> + * in half for motion compensation of the reference and then the
> + * weighting */
> + pixel *mcbuf = temp;
> + pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
> +
> + weightp::Cache cache;
> + memset(&cache, 0, sizeof(cache));
> cache.intraCost = fenc.intraCost;
> cache.lowresWidthInCU = fenc.width >> 3;
> cache.lowresHeightInCU = fenc.lines >> 3;
> @@ -396,6 +243,7 @@
> cache.hshift = CHROMA_H_SHIFT(cache.csp);
> cache.vshift = CHROMA_V_SHIFT(cache.csp);
>
> + int lambda = (int) x265_lambda2_non_I[slice.getSliceQp()];
> int curPoc = slice.getPOC();
> int numpixels[3];
> int w = ((picorig->getWidth() + 15) >> 4) << 4;
> @@ -407,137 +255,228 @@
> numpixels[1] = numpixels[2] = w * h;
>
> cache.numPredDir = slice.isInterP() ? 1 : 2;
> + int chromadenom = indenom;
> + const float epsilon = 1.f / 128.f;
> +
> + /* reset weight states */
> for (int list = 0; list < cache.numPredDir; list++)
> {
> for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> {
> - TComPic *refPic = slice.getRefPic(list, ref);
> - Lowres& refLowres = refPic->m_lowres;
> + SET_WEIGHT(wp[list][ref][0], false, 1 << indenom, indenom, 0);
> + SET_WEIGHT(wp[list][ref][1], false, 1 << indenom, indenom, 0);
> + SET_WEIGHT(wp[list][ref][2], false, 1 << indenom, indenom, 0);
> + }
> + }
>
> - MV *mvs = NULL;
> - bool bMotionCompensate = false;
> + for (int list = 0; list < cache.numPredDir; list++)
> + {
> + wpScalingParam *fw = wp[list][0];
> + TComPic *refPic = slice.getRefPic(list, 0);
> + Lowres& refLowres = refPic->m_lowres;
>
> - /* test whether POC distance is within range for lookahead structures */
> - int diffPoc = abs(curPoc - refPic->getPOC());
> - if (diffPoc <= param.bframes + 1)
> + MV *mvs = NULL;
> + bool bMotionCompensate = false;
> +
> + /* test whether POC distance is within range for lookahead structures */
> + int diffPoc = abs(curPoc - refPic->getPOC());
> + if (diffPoc <= param.bframes + 1)
> + {
> + mvs = fenc.lowresMvs[list][diffPoc - 1];
> + /* test whether this motion search was performed by lookahead */
> + if (mvs[0].x != 0x7FFF)
> {
> - mvs = fenc.lowresMvs[list][diffPoc - 1];
> - /* test whether this motion search was performed by lookahead */
> - if (mvs[0].x != 0x7FFF)
> + bMotionCompensate = true;
> +
> + /* reference chroma planes must be extended prior to being
> + * used as motion compensation sources */
> + if (!refPic->m_bChromaPlanesExtended)
> {
> - bMotionCompensate = true;
> -
> - /* reference chroma planes must be extended prior to being
> - * used as motion compensation sources */
> - if (!refPic->m_bChromaPlanesExtended)
> - {
> - refPic->m_bChromaPlanesExtended = true;
> - TComPicYuv *refyuv = refPic->getPicYuvOrg();
> - int stride = refyuv->getCStride();
> - int width = refyuv->getWidth() >> cache.hshift;
> - int height = refyuv->getHeight() >> cache.vshift;
> - int marginX = refyuv->getChromaMarginX();
> - int marginY = refyuv->getChromaMarginY();
> - extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
> - extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
> - }
> + refPic->m_bChromaPlanesExtended = true;
> + TComPicYuv *refyuv = refPic->getPicYuvOrg();
> + int stride = refyuv->getCStride();
> + int width = refyuv->getWidth() >> cache.hshift;
> + int height = refyuv->getHeight() >> cache.vshift;
> + int marginX = refyuv->getChromaMarginX();
> + int marginY = refyuv->getChromaMarginY();
> + extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
> + extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
> }
> }
> - for (int yuv = 0; yuv < 3; yuv++)
> + }
> +
> + /* prepare estimates */
> + float guessScale[3], fencMean[3], refMean[3];
> + for (int yuv = 0; yuv < 3; yuv++)
> + {
> + uint64_t fencVar = fenc.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
> + uint64_t refVar = refLowres.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
> + if (fencVar && refVar)
> + guessScale[yuv] = Clip3(-2.f, 1.8f, std::sqrt((float)fencVar / refVar));
> + else
> + guessScale[yuv] = 1.8f;
> + fencMean[yuv] = (float)fenc.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
> + refMean[yuv] = (float)refLowres.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
> +
> + /* Ensure that the denominators of cb and cr are same */
> + if (yuv)
> {
> - /* prepare inputs to weight analysis */
> - RefData *rd = &cache.ref[list][ref][yuv];
> - ChannelData *p = &cache.paramset[yuv];
> -
> - x265_emms();
> - uint64_t fencVar = fenc.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
> - uint64_t refVar = refLowres.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
> - if (fencVar && refVar)
> - rd->guessScale = Clip3(-2.f, 1.8f, std::sqrt((float)fencVar / refVar));
> - else
> - rd->guessScale = 1.8f;
> - rd->fencMean = (float)fenc.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
> - rd->refMean = (float)refLowres.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
> -
> - switch (yuv)
> - {
> - case 0:
> - p->orig = fenc.lowresPlane[0];
> - p->stride = fenc.lumaStride;
> - p->width = fenc.width;
> - p->height = fenc.lines;
> - rd->fref = refLowres.lowresPlane[0];
> - if (bMotionCompensate)
> - {
> - rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
> - if (rd->mcbuf)
> - {
> - mcLuma(rd->mcbuf, refLowres, mvs);
> - rd->fref = rd->mcbuf;
> - }
> - }
> - break;
> -
> - case 1:
> - p->orig = picorig->getCbAddr();
> - p->stride = picorig->getCStride();
> - rd->fref = refPic->getPicYuvOrg()->getCbAddr();
> -
> - /* Clamp the chroma dimensions to the nearest multiple of
> - * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
> - * blocks and weightCost measures 8x8 blocks. This
> - * potentially ignores some edge pixels, but simplifies the
> - * logic and prevents reading uninitialized pixels. Lowres
> - * planes are border extended and require no clamping. */
> - p->width = ((picorig->getWidth() >> 4) << 4) >> cache.hshift;
> - p->height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
> - if (bMotionCompensate)
> - {
> - rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
> - if (rd->mcbuf)
> - {
> - mcChroma(rd->mcbuf, rd->fref, p->stride, mvs, cache, p->height, p->width);
> - rd->fref = rd->mcbuf;
> - }
> - }
> - break;
> -
> - case 2:
> - rd->fref = refPic->getPicYuvOrg()->getCrAddr();
> - p->orig = picorig->getCrAddr();
> - p->stride = picorig->getCStride();
> - p->width = ((picorig->getWidth() >> 4) << 4) >> cache.hshift;
> - p->height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
> - if (bMotionCompensate)
> - {
> - rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
> - if (rd->mcbuf)
> - {
> - mcChroma(rd->mcbuf, rd->fref, p->stride, mvs, cache, p->height, p->width);
> - rd->fref = rd->mcbuf;
> - }
> - }
> - break;
> -
> - default:
> - return;
> - }
> - rd->unweightedCost = weightCost(p->orig, rd->fref, p->stride, cache, p->width, p->height, NULL, !yuv);
> + fw[yuv].setFromWeightAndOffset((int)(guessScale[yuv] * (1 << chromadenom) + 0.5), 0, chromadenom);
> + chromadenom = X265_MIN(chromadenom, (int)fw[yuv].log2WeightDenom);
> }
> }
> - }
> -}
>
> -void tearDown(Cache& cache, TComSlice& slice)
> -{
> - X265_FREE(cache.weightTemp);
> - for (int list = 0; list < cache.numPredDir; list++)
> - {
> - for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> + bool bWeightRef = false;
> + for (int yuv = 0; yuv < 3; yuv++)
> {
> - for (int yuv = 0; yuv < 3; yuv++)
> + if (yuv && !fw[0].bPresentFlag)
> {
> - X265_FREE(cache.ref[list][ref][yuv].mcbuf);
> + fw[1].inputWeight = 1 << fw[1].log2WeightDenom;
> + fw[2].inputWeight = 1 << fw[2].log2WeightDenom;
> + break;
> + }
> +
> + x265_emms();
> + /* Early termination */
> + float meanDiff = refMean[yuv] < fencMean[yuv] ? fencMean[yuv] - refMean[yuv] : refMean[yuv] - fencMean[yuv];
> + float guessVal = guessScale[yuv] > 1.f ? guessScale[yuv] - 1.f : 1.f - guessScale[yuv];
> + if (meanDiff < 0.5f && guessVal < epsilon)
> + continue;
> +
> + /* prepare inputs to weight analysis */
> + pixel *orig;
> + pixel *fref;
> + int stride;
> + int width, height;
> + switch (yuv)
> + {
> + case 0:
> + orig = fenc.lowresPlane[0];
> + stride = fenc.lumaStride;
> + width = fenc.width;
> + height = fenc.lines;
> + fref = refLowres.lowresPlane[0];
> + if (bMotionCompensate)
> + {
> + mcLuma(mcbuf, refLowres, mvs);
> + fref = mcbuf;
> + }
> + break;
> +
> + case 1:
> + orig = picorig->getCbAddr();
> + stride = picorig->getCStride();
> + fref = refPic->getPicYuvOrg()->getCbAddr();
> +
> + /* Clamp the chroma dimensions to the nearest multiple of
> + * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
> + * blocks and weightCost measures 8x8 blocks. This
> + * potentially ignores some edge pixels, but simplifies the
> + * logic and prevents reading uninitialized pixels. Lowres
> + * planes are border extended and require no clamping. */
> + width = ((picorig->getWidth() >> 4) << 4) >> cache.hshift;
> + height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
> + if (bMotionCompensate)
> + {
> + mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
> + fref = mcbuf;
> + }
> + break;
> +
> + case 2:
> + fref = refPic->getPicYuvOrg()->getCrAddr();
> + orig = picorig->getCrAddr();
> + stride = picorig->getCStride();
> + width = ((picorig->getWidth() >> 4) << 4) >> cache.hshift;
> + height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
> + if (bMotionCompensate)
> + {
> + mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
> + fref = mcbuf;
> + }
> + break;
> +
> + default:
> + return;
> + }
> +
> + wpScalingParam w;
> + w.setFromWeightAndOffset((int)(guessScale[yuv] * (1 << fw[yuv].log2WeightDenom) + 0.5), 0, fw[yuv].log2WeightDenom);
> + int mindenom = w.log2WeightDenom;
> + int minscale = w.inputWeight;
> + int minoff = 0;
> +
> + uint32_t origscore = weightCost(orig, fref, weightTemp, stride, cache, width, height, NULL, !yuv);
> + if (!origscore)
> + continue;
> +
> + uint32_t minscore = origscore;
> + bool bFound = false;
> + static const int sD = 4; // scale distance
> + static const int oD = 2; // offset distance
> + for (int is = minscale - sD; is <= minscale + sD; is++)
> + {
> + int deltaWeight = is - (1 << mindenom);
> + if (deltaWeight > 127 || deltaWeight <= -128)
> + continue;
> +
> + int curScale = is;
> + int curOffset = (int)(fencMean[yuv] - refMean[yuv] * curScale / (1 << mindenom) + 0.5f);
> + if (curOffset < -128 || curOffset > 127)
> + {
> + /* Rescale considering the constraints on curOffset. We do it in this order
> + * because scale has a much wider range than offset (because of denom), so
> + * it should almost never need to be clamped. */
> + curOffset = Clip3(-128, 127, curOffset);
> + curScale = (int)((1 << mindenom) * (fencMean[yuv] - curOffset) / refMean[yuv] + 0.5f);
> + curScale = Clip3(0, 127, curScale);
> + }
> +
> + for (int ioff = curOffset - oD; (ioff <= (curOffset + oD)) && (ioff < 127); ioff++)
> + {
> + if (yuv)
> + {
> + int pred = (128 - ((128 * curScale) >> (mindenom)));
> + int deltaOffset = ioff - pred; // signed 10bit
> + if (deltaOffset < -512 || deltaOffset > 511)
> + continue;
> + ioff = Clip3(-128, 127, (deltaOffset + pred)); // signed 8bit
> + }
> + else
> + {
> + ioff = Clip3(-128, 127, ioff);
> + }
> +
> + SET_WEIGHT(w, true, curScale, mindenom, ioff);
> + uint32_t s = weightCost(orig, fref, weightTemp, stride, cache, width, height, &w, !yuv) +
> + sliceHeaderCost(&w, lambda, !!yuv);
> + COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
> + if (minoff == curOffset - oD && ioff != curOffset - oD)
> + break;
> + }
> + }
> +
> + if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f)
> + {
> + fw[yuv].bPresentFlag = false;
> + fw[yuv].inputWeight = 1 << fw[yuv].log2WeightDenom;
> + }
> + else
> + {
> + SET_WEIGHT(fw[yuv], true, minscale, mindenom, minoff);
> + bWeightRef = true;
> + }
> + }
> +
> + if (bWeightRef)
> + {
> + // Make sure both chroma channels match
> + if (fw[1].bPresentFlag != fw[2].bPresentFlag)
> + {
> + if (fw[1].bPresentFlag)
> + fw[2] = fw[1];
> + else
> + fw[1] = fw[2];
> }
> }
> }
> @@ -547,35 +486,30 @@
> namespace x265 {
> void weightAnalyse(TComSlice& slice, x265_param& param)
> {
> - weightp::Cache cache;
> - memset(&cache, 0, sizeof(cache));
> + wpScalingParam wp[2][MAX_NUM_REF][3];
> + int numPredDir = slice.isInterP() ? 1 : 2;
> + TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
> + pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
>
> - prepareRef(cache, slice, param);
> - if (cache.weightTemp)
> + if (temp)
> {
> int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
> - do
> - {
> - if (weightp::tryCommonDenom(slice, cache, denom))
> - break;
> - denom--; // decrement to satisfy the range limitation
> - }
> - while (denom > 0);
> + weightp::tryCommonDenom(slice, param, wp, temp, denom);
> + X265_FREE(temp);
> }
> else
> {
> - for (int list = 0; list < cache.numPredDir; list++)
> + for (int list = 0; list < numPredDir; list++)
> {
> for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> {
> - SET_WEIGHT(cache.wp[list][ref][0], false, 1, 0, 0);
> - SET_WEIGHT(cache.wp[list][ref][1], false, 1, 0, 0);
> - SET_WEIGHT(cache.wp[list][ref][2], false, 1, 0, 0);
> + SET_WEIGHT(wp[list][ref][0], false, 1, 0, 0);
> + SET_WEIGHT(wp[list][ref][1], false, 1, 0, 0);
> + SET_WEIGHT(wp[list][ref][2], false, 1, 0, 0);
> }
> }
> }
> - tearDown(cache, slice);
> - slice.setWpScaling(cache.wp);
> + slice.setWpScaling(wp);
>
> if (param.logLevel >= X265_LOG_FULL)
> {
> @@ -584,23 +518,20 @@
> bool bWeighted = false;
>
> p = sprintf(buf, "poc: %d weights:", slice.getPOC());
> - for (int list = 0; list < cache.numPredDir; list++)
> + for (int list = 0; list < numPredDir; list++)
> {
> - for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
> + wpScalingParam* w = &wp[list][0][0];
> + if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
> {
> - wpScalingParam* w = &cache.wp[list][ref][0];
> - if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
> - {
> - bWeighted = true;
> - p += sprintf(buf + p, " [L%d:R%d ", list, ref);
> - if (w[0].bPresentFlag)
> - p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset);
> - if (w[1].bPresentFlag)
> - p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset);
> - if (w[2].bPresentFlag)
> - p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset);
> - p += sprintf(buf + p, "]");
> - }
> + bWeighted = true;
> + p += sprintf(buf + p, " [L%d:R0 ", list);
> + if (w[0].bPresentFlag)
> + p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset);
> + if (w[1].bPresentFlag)
> + p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset);
> + if (w[2].bPresentFlag)
> + p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset);
> + p += sprintf(buf + p, "]");
> }
> }
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
--
Steve Borho
More information about the x265-devel
mailing list