[x265] [PATCH] weightp: weight only one reference; do not cache mc refs anymore
kavitha at multicorewareinc.com
kavitha at multicorewareinc.com
Sun Mar 9 13:02:34 CET 2014
# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1394365741 -19800
# Sun Mar 09 17:19:01 2014 +0530
# Node ID efb0ff5f607b70cb6c728bec3f61709b87626606
# Parent 93861c42b879798134bb200ff633f6492a7ff376
weightp: weight only one reference; do not cache mc refs anymore
diff -r 93861c42b879 -r efb0ff5f607b source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Fri Mar 07 22:54:00 2014 -0600
+++ b/source/encoder/frameencoder.cpp Sun Mar 09 17:19:01 2014 +0530
@@ -453,7 +453,9 @@
//------------------------------------------------------------------------------
// Weighted Prediction parameters estimation.
//------------------------------------------------------------------------------
- if ((slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP()) || (slice->getSliceType() == B_SLICE && slice->getPPS()->getWPBiPred()))
+ bool weightpSet = slice->getSliceType() == P_SLICE && slice->getPPS()->getUseWP();
+ bool weightbSet = slice->getSliceType() == B_SLICE && slice->getPPS()->getWPBiPred();
+ if (weightpSet || weightbSet)
{
assert(slice->getPPS()->getUseWP());
weightAnalyse(*slice, *m_cfg->param);
@@ -466,7 +468,7 @@
for (int ref = 0; ref < slice->getNumRefIdx(l); ref++)
{
wpScalingParam *w = NULL;
- if ((slice->isInterP() && slice->getPPS()->getUseWP() && slice->m_weightPredTable[l][ref][0].bPresentFlag))
+ if (weightpSet && !ref && slice->m_weightPredTable[l][ref][0].bPresentFlag)
{
w = slice->m_weightPredTable[l][ref];
slice->m_numWPRefs++;
diff -r 93861c42b879 -r efb0ff5f607b source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Fri Mar 07 22:54:00 2014 -0600
+++ b/source/encoder/weightPrediction.cpp Sun Mar 09 17:19:01 2014 +0530
@@ -33,34 +33,10 @@
using namespace x265;
namespace weightp {
-struct RefData
-{
- pixel * mcbuf;
- pixel * fref;
- float guessScale;
- float fencMean;
- float refMean;
- uint32_t unweightedCost;
-};
-
-struct ChannelData
-{
- pixel* orig;
- int stride;
- int width;
- int height;
-};
-
struct Cache
{
- wpScalingParam wp[2][MAX_NUM_REF][3];
- RefData ref[2][MAX_NUM_REF][3];
- ChannelData paramset[3];
-
const int * intraCost;
- pixel* weightTemp;
int numPredDir;
- int lambda;
int csp;
int hshift;
int vshift;
@@ -191,6 +167,7 @@
* pixels have unreliable availability */
uint32_t weightCost(pixel * fenc,
pixel * ref,
+ pixel * weightTemp,
int stride,
const Cache & cache,
int width,
@@ -208,9 +185,9 @@
int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
int pwidth = ((width + 15) >> 4) << 4;
- primitives.weight_pp(ref, cache.weightTemp, stride, stride, pwidth, height,
+ primitives.weight_pp(ref, weightTemp, stride, stride, pwidth, height,
weight, round << correction, denom + correction, offset);
- ref = cache.weightTemp;
+ ref = weightTemp;
}
uint32_t cost = 0;
@@ -241,154 +218,24 @@
return cost;
}
-bool tryCommonDenom(TComSlice& slice, Cache& cache, int indenom)
-{
- int log2denom[3] = { indenom };
- const float epsilon = 1.f / 128.f;
-
- /* reset weight states */
- for (int list = 0; list < cache.numPredDir; list++)
- {
- for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
- {
- SET_WEIGHT(cache.wp[list][ref][0], false, 1 << indenom, indenom, 0);
- SET_WEIGHT(cache.wp[list][ref][1], false, 1 << indenom, indenom, 0);
- SET_WEIGHT(cache.wp[list][ref][2], false, 1 << indenom, indenom, 0);
- }
- }
-
- int numWeighted = 0;
- for (int list = 0; list < cache.numPredDir; list++)
- {
- for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
- {
- wpScalingParam *fw = cache.wp[list][ref];
-
- for (int yuv = 1; yuv < 3; yuv++)
- {
- /* Ensure that the denominators of cb and cr are same */
- RefData *rd = &cache.ref[list][ref][yuv];
- fw[yuv].setFromWeightAndOffset((int)(rd->guessScale * (1 << log2denom[1]) + 0.5), 0, log2denom[1]);
- log2denom[1] = X265_MIN(log2denom[1], (int)fw[yuv].log2WeightDenom);
- }
- log2denom[2] = log2denom[1];
-
- bool bWeightRef = false;
- for (int yuv = 0; yuv < 3; yuv++)
- {
- RefData *rd = &cache.ref[list][ref][yuv];
- ChannelData *p = &cache.paramset[yuv];
- if (yuv && !fw[0].bPresentFlag)
- {
- fw[1].inputWeight = 1 << fw[1].log2WeightDenom;
- fw[2].inputWeight = 1 << fw[2].log2WeightDenom;
- break;
- }
-
- x265_emms();
- /* Early termination */
- float meanDiff = rd->refMean < rd->fencMean ? rd->fencMean - rd->refMean : rd->refMean - rd->fencMean;
- float guessVal = rd->guessScale > 1.f ? rd->guessScale - 1.f : 1.f - rd->guessScale;
- if ((meanDiff < 0.5f && guessVal < epsilon) || !rd->unweightedCost)
- continue;
-
- wpScalingParam w;
- w.setFromWeightAndOffset((int)(rd->guessScale * (1 << log2denom[yuv]) + 0.5), 0, log2denom[yuv]);
- int mindenom = w.log2WeightDenom;
- int minscale = w.inputWeight;
- int minoff = 0;
-
- uint32_t origscore = rd->unweightedCost;
- uint32_t minscore = origscore;
- bool bFound = false;
- static const int sD = 4; // scale distance
- static const int oD = 2; // offset distance
- for (int is = minscale - sD; is <= minscale + sD; is++)
- {
- int deltaWeight = is - (1 << mindenom);
- if (deltaWeight > 127 || deltaWeight <= -128)
- continue;
-
- int curScale = is;
- int curOffset = (int)(rd->fencMean - rd->refMean * curScale / (1 << mindenom) + 0.5f);
- if (curOffset < -128 || curOffset > 127)
- {
- /* Rescale considering the constraints on curOffset. We do it in this order
- * because scale has a much wider range than offset (because of denom), so
- * it should almost never need to be clamped. */
- curOffset = Clip3(-128, 127, curOffset);
- curScale = (int)((1 << mindenom) * (rd->fencMean - curOffset) / rd->refMean + 0.5f);
- curScale = Clip3(0, 127, curScale);
- }
-
- for (int ioff = curOffset - oD; (ioff <= (curOffset + oD)) && (ioff < 127); ioff++)
- {
- if (yuv)
- {
- int pred = (128 - ((128 * curScale) >> (mindenom)));
- int deltaOffset = ioff - pred; // signed 10bit
- if (deltaOffset < -512 || deltaOffset > 511)
- continue;
- ioff = Clip3(-128, 127, (deltaOffset + pred)); // signed 8bit
- }
- else
- {
- ioff = Clip3(-128, 127, ioff);
- }
-
- SET_WEIGHT(w, true, curScale, mindenom, ioff);
- uint32_t s = weightCost(p->orig, rd->fref, p->stride, cache, p->width, p->height, &w, !yuv) +
- sliceHeaderCost(&w, cache.lambda, !!yuv);
- COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
- if (minoff == curOffset - oD && ioff != curOffset - oD)
- break;
- }
- }
-
- // if chroma denoms diverged, we must start over
- if (mindenom < log2denom[yuv])
- return false;
-
- if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f)
- {
- fw[yuv].bPresentFlag = false;
- fw[yuv].inputWeight = 1 << fw[yuv].log2WeightDenom;
- }
- else
- {
- SET_WEIGHT(fw[yuv], true, minscale, mindenom, minoff);
- bWeightRef = true;
- }
- }
-
- if (bWeightRef)
- {
- // Make sure both chroma channels match
- if (fw[1].bPresentFlag != fw[2].bPresentFlag)
- {
- if (fw[1].bPresentFlag)
- fw[2] = fw[1];
- else
- fw[1] = fw[2];
- }
-
- if (++numWeighted >= 8)
- return true;
- }
- }
- }
-
- return true;
-}
-
-void prepareRef(Cache& cache, TComSlice& slice, x265_param& param)
+void tryCommonDenom(TComSlice& slice,
+ x265_param& param,
+ wpScalingParam wp[2][MAX_NUM_REF][3],
+ pixel * temp,
+ int indenom)
{
TComPic *pic = slice.getPic();
TComPicYuv *picorig = pic->getPicYuvOrg();
Lowres& fenc = pic->m_lowres;
- cache.weightTemp = X265_MALLOC(pixel, picorig->getStride() * picorig->getHeight());
- cache.lambda = (int) x265_lambda2_non_I[slice.getSliceQp()];
+ /* caller provides temp space for two full-pel planes. Split it
+ * in half for motion compensation of the reference and then the
+ * weighting */
+ pixel *mcbuf = temp;
+ pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
+
+ weightp::Cache cache;
+ memset(&cache, 0, sizeof(cache));
cache.intraCost = fenc.intraCost;
cache.lowresWidthInCU = fenc.width >> 3;
cache.lowresHeightInCU = fenc.lines >> 3;
@@ -396,6 +243,7 @@
cache.hshift = CHROMA_H_SHIFT(cache.csp);
cache.vshift = CHROMA_V_SHIFT(cache.csp);
+ int lambda = (int) x265_lambda2_non_I[slice.getSliceQp()];
int curPoc = slice.getPOC();
int numpixels[3];
int w = ((picorig->getWidth() + 15) >> 4) << 4;
@@ -407,137 +255,228 @@
numpixels[1] = numpixels[2] = w * h;
cache.numPredDir = slice.isInterP() ? 1 : 2;
+ int chromadenom = indenom;
+ const float epsilon = 1.f / 128.f;
+
+ /* reset weight states */
for (int list = 0; list < cache.numPredDir; list++)
{
for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
{
- TComPic *refPic = slice.getRefPic(list, ref);
- Lowres& refLowres = refPic->m_lowres;
+ SET_WEIGHT(wp[list][ref][0], false, 1 << indenom, indenom, 0);
+ SET_WEIGHT(wp[list][ref][1], false, 1 << indenom, indenom, 0);
+ SET_WEIGHT(wp[list][ref][2], false, 1 << indenom, indenom, 0);
+ }
+ }
- MV *mvs = NULL;
- bool bMotionCompensate = false;
+ for (int list = 0; list < cache.numPredDir; list++)
+ {
+ wpScalingParam *fw = wp[list][0];
+ TComPic *refPic = slice.getRefPic(list, 0);
+ Lowres& refLowres = refPic->m_lowres;
- /* test whether POC distance is within range for lookahead structures */
- int diffPoc = abs(curPoc - refPic->getPOC());
- if (diffPoc <= param.bframes + 1)
+ MV *mvs = NULL;
+ bool bMotionCompensate = false;
+
+ /* test whether POC distance is within range for lookahead structures */
+ int diffPoc = abs(curPoc - refPic->getPOC());
+ if (diffPoc <= param.bframes + 1)
+ {
+ mvs = fenc.lowresMvs[list][diffPoc - 1];
+ /* test whether this motion search was performed by lookahead */
+ if (mvs[0].x != 0x7FFF)
{
- mvs = fenc.lowresMvs[list][diffPoc - 1];
- /* test whether this motion search was performed by lookahead */
- if (mvs[0].x != 0x7FFF)
+ bMotionCompensate = true;
+
+ /* reference chroma planes must be extended prior to being
+ * used as motion compensation sources */
+ if (!refPic->m_bChromaPlanesExtended)
{
- bMotionCompensate = true;
-
- /* reference chroma planes must be extended prior to being
- * used as motion compensation sources */
- if (!refPic->m_bChromaPlanesExtended)
- {
- refPic->m_bChromaPlanesExtended = true;
- TComPicYuv *refyuv = refPic->getPicYuvOrg();
- int stride = refyuv->getCStride();
- int width = refyuv->getWidth() >> cache.hshift;
- int height = refyuv->getHeight() >> cache.vshift;
- int marginX = refyuv->getChromaMarginX();
- int marginY = refyuv->getChromaMarginY();
- extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
- extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
- }
+ refPic->m_bChromaPlanesExtended = true;
+ TComPicYuv *refyuv = refPic->getPicYuvOrg();
+ int stride = refyuv->getCStride();
+ int width = refyuv->getWidth() >> cache.hshift;
+ int height = refyuv->getHeight() >> cache.vshift;
+ int marginX = refyuv->getChromaMarginX();
+ int marginY = refyuv->getChromaMarginY();
+ extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
+ extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
}
}
- for (int yuv = 0; yuv < 3; yuv++)
+ }
+
+ /* prepare estimates */
+ float guessScale[3], fencMean[3], refMean[3];
+ for (int yuv = 0; yuv < 3; yuv++)
+ {
+ uint64_t fencVar = fenc.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
+ uint64_t refVar = refLowres.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
+ if (fencVar && refVar)
+ guessScale[yuv] = Clip3(-2.f, 1.8f, std::sqrt((float)fencVar / refVar));
+ else
+ guessScale[yuv] = 1.8f;
+ fencMean[yuv] = (float)fenc.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
+ refMean[yuv] = (float)refLowres.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
+
+ /* Ensure that the denominators of cb and cr are same */
+ if (yuv)
{
- /* prepare inputs to weight analysis */
- RefData *rd = &cache.ref[list][ref][yuv];
- ChannelData *p = &cache.paramset[yuv];
-
- x265_emms();
- uint64_t fencVar = fenc.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
- uint64_t refVar = refLowres.wp_ssd[yuv] + !refLowres.wp_ssd[yuv];
- if (fencVar && refVar)
- rd->guessScale = Clip3(-2.f, 1.8f, std::sqrt((float)fencVar / refVar));
- else
- rd->guessScale = 1.8f;
- rd->fencMean = (float)fenc.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
- rd->refMean = (float)refLowres.wp_sum[yuv] / (numpixels[yuv]) / (1 << (X265_DEPTH - 8));
-
- switch (yuv)
- {
- case 0:
- p->orig = fenc.lowresPlane[0];
- p->stride = fenc.lumaStride;
- p->width = fenc.width;
- p->height = fenc.lines;
- rd->fref = refLowres.lowresPlane[0];
- if (bMotionCompensate)
- {
- rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
- if (rd->mcbuf)
- {
- mcLuma(rd->mcbuf, refLowres, mvs);
- rd->fref = rd->mcbuf;
- }
- }
- break;
-
- case 1:
- p->orig = picorig->getCbAddr();
- p->stride = picorig->getCStride();
- rd->fref = refPic->getPicYuvOrg()->getCbAddr();
-
- /* Clamp the chroma dimensions to the nearest multiple of
- * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
- * blocks and weightCost measures 8x8 blocks. This
- * potentially ignores some edge pixels, but simplifies the
- * logic and prevents reading uninitialized pixels. Lowres
- * planes are border extended and require no clamping. */
- p->width = ((picorig->getWidth() >> 4) << 4) >> cache.hshift;
- p->height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
- if (bMotionCompensate)
- {
- rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
- if (rd->mcbuf)
- {
- mcChroma(rd->mcbuf, rd->fref, p->stride, mvs, cache, p->height, p->width);
- rd->fref = rd->mcbuf;
- }
- }
- break;
-
- case 2:
- rd->fref = refPic->getPicYuvOrg()->getCrAddr();
- p->orig = picorig->getCrAddr();
- p->stride = picorig->getCStride();
- p->width = ((picorig->getWidth() >> 4) << 4) >> cache.hshift;
- p->height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
- if (bMotionCompensate)
- {
- rd->mcbuf = X265_MALLOC(pixel, p->stride * p->height);
- if (rd->mcbuf)
- {
- mcChroma(rd->mcbuf, rd->fref, p->stride, mvs, cache, p->height, p->width);
- rd->fref = rd->mcbuf;
- }
- }
- break;
-
- default:
- return;
- }
- rd->unweightedCost = weightCost(p->orig, rd->fref, p->stride, cache, p->width, p->height, NULL, !yuv);
+ fw[yuv].setFromWeightAndOffset((int)(guessScale[yuv] * (1 << chromadenom) + 0.5), 0, chromadenom);
+ chromadenom = X265_MIN(chromadenom, (int)fw[yuv].log2WeightDenom);
}
}
- }
-}
-void tearDown(Cache& cache, TComSlice& slice)
-{
- X265_FREE(cache.weightTemp);
- for (int list = 0; list < cache.numPredDir; list++)
- {
- for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+ bool bWeightRef = false;
+ for (int yuv = 0; yuv < 3; yuv++)
{
- for (int yuv = 0; yuv < 3; yuv++)
+ if (yuv && !fw[0].bPresentFlag)
{
- X265_FREE(cache.ref[list][ref][yuv].mcbuf);
+ fw[1].inputWeight = 1 << fw[1].log2WeightDenom;
+ fw[2].inputWeight = 1 << fw[2].log2WeightDenom;
+ break;
+ }
+
+ x265_emms();
+ /* Early termination */
+ float meanDiff = refMean[yuv] < fencMean[yuv] ? fencMean[yuv] - refMean[yuv] : refMean[yuv] - fencMean[yuv];
+ float guessVal = guessScale[yuv] > 1.f ? guessScale[yuv] - 1.f : 1.f - guessScale[yuv];
+ if (meanDiff < 0.5f && guessVal < epsilon)
+ continue;
+
+ /* prepare inputs to weight analysis */
+ pixel *orig;
+ pixel *fref;
+ int stride;
+ int width, height;
+ switch (yuv)
+ {
+ case 0:
+ orig = fenc.lowresPlane[0];
+ stride = fenc.lumaStride;
+ width = fenc.width;
+ height = fenc.lines;
+ fref = refLowres.lowresPlane[0];
+ if (bMotionCompensate)
+ {
+ mcLuma(mcbuf, refLowres, mvs);
+ fref = mcbuf;
+ }
+ break;
+
+ case 1:
+ orig = picorig->getCbAddr();
+ stride = picorig->getCStride();
+ fref = refPic->getPicYuvOrg()->getCbAddr();
+
+ /* Clamp the chroma dimensions to the nearest multiple of
+ * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
+ * blocks and weightCost measures 8x8 blocks. This
+ * potentially ignores some edge pixels, but simplifies the
+ * logic and prevents reading uninitialized pixels. Lowres
+ * planes are border extended and require no clamping. */
+ width = ((picorig->getWidth() >> 4) << 4) >> cache.hshift;
+ height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
+ if (bMotionCompensate)
+ {
+ mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
+ fref = mcbuf;
+ }
+ break;
+
+ case 2:
+ fref = refPic->getPicYuvOrg()->getCrAddr();
+ orig = picorig->getCrAddr();
+ stride = picorig->getCStride();
+ width = ((picorig->getWidth() >> 4) << 4) >> cache.hshift;
+ height = ((picorig->getHeight() >> 4) << 4) >> cache.vshift;
+ if (bMotionCompensate)
+ {
+ mcChroma(mcbuf, fref, stride, mvs, cache, height, width);
+ fref = mcbuf;
+ }
+ break;
+
+ default:
+ return;
+ }
+
+ wpScalingParam w;
+ w.setFromWeightAndOffset((int)(guessScale[yuv] * (1 << fw[yuv].log2WeightDenom) + 0.5), 0, fw[yuv].log2WeightDenom);
+ int mindenom = w.log2WeightDenom;
+ int minscale = w.inputWeight;
+ int minoff = 0;
+
+ uint32_t origscore = weightCost(orig, fref, weightTemp, stride, cache, width, height, NULL, !yuv);
+ if (!origscore)
+ continue;
+
+ uint32_t minscore = origscore;
+ bool bFound = false;
+ static const int sD = 4; // scale distance
+ static const int oD = 2; // offset distance
+ for (int is = minscale - sD; is <= minscale + sD; is++)
+ {
+ int deltaWeight = is - (1 << mindenom);
+ if (deltaWeight > 127 || deltaWeight <= -128)
+ continue;
+
+ int curScale = is;
+ int curOffset = (int)(fencMean[yuv] - refMean[yuv] * curScale / (1 << mindenom) + 0.5f);
+ if (curOffset < -128 || curOffset > 127)
+ {
+ /* Rescale considering the constraints on curOffset. We do it in this order
+ * because scale has a much wider range than offset (because of denom), so
+ * it should almost never need to be clamped. */
+ curOffset = Clip3(-128, 127, curOffset);
+ curScale = (int)((1 << mindenom) * (fencMean[yuv] - curOffset) / refMean[yuv] + 0.5f);
+ curScale = Clip3(0, 127, curScale);
+ }
+
+ for (int ioff = curOffset - oD; (ioff <= (curOffset + oD)) && (ioff < 127); ioff++)
+ {
+ if (yuv)
+ {
+ int pred = (128 - ((128 * curScale) >> (mindenom)));
+ int deltaOffset = ioff - pred; // signed 10bit
+ if (deltaOffset < -512 || deltaOffset > 511)
+ continue;
+ ioff = Clip3(-128, 127, (deltaOffset + pred)); // signed 8bit
+ }
+ else
+ {
+ ioff = Clip3(-128, 127, ioff);
+ }
+
+ SET_WEIGHT(w, true, curScale, mindenom, ioff);
+ uint32_t s = weightCost(orig, fref, weightTemp, stride, cache, width, height, &w, !yuv) +
+ sliceHeaderCost(&w, lambda, !!yuv);
+ COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
+ if (minoff == curOffset - oD && ioff != curOffset - oD)
+ break;
+ }
+ }
+
+ if (!bFound || (minscale == (1 << mindenom) && minoff == 0) || (float)minscore / origscore > 0.998f)
+ {
+ fw[yuv].bPresentFlag = false;
+ fw[yuv].inputWeight = 1 << fw[yuv].log2WeightDenom;
+ }
+ else
+ {
+ SET_WEIGHT(fw[yuv], true, minscale, mindenom, minoff);
+ bWeightRef = true;
+ }
+ }
+
+ if (bWeightRef)
+ {
+ // Make sure both chroma channels match
+ if (fw[1].bPresentFlag != fw[2].bPresentFlag)
+ {
+ if (fw[1].bPresentFlag)
+ fw[2] = fw[1];
+ else
+ fw[1] = fw[2];
}
}
}
@@ -547,35 +486,30 @@
namespace x265 {
void weightAnalyse(TComSlice& slice, x265_param& param)
{
- weightp::Cache cache;
- memset(&cache, 0, sizeof(cache));
+ wpScalingParam wp[2][MAX_NUM_REF][3];
+ int numPredDir = slice.isInterP() ? 1 : 2;
+ TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
+ pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
- prepareRef(cache, slice, param);
- if (cache.weightTemp)
+ if (temp)
{
int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
- do
- {
- if (weightp::tryCommonDenom(slice, cache, denom))
- break;
- denom--; // decrement to satisfy the range limitation
- }
- while (denom > 0);
+ weightp::tryCommonDenom(slice, param, wp, temp, denom);
+ X265_FREE(temp);
}
else
{
- for (int list = 0; list < cache.numPredDir; list++)
+ for (int list = 0; list < numPredDir; list++)
{
for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
{
- SET_WEIGHT(cache.wp[list][ref][0], false, 1, 0, 0);
- SET_WEIGHT(cache.wp[list][ref][1], false, 1, 0, 0);
- SET_WEIGHT(cache.wp[list][ref][2], false, 1, 0, 0);
+ SET_WEIGHT(wp[list][ref][0], false, 1, 0, 0);
+ SET_WEIGHT(wp[list][ref][1], false, 1, 0, 0);
+ SET_WEIGHT(wp[list][ref][2], false, 1, 0, 0);
}
}
}
- tearDown(cache, slice);
- slice.setWpScaling(cache.wp);
+ slice.setWpScaling(wp);
if (param.logLevel >= X265_LOG_FULL)
{
@@ -584,23 +518,20 @@
bool bWeighted = false;
p = sprintf(buf, "poc: %d weights:", slice.getPOC());
- for (int list = 0; list < cache.numPredDir; list++)
+ for (int list = 0; list < numPredDir; list++)
{
- for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+ wpScalingParam* w = &wp[list][0][0];
+ if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
{
- wpScalingParam* w = &cache.wp[list][ref][0];
- if (w[0].bPresentFlag || w[1].bPresentFlag || w[2].bPresentFlag)
- {
- bWeighted = true;
- p += sprintf(buf + p, " [L%d:R%d ", list, ref);
- if (w[0].bPresentFlag)
- p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset);
- if (w[1].bPresentFlag)
- p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset);
- if (w[2].bPresentFlag)
- p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset);
- p += sprintf(buf + p, "]");
- }
+ bWeighted = true;
+ p += sprintf(buf + p, " [L%d:R0 ", list);
+ if (w[0].bPresentFlag)
+ p += sprintf(buf + p, "Y{%d/%d%+d}", w[0].inputWeight, 1 << w[0].log2WeightDenom, w[0].inputOffset);
+ if (w[1].bPresentFlag)
+ p += sprintf(buf + p, "U{%d/%d%+d}", w[1].inputWeight, 1 << w[1].log2WeightDenom, w[1].inputOffset);
+ if (w[2].bPresentFlag)
+ p += sprintf(buf + p, "V{%d/%d%+d}", w[2].inputWeight, 1 << w[2].log2WeightDenom, w[2].inputOffset);
+ p += sprintf(buf + p, "]");
}
}
More information about the x265-devel
mailing list