[x265] [PATCH] weightp: use struct to cache data for reuse, refactor MC of reference planes
kavitha at multicorewareinc.com
kavitha at multicorewareinc.com
Fri Feb 28 08:01:16 CET 2014
# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1393570702 -19800
# Fri Feb 28 12:28:22 2014 +0530
# Node ID 4f86ef761116e6973d4b96168d095c9bdd5dbcf3
# Parent 8189f9e9a39f135eb1a8b6c76833503f0b85c6f2
weightp: use struct to cache data for reuse, refactor MC of reference planes
diff -r 8189f9e9a39f -r 4f86ef761116 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Thu Feb 27 19:05:54 2014 -0600
+++ b/source/encoder/weightPrediction.cpp Fri Feb 28 12:28:22 2014 +0530
@@ -30,8 +30,39 @@
#include <cmath>
using namespace x265;
+namespace weightp {
-namespace weightp {
+struct RefData // cached per (list, ref, plane) reference data; entries live in cache::ref
+{
+ pixel *mcbuf; // buffer prepareRef() allocates with X265_MALLOC when it motion-compensates this plane; released in tearDown()
+ uint32_t unweightedCost; // weightCost() result computed with a NULL weight argument; tryCommonDenom() skips the plane when it is 0
+ pixel *fref; // reference plane handed to weightCost(): the reference's own plane, or mcbuf once it has been filled
+};
+
+struct ChannelData // per-plane description of the picture being encoded; filled in by prepareRef()
+{
+ pixel *orig; // source plane: fenc.lowresPlane[0] for plane 0, the original picture's Cb/Cr address for planes 1 and 2
+ int stride; // stride passed to weightCost() for both the source and the reference plane
+ int width; // width passed to weightCost(); for chroma it is clamped to whole 16-pixel luma blocks before the chroma shift
+ int height; // height passed to weightCost(); clamped the same way as width for chroma
+};
+
+struct CspData // colour-format values computed once in prepareRef() and shared with mcChroma()/tryCommonDenom()
+{
+ int csp; // copied from the original picture's m_picCsp
+ int hshift; // CHROMA_H_SHIFT(csp)
+ int vshift; // CHROMA_V_SHIFT(csp)
+};
+
+struct cache // state built once per weightAnalyse() call and reused by every tryCommonDenom() attempt
+{
+ RefData ref[2][MAX_NUM_REF][3]; // indexed [list][ref][yuv plane]
+ ChannelData paramset[3]; // indexed by yuv plane
+ pixel *weightTemp; // scratch buffer passed to weightCost(); allocated in weightAnalyse(), freed in tearDown()
+ CspData colorFormat; // chroma format and shifts, set by prepareRef()
+ int numPredDir; // number of reference lists to visit: weightAnalyse() sets 1 when slice.isInterP(), otherwise 2
+};
+
/* make a motion compensated copy of lowres ref into mcout with the same stride.
* The borders of mcout are not extended */
void mcLuma(pixel * mcout,
@@ -90,14 +121,13 @@
const MV * mvs,
int height,
int width,
- int csp)
+ CspData * c)
{
/* the motion vectors correspond to 8x8 lowres luma blocks, or 16x16 fullres
* luma blocks. We have to adapt block size to chroma csp */
- int hShift = CHROMA_H_SHIFT(csp);
- int vShift = CHROMA_V_SHIFT(csp);
- int bw = 16 >> hShift;
- int bh = 16 >> vShift;
+ int csp = c->csp;
+ int bw = 16 >> c->hshift;
+ int bh = 16 >> c->vshift;
MV mvmin, mvmax;
int lowresWidthInCU = fenc.width >> 3;
@@ -119,8 +149,8 @@
{
MV mv = mvs[cu]; // lowres MV
mv <<= 1; // fullres MV
- mv.x >>= hShift;
- mv.y >>= vShift;
+ mv.x >>= c->hshift;
+ mv.y >>= c->vshift;
/* clip MV to available pixels */
mvmin.x = (int16_t)((-x - 8) << 2);
@@ -211,26 +241,15 @@
const float epsilon = 1.f / 128.f;
bool tryCommonDenom(TComSlice& slice,
- x265_param& param,
wpScalingParam wp[2][MAX_NUM_REF][3],
- pixel * temp,
+ cache& cacheData,
int indenom)
{
TComPic *pic = slice.getPic();
TComPicYuv *picorig = pic->getPicYuvOrg();
Lowres& fenc = pic->m_lowres;
- int curPoc = slice.getPOC();
-
- /* caller provides temp space for two full-pel planes. Split it
- * in half for motion compensation of the reference and then the
- * weighting */
- pixel *mcTemp = temp;
- pixel *weightTemp = temp + picorig->getStride() * picorig->getHeight();
-
int log2denom[3] = { indenom };
- int csp = picorig->m_picCsp;
- int hshift = CHROMA_H_SHIFT(csp);
- int vshift = CHROMA_V_SHIFT(csp);
+ CspData *c = &cacheData.colorFormat;
/* Round dimensions to 16, calculate pixel counts in luma and chroma */
int numpixels[3];
@@ -239,54 +258,21 @@
int h = ((picorig->getHeight() + 15) >> 4) << 4;
numpixels[0] = w * h;
- w >>= hshift;
- h >>= vshift;
+ w >>= c->hshift;
+ h >>= c->vshift;
numpixels[1] = numpixels[2] = w * h;
}
int numWeighted = 0;
- int numPredDir = slice.isInterP() ? 1 : 2;
- for (int list = 0; list < numPredDir; list++)
+ for (int list = 0; list < cacheData.numPredDir; list++)
{
for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
{
wpScalingParam *fw = wp[list][ref];
TComPic *refPic = slice.getRefPic(list, ref);
Lowres& refLowres = refPic->m_lowres;
-
- MV *mvs = NULL;
- int32_t *mvCosts = NULL;
bool bWeightRef = false;
- bool bMotionCompensate = false;
-
- /* test whether POC distance is within range for lookahead structures */
- int diffPoc = abs(curPoc - refPic->getPOC());
- if (diffPoc <= param.bframes + 1)
- {
- mvs = fenc.lowresMvs[list][diffPoc - 1];
- mvCosts = fenc.lowresMvCosts[list][diffPoc - 1];
- /* test whether this motion search was performed by lookahead */
- if (mvs[0].x != 0x7FFF)
- {
- bMotionCompensate = true;
-
- /* reference chroma planes must be extended prior to being
- * used as motion compensation sources */
- if (!refPic->m_bChromaPlanesExtended)
- {
- refPic->m_bChromaPlanesExtended = true;
- TComPicYuv *refyuv = refPic->getPicYuvOrg();
- int stride = refyuv->getCStride();
- int width = refyuv->getWidth() >> hshift;
- int height = refyuv->getHeight() >> vshift;
- int marginX = refyuv->getChromaMarginX();
- int marginY = refyuv->getChromaMarginY();
- extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
- extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
- }
- }
- }
/* prepare estimates */
float guessScale[3], fencMean[3], refMean[3];
@@ -322,66 +308,12 @@
if (meanDiff < 0.5f && guessVal < epsilon)
continue;
- /* prepare inputs to weight analysis */
- pixel *orig;
- pixel *fref;
- int origstride, frefstride;
- int width, height;
- switch (yuv)
- {
- case 0:
- orig = fenc.lowresPlane[0];
- fref = refLowres.lowresPlane[0];
- origstride = frefstride = fenc.lumaStride;
- width = fenc.width;
- height = fenc.lines;
+ RefData *rd = &cacheData.ref[list][ref][yuv];
+ ChannelData *p = &cacheData.paramset[yuv];
- if (bMotionCompensate)
- {
- mcLuma(mcTemp, refLowres, mvCosts, fenc.intraCost, mvs);
- fref = mcTemp;
- }
- break;
-
- case 1:
- orig = picorig->getCbAddr();
- fref = refPic->getPicYuvOrg()->getCbAddr();
- origstride = frefstride = picorig->getCStride();
-
- /* Clamp the chroma dimensions to the nearest multiple of
- * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
- * blocks and weightCost measures 8x8 blocks. This
- * potentially ignores some edge pixels, but simplifies the
- * logic and prevents reading uninitialized pixels. Lowres
- * planes are border extended and require no clamping. */
- width = ((picorig->getWidth() >> 4) << 4) >> hshift;
- height = ((picorig->getHeight() >> 4) << 4) >> vshift;
-
- if (bMotionCompensate)
- {
- mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
- fref = mcTemp;
- }
- break;
-
- case 2:
- fref = refPic->getPicYuvOrg()->getCrAddr();
- orig = picorig->getCrAddr();
- origstride = frefstride = picorig->getCStride();
- width = ((picorig->getWidth() >> 4) << 4) >> hshift;
- height = ((picorig->getHeight() >> 4) << 4) >> vshift;
-
- if (bMotionCompensate)
- {
- mcChroma(mcTemp, fref, fenc, frefstride, mvCosts, fenc.intraCost, mvs, height, width, csp);
- fref = mcTemp;
- }
- break;
-
- default:
- // idiotic compilers must die
- return false;
- }
+ uint32_t origscore = rd->unweightedCost;
+ if (!origscore)
+ continue;
wpScalingParam w;
w.setFromWeightAndOffset((int)(guessScale[yuv] * (1 << log2denom[yuv]) + 0.5), 0, log2denom[yuv]);
@@ -389,10 +321,6 @@
int minscale = w.inputWeight;
int minoff = 0;
- uint32_t origscore = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, NULL);
- if (!origscore)
- continue;
-
uint32_t minscore = origscore;
bool bFound = false;
static const int sD = 4; // scale distance
@@ -431,7 +359,7 @@
}
SET_WEIGHT(w, true, curScale, mindenom, ioff);
- uint32_t s = weightCost(orig, origstride, fref, frefstride, weightTemp, width, height, &w);
+ uint32_t s = weightCost(p->orig, p->stride, rd->fref, p->stride, cacheData.weightTemp, p->width, p->height, &w);
COPY4_IF_LT(minscore, s, minscale, curScale, minoff, ioff, bFound, true);
if (minoff == curOffset - oD && ioff != curOffset - oD)
break;
@@ -473,28 +401,171 @@
return true;
}
+
+bool prepareRef(cache& cacheData, TComSlice& slice, x265_param& param) // fills cacheData.colorFormat, paramset[] and ref[][][] (reference pointer + unweighted cost) for every list/ref/plane
+{
+ TComPic *pic = slice.getPic();
+ TComPicYuv *picorig = pic->getPicYuvOrg();
+ Lowres& fenc = pic->m_lowres;
+ int curPoc = slice.getPOC();
+ CspData *c = &cacheData.colorFormat;
+ c->csp = picorig->m_picCsp;
+ c->hshift = CHROMA_H_SHIFT(c->csp);
+ c->vshift = CHROMA_V_SHIFT(c->csp);
+
+ for (int list = 0; list < cacheData.numPredDir; list++) // numPredDir must already be set by the caller
+ {
+ for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+ {
+ TComPic *refPic = slice.getRefPic(list, ref);
+ Lowres& refLowres = refPic->m_lowres;
+
+ MV *mvs = NULL;
+ int32_t *mvCosts = NULL;
+ bool bMotionCompensate = false;
+
+ /* test whether POC distance is within range for lookahead structures */
+ int diffPoc = abs(curPoc - refPic->getPOC());
+ if (diffPoc <= param.bframes + 1)
+ {
+ mvs = fenc.lowresMvs[list][diffPoc - 1]; // NOTE(review): indexes with diffPoc - 1, so this presumes diffPoc >= 1 - confirm a reference never shares the current POC
+ mvCosts = fenc.lowresMvCosts[list][diffPoc - 1];
+ /* test whether this motion search was performed by lookahead */
+ if (mvs[0].x != 0x7FFF)
+ {
+ bMotionCompensate = true;
+
+ /* reference chroma planes must be extended prior to being
+ * used as motion compensation sources */
+ if (!refPic->m_bChromaPlanesExtended)
+ {
+ refPic->m_bChromaPlanesExtended = true; // flag is set before extending so the borders are extended only once per reference picture
+ TComPicYuv *refyuv = refPic->getPicYuvOrg();
+ int stride = refyuv->getCStride();
+ int width = refyuv->getWidth() >> c->hshift;
+ int height = refyuv->getHeight() >> c->vshift;
+ int marginX = refyuv->getChromaMarginX();
+ int marginY = refyuv->getChromaMarginY();
+ extendPicBorder(refyuv->getCbAddr(), stride, width, height, marginX, marginY);
+ extendPicBorder(refyuv->getCrAddr(), stride, width, height, marginX, marginY);
+ }
+ }
+ }
+ for (int yuv = 0; yuv < 3; yuv++)
+ {
+ /* prepare inputs to weight analysis */
+ RefData *rd = &cacheData.ref[list][ref][yuv];
+ ChannelData *p = &cacheData.paramset[yuv]; // per-plane only: rewritten with the same fenc/picorig values for every (list, ref)
+ pixel* &buf = rd->mcbuf; // reference alias, so the allocation below is stored directly in the cache entry
+
+ switch (yuv)
+ {
+ case 0: // luma: uses the lowres planes
+ p->orig = fenc.lowresPlane[0];
+ p->stride = fenc.lumaStride;
+ p->width = fenc.width;
+ p->height = fenc.lines;
+ rd->fref = refLowres.lowresPlane[0];
+ if (bMotionCompensate)
+ {
+ buf = X265_MALLOC(pixel, p->stride * p->height);
+ if (buf) // on allocation failure fref keeps pointing at the non-compensated reference plane
+ {
+ mcLuma(buf, refLowres, mvCosts, fenc.intraCost, mvs);
+ rd->fref = buf;
+ }
+ }
+ break;
+
+ case 1: // Cb: uses the full-resolution original pictures
+ p->orig = picorig->getCbAddr();
+ p->stride = picorig->getCStride();
+ rd->fref = refPic->getPicYuvOrg()->getCbAddr();
+
+ /* Clamp the chroma dimensions to the nearest multiple of
+ * 8x8 blocks (or 16x16 for 4:4:4) since mcChroma uses lowres
+ * blocks and weightCost measures 8x8 blocks. This
+ * potentially ignores some edge pixels, but simplifies the
+ * logic and prevents reading uninitialized pixels. Lowres
+ * planes are border extended and require no clamping. */
+ p->width = ((picorig->getWidth() >> 4) << 4) >> c->hshift;
+ p->height = ((picorig->getHeight() >> 4) << 4) >> c->vshift;
+ if (bMotionCompensate)
+ {
+ buf = X265_MALLOC(pixel, p->stride * p->height);
+ if (buf) // same fallback as luma when the allocation fails
+ {
+ mcChroma(buf, rd->fref, fenc, p->stride, mvCosts, fenc.intraCost, mvs, p->height, p->width, c);
+ rd->fref = buf;
+ }
+ }
+ break;
+
+ case 2: // Cr: same geometry and clamping as Cb
+ rd->fref = refPic->getPicYuvOrg()->getCrAddr();
+ p->orig = picorig->getCrAddr();
+ p->stride = picorig->getCStride();
+ p->width = ((picorig->getWidth() >> 4) << 4) >> c->hshift;
+ p->height = ((picorig->getHeight() >> 4) << 4) >> c->vshift;
+ if (bMotionCompensate)
+ {
+ buf = X265_MALLOC(pixel, p->stride * p->height);
+ if (buf)
+ {
+ mcChroma(buf, rd->fref, fenc, p->stride, mvCosts, fenc.intraCost, mvs, p->height, p->width, c);
+ rd->fref = buf;
+ }
+ }
+ break;
+
+ default: // not reachable: yuv only takes the values 0, 1 and 2
+ return false;
+ }
+ rd->unweightedCost = weightCost(p->orig, p->stride, rd->fref, p->stride, cacheData.weightTemp, p->width, p->height, NULL); // cost with a NULL weight argument, cached so tryCommonDenom() does not recompute it per denominator
+ }
+ }
+ }
+ return true; // NOTE(review): the visible caller, weightAnalyse(), ignores this return value
+}
+
+void tearDown(cache& cacheData, TComSlice& slice) // releases weightTemp and every per-reference mcbuf held by cacheData
+{
+ X265_FREE(cacheData.weightTemp);
+ for (int list = 0; list < cacheData.numPredDir; list++) // same list/ref/plane iteration bounds as prepareRef()
+ {
+ for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
+ {
+ for (int yuv = 0; yuv < 3; yuv++)
+ {
+ X265_FREE(cacheData.ref[list][ref][yuv].mcbuf); // NOTE(review): mcbuf is still NULL where no buffer was allocated (cache is memset to 0 in weightAnalyse); presumably X265_FREE accepts NULL - confirm
+ }
+ }
+ }
+}
}
namespace x265 {
void weightAnalyse(TComSlice& slice, x265_param& param)
{
wpScalingParam wp[2][MAX_NUM_REF][3];
- int numPredDir = slice.isInterP() ? 1 : 2;
/* TODO: perf - collect some of this data into a struct which is passed to
* tryCommonDenom() to avoid recalculating some data. Motion compensated
* reference planes can be cached this way */
-
+
+ weightp::cache cacheData;
+ memset(&cacheData, 0, sizeof(cacheData));
TComPicYuv *orig = slice.getPic()->getPicYuvOrg();
- pixel *temp = X265_MALLOC(pixel, 2 * orig->getStride() * orig->getHeight());
-
- if (temp)
+ cacheData.numPredDir = slice.isInterP() ? 1 : 2;
+ cacheData.weightTemp = X265_MALLOC(pixel, orig->getStride() * orig->getHeight());
+ if (cacheData.weightTemp)
{
+ prepareRef(cacheData, slice, param);
int denom = slice.getNumRefIdx(REF_PIC_LIST_0) > 3 ? 7 : 6;
do
{
/* reset weight states */
- for (int list = 0; list < numPredDir; list++)
+ for (int list = 0; list < cacheData.numPredDir; list++)
{
for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
{
@@ -503,14 +574,12 @@
SET_WEIGHT(wp[list][ref][2], false, 1 << denom, denom, 0);
}
}
-
- if (weightp::tryCommonDenom(slice, param, wp, temp, denom))
+ if (weightp::tryCommonDenom(slice, wp, cacheData, denom))
break;
- denom--; // decrement to satisfy the range limitation
+ denom--; // decrement to satisfy the range limitation
}
while (denom > 0);
-
- X265_FREE(temp);
+ tearDown(cacheData, slice);
}
if (param.logLevel >= 4)
@@ -520,7 +589,7 @@
bool bWeighted = false;
p = sprintf(buf, "poc: %d weights:", slice.getPOC());
- for (int list = 0; list < numPredDir; list++)
+ for (int list = 0; list < cacheData.numPredDir; list++)
{
for (int ref = 0; ref < slice.getNumRefIdx(list); ref++)
{
More information about the x265-devel
mailing list