[x265] [PATCH 01 of 29] intra refactoring: removed storing left neighbouring pixels in 144 bytes stride - new
Ashok Kumar Mishra
ashok at multicorewareinc.com
Tue Jan 13 09:17:56 CET 2015
On Tue, Jan 13, 2015 at 1:07 PM, chen <chenm003 at 163.com> wrote:
>
>
>
>
>
>
>
> At 2015-01-13 15:11:09,dnyaneshwar at multicorewareinc.com wrote:
> ># HG changeset patch
> ># User Ashok Kumar Mishra<ashok at multicorewareinc.com>
> ># Date 1421054362 -19800
> ># Mon Jan 12 14:49:22 2015 +0530
> ># Node ID 9ec15ed0fe2a56eaf0c45954b750cac7293ffcd9
> ># Parent 50a2071500dc4b813edb357c298867931bbf42a1
> >intra refactoring: removed storing left neighbouring pixels in 144 bytes stride - new
> >
> >diff -r 50a2071500dc -r 9ec15ed0fe2a source/common/predict.cpp
> >--- a/source/common/predict.cpp Mon Jan 12 20:01:58 2015 +0530
> >+++ b/source/common/predict.cpp Mon Jan 12 14:49:22 2015 +0530
> >@@ -83,19 +83,18 @@
> > {
> > int tuSize = 1 << log2TrSize;
> >
> >- pixel* refLft;
> >- pixel* refAbv;
> >+ pixel *refLft, *refAbv;
> >
> >- if (g_intraFilterFlags[dirMode] & tuSize)
> >+ if (!(g_intraFilterFlags[dirMode] & tuSize))
> >+ {
> >+ refLft = m_refLeft + tuSize - 1;
> >+ refAbv = m_refAbove + tuSize - 1;
> >+ }
> >+ else
> > {
> > refLft = m_refLeftFlt + tuSize - 1;
> > refAbv = m_refAboveFlt + tuSize - 1;
> > }
> >- else
> >- {
> >- refLft = m_refLeft + tuSize - 1;
> >- refAbv = m_refAbove + tuSize - 1;
> >- }
>
> It is the same code; why reorder it here?
>
> In the next patch, the above code is replaced by a single line, as shown below:
pixel* srcPix = (!(g_intraFilterFlags[dirMode] & tuSize)) ?
intraNeighbourBuf[0] : intraNeighbourBuf[1];
Now there will be no separate buffers for refAbove and refLeft pixels.
>
>
>
> > bool bFilter = log2TrSize <= 4;
> > int sizeIdx = log2TrSize - 2;
> >@@ -116,8 +115,10 @@
> > pixel* left = buf0 + bufOffset;
> >
> > int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
> >- for (int k = 0; k < limit; k++)
> >- left[k] = src[k * ADI_BUF_STRIDE];
> >+
> >+ left[0] = src[0];
> >+ for (int k = 1; k < limit; k++)
> >+ left[k] = src[k + tuSize2];
>
> Why not memcpy?
>
> Agreed. It can be replaced by a memcpy() and sent in the next patch.
>
>
>
> > if (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize))
> > {
> >@@ -669,10 +670,11 @@
> > refAbove += bufOffset;
> > refLeft += bufOffset;
> >
> >- // ADI_BUF_STRIDE * (2 * tuSize + 1);
> > memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
> >- for (int k = 0; k < tuSize2 + 1; k++)
> >- refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
> >+
> >+ refLeft[0] = adiBuf[0];
> >+ for (int k = 1; k < tuSize2 + 1 ; k++)
> >+ refLeft[k] = adiBuf[k + tuSize2];
> Same as above
>
>
>
> > if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize)
> > {
> >@@ -685,46 +687,45 @@
> > if (bStrongSmoothing)
> > {
> > const int trSize = 32;
> >- const int trSize2 = 32 * 2;
> >+ const int trSize2 = trSize << 1;
> > const int threshold = 1 << (X265_DEPTH - 5);
> > int refBL = refLeft[trSize2];
> > int refTL = refAbove[0];
> > int refTR = refAbove[trSize2];
> >- bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold &&
> >- abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);
> >+ bStrongSmoothing = (abs(refBL + refTL - (refLeft[trSize] << 1)) < threshold &&
> >+ abs(refTL + refTR - (refAbove[trSize] << 1)) < threshold);
> >
> > if (bStrongSmoothing)
> > {
> > // bilinear interpolation
> > const int shift = 5 + 1; // log2TrSize + 1;
> > int init = (refTL << shift) + tuSize;
> >- int delta;
> >+ int deltaL, deltaR;
> >
> > refLeftFlt[0] = refAboveFlt[0] = refAbove[0];
> >
> > //TODO: Performance Primitive???
> >- delta = refBL - refTL;
> >+ deltaL = refBL - refTL; deltaR = refTR - refTL;
> > for (int i = 1; i < trSize2; i++)
> >- refLeftFlt[i] = (pixel)((init + delta * i) >> shift);
> >+ {
> >+ refLeftFlt[i] = (pixel)((init + deltaL * i) >> shift);
> >+ refAboveFlt[i] = (pixel)((init + deltaR * i) >> shift);
> >+ }
> > refLeftFlt[trSize2] = refLeft[trSize2];
> >-
> >- delta = refTR - refTL;
> >- for (int i = 1; i < trSize2; i++)
> >- refAboveFlt[i] = (pixel)((init + delta * i) >> shift);
> > refAboveFlt[trSize2] = refAbove[trSize2];
> >
> > return;
> > }
> > }
> >
> >- refLeft[-1] = refAbove[1];
> >- for (int i = 0; i < tuSize2; i++)
> >- refLeftFlt[i] = (refLeft[i - 1] + 2 * refLeft[i] + refLeft[i + 1] + 2) >> 2;
> >+ refLeftFlt[0] = (refAbove[1] + (refLeft[0] << 1) + refLeft[1] + 2) >> 2;
> >+ for (int i = 1; i < tuSize2; i++)
> >+ refLeftFlt[i] = (refLeft[i - 1] + (refLeft[i] << 1) + refLeft[i + 1] + 2) >> 2;
> > refLeftFlt[tuSize2] = refLeft[tuSize2];
> >
> > refAboveFlt[0] = refLeftFlt[0];
> > for (int i = 1; i < tuSize2; i++)
> >- refAboveFlt[i] = (refAbove[i - 1] + 2 * refAbove[i] + refAbove[i + 1] + 2) >> 2;
> >+ refAboveFlt[i] = (refAbove[i - 1] + (refAbove[i] << 1) + refAbove[i + 1] + 2) >> 2;
> > refAboveFlt[tuSize2] = refAbove[tuSize2];
> > }
> > }
> >@@ -804,14 +805,15 @@
> > uint32_t tuSize = intraNeighbors.tuSize;
> > uint32_t refSize = tuSize * 2 + 1;
> >
> >+ // Nothing is available, perform DC prediction.
> > if (numIntraNeighbor == 0)
> > {
> > // Fill border with DC value
> > for (uint32_t i = 0; i < refSize; i++)
> > adiRef[i] = dcValue;
> >
> >- for (uint32_t i = 1; i < refSize; i++)
> >- adiRef[i * ADI_BUF_STRIDE] = dcValue;
> >+ for (uint32_t i = 0; i < refSize - 1; i++)
> >+ adiRef[i + refSize] = dcValue;
> > }
> > else if (numIntraNeighbor == totalUnits)
> > {
> >@@ -821,9 +823,10 @@
> >
> > // Fill left border with rec. samples
> > adiTemp = adiOrigin - 1;
> >- for (uint32_t i = 1; i < refSize; i++)
> >+
> >+ for (uint32_t i = 0; i < refSize - 1; i++)
> > {
> >- adiRef[i * ADI_BUF_STRIDE] = adiTemp[0];
> >+ adiRef[i + refSize] = adiTemp[0];
> > adiTemp += picStride;
> > }
> > }
> >@@ -943,8 +946,8 @@
> > memcpy(adiRef, adi, refSize * sizeof(*adiRef));
> >
> > adi = adiLineBuffer + refSize - 1;
> >- for (int i = 1; i < (int)refSize; i++)
> >- adiRef[i * ADI_BUF_STRIDE] = adi[-i];
> >+ for (int i = 0; i < (int)refSize - 1; i++)
> >+ adiRef[i + refSize] = adi[-(i + 1)];
> > }
> > }
> >
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150113/562516d1/attachment.html>
More information about the x265-devel
mailing list