<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jan 13, 2015 at 1:07 PM, chen <span dir="ltr"><<a href="mailto:chenm003@163.com" target="_blank">chenm003@163.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex"><div style="line-height:1.7;color:rgb(0,0,0);font-size:14px;font-family:arial"><div><br><br><br><br><br></div>
<div></div>
<div></div>
<div><br></div><pre><br>At 2015-01-13 15:11:09,<a href="mailto:dnyaneshwar@multicorewareinc.com" target="_blank">dnyaneshwar@multicorewareinc.com</a> wrote:
># HG changeset patch
># User Ashok Kumar Mishra<<a href="mailto:ashok@multicorewareinc.com" target="_blank">ashok@multicorewareinc.com</a>>
># Date 1421054362 -19800
># Mon Jan 12 14:49:22 2015 +0530
># Node ID 9ec15ed0fe2a56eaf0c45954b750cac7293ffcd9
># Parent 50a2071500dc4b813edb357c298867931bbf42a1
>intra refactoring: removed storing left neighbouring pixels in 144 bytes stride - new
>
>diff -r 50a2071500dc -r 9ec15ed0fe2a source/common/predict.cpp
>--- a/source/common/predict.cpp Mon Jan 12 20:01:58 2015 +0530
>+++ b/source/common/predict.cpp Mon Jan 12 14:49:22 2015 +0530
>@@ -83,19 +83,18 @@
> {
> int tuSize = 1 << log2TrSize;
>
>- pixel* refLft;
>- pixel* refAbv;
>+ pixel *refLft, *refAbv;
>
>- if (g_intraFilterFlags[dirMode] & tuSize)
>+ if (!(g_intraFilterFlags[dirMode] & tuSize))
>+ {
>+ refLft = m_refLeft + tuSize - 1;
>+ refAbv = m_refAbove + tuSize - 1;
>+ }
>+ else
> {
> refLft = m_refLeftFlt + tuSize - 1;
> refAbv = m_refAboveFlt + tuSize - 1;
> }
>- else
>- {
>- refLft = m_refLeft + tuSize - 1;
>- refAbv = m_refAbove + tuSize - 1;
>- }<br><br>It is the same code; why reorder here?</pre></div></blockquote><div>In the next patch, the above code is replaced by a single line, as below:</div><div>pixel* srcPix = (!(g_intraFilterFlags[dirMode] & tuSize)) ? intraNeighbourBuf[0] : intraNeighbourBuf[1];</div><div>Now there will be no separate buffers for refAbove and refLeft pixels. </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex"><div style="line-height:1.7;color:rgb(0,0,0);font-size:14px;font-family:arial"><pre><span class=""><br>
> bool bFilter = log2TrSize <= 4;
> int sizeIdx = log2TrSize - 2;
>@@ -116,8 +115,10 @@
> pixel* left = buf0 + bufOffset;
>
> int limit = (dirMode <= 25 && dirMode >= 11) ? (tuSize + 1 + 1) : (tuSize2 + 1);
>- for (int k = 0; k < limit; k++)
>- left[k] = src[k * ADI_BUF_STRIDE];
>+
>+ left[0] = src[0];
>+ for (int k = 1; k < limit; k++)
>+ left[k] = src[k + tuSize2];<br><br></span>Why not memcpy?</pre></div></blockquote><div>Agreed. It can be replaced by a memcpy(); I will send that change in the next patch.</div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left-width:1px;border-left-color:rgb(204,204,204);border-left-style:solid;padding-left:1ex"><div style="line-height:1.7;color:rgb(0,0,0);font-size:14px;font-family:arial"><pre><span class=""><br>
> if (chFmt == X265_CSP_I444 && (g_intraFilterFlags[dirMode] & tuSize))
> {
>@@ -669,10 +670,11 @@
> refAbove += bufOffset;
> refLeft += bufOffset;
>
>- // ADI_BUF_STRIDE * (2 * tuSize + 1);
> memcpy(refAbove, adiBuf, (tuSize2 + 1) * sizeof(pixel));
>- for (int k = 0; k < tuSize2 + 1; k++)
>- refLeft[k] = adiBuf[k * ADI_BUF_STRIDE];
>+
>+ refLeft[0] = adiBuf[0];
>+ for (int k = 1; k < tuSize2 + 1 ; k++)
>+ refLeft[k] = adiBuf[k + tuSize2];<br></span>Same as above<div><div class="h5"><br>
> if (dirMode == ALL_IDX ? (8 | 16 | 32) & tuSize : g_intraFilterFlags[dirMode] & tuSize)
> {
>@@ -685,46 +687,45 @@
> if (bStrongSmoothing)
> {
> const int trSize = 32;
>- const int trSize2 = 32 * 2;
>+ const int trSize2 = trSize << 1;
> const int threshold = 1 << (X265_DEPTH - 5);
> int refBL = refLeft[trSize2];
> int refTL = refAbove[0];
> int refTR = refAbove[trSize2];
>- bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) < threshold &&
>- abs(refTL + refTR - 2 * refAbove[trSize]) < threshold);
>+ bStrongSmoothing = (abs(refBL + refTL - (refLeft[trSize] << 1)) < threshold &&
>+ abs(refTL + refTR - (refAbove[trSize] << 1)) < threshold);
>
> if (bStrongSmoothing)
> {
> // bilinear interpolation
> const int shift = 5 + 1; // log2TrSize + 1;
> int init = (refTL << shift) + tuSize;
>- int delta;
>+ int deltaL, deltaR;
>
> refLeftFlt[0] = refAboveFlt[0] = refAbove[0];
>
> //TODO: Performance Primitive???
>- delta = refBL - refTL;
>+ deltaL = refBL - refTL; deltaR = refTR - refTL;
> for (int i = 1; i < trSize2; i++)
>- refLeftFlt[i] = (pixel)((init + delta * i) >> shift);
>+ {
>+ refLeftFlt[i] = (pixel)((init + deltaL * i) >> shift);
>+ refAboveFlt[i] = (pixel)((init + deltaR * i) >> shift);
>+ }
> refLeftFlt[trSize2] = refLeft[trSize2];
>-
>- delta = refTR - refTL;
>- for (int i = 1; i < trSize2; i++)
>- refAboveFlt[i] = (pixel)((init + delta * i) >> shift);
> refAboveFlt[trSize2] = refAbove[trSize2];
>
> return;
> }
> }
>
>- refLeft[-1] = refAbove[1];
>- for (int i = 0; i < tuSize2; i++)
>- refLeftFlt[i] = (refLeft[i - 1] + 2 * refLeft[i] + refLeft[i + 1] + 2) >> 2;
>+ refLeftFlt[0] = (refAbove[1] + (refLeft[0] << 1) + refLeft[1] + 2) >> 2;
>+ for (int i = 1; i < tuSize2; i++)
>+ refLeftFlt[i] = (refLeft[i - 1] + (refLeft[i] << 1) + refLeft[i + 1] + 2) >> 2;
> refLeftFlt[tuSize2] = refLeft[tuSize2];
>
> refAboveFlt[0] = refLeftFlt[0];
> for (int i = 1; i < tuSize2; i++)
>- refAboveFlt[i] = (refAbove[i - 1] + 2 * refAbove[i] + refAbove[i + 1] + 2) >> 2;
>+ refAboveFlt[i] = (refAbove[i - 1] + (refAbove[i] << 1) + refAbove[i + 1] + 2) >> 2;
> refAboveFlt[tuSize2] = refAbove[tuSize2];
> }
> }
>@@ -804,14 +805,15 @@
> uint32_t tuSize = intraNeighbors.tuSize;
> uint32_t refSize = tuSize * 2 + 1;
>
>+ // Nothing is available, perform DC prediction.
> if (numIntraNeighbor == 0)
> {
> // Fill border with DC value
> for (uint32_t i = 0; i < refSize; i++)
> adiRef[i] = dcValue;
>
>- for (uint32_t i = 1; i < refSize; i++)
>- adiRef[i * ADI_BUF_STRIDE] = dcValue;
>+ for (uint32_t i = 0; i < refSize - 1; i++)
>+ adiRef[i + refSize] = dcValue;
> }
> else if (numIntraNeighbor == totalUnits)
> {
>@@ -821,9 +823,10 @@
>
> // Fill left border with rec. samples
> adiTemp = adiOrigin - 1;
>- for (uint32_t i = 1; i < refSize; i++)
>+
>+ for (uint32_t i = 0; i < refSize - 1; i++)
> {
>- adiRef[i * ADI_BUF_STRIDE] = adiTemp[0];
>+ adiRef[i + refSize] = adiTemp[0];
> adiTemp += picStride;
> }
> }
>@@ -943,8 +946,8 @@
> memcpy(adiRef, adi, refSize * sizeof(*adiRef));
>
> adi = adiLineBuffer + refSize - 1;
>- for (int i = 1; i < (int)refSize; i++)
>- adiRef[i * ADI_BUF_STRIDE] = adi[-i];
>+ for (int i = 0; i < (int)refSize - 1; i++)
>+ adiRef[i + refSize] = adi[-(i + 1)];
> }
> }
>
>_______________________________________________
>x265-devel mailing list
><a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a>
><a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a>
</div></div></pre></div><br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br></div></div>