[x265] [PATCH] A few small performance improvements for intrapred c code
dave
dtyx265 at gmail.com
Wed Oct 1 19:54:15 CEST 2014
I know the C model is not a priority; I just felt like doing it. Also,
I don't think I really changed the core algorithms. As my comment
states, these are "A few small performance improvements".
On 10/01/2014 08:32 AM, chen wrote:
> We don't worry about C model performance; it is for reference only.
> I like clear code; it is easy to port to different platforms
> with algorithm re-design.
> At 2014-10-01 09:43:15,dtyx265 at gmail.com wrote:
> ># HG changeset patch
> ># User David T Yuen <dtyx265 at gmail.com>
> ># Date 1412127675 25200
> ># Node ID 353ccf9c2b2adc37cfb1bc7124fcca131599e0ad
> ># Parent 5a6845566d1492d29af29ecc0cf75d644994735c
> >A few small performance improvements for intrapred c code
> >
> >diff -r 5a6845566d14 -r 353ccf9c2b2a source/common/intrapred.cpp
> >--- a/source/common/intrapred.cpp Mon Sep 29 17:37:47 2014 -0500
> >+++ b/source/common/intrapred.cpp Tue Sep 30 18:41:15 2014 -0700
> >@@ -35,10 +35,6 @@
> > for (w = 0; w < width; w++)
> > {
> > sum += above[w];
> >- }
> >-
> >- for (w = 0; w < width; w++)
> >- {
> > sum += left[w];
> > }
> >
> >@@ -57,11 +53,10 @@
> > dst[x] = (pixel)((above[x] + 3 * dst[x] + 2) >> 2);
> > }
> >
> >- dst += dststride;
> > for (int y = 1; y < size; y++)
> > {
> >+ dst += dststride;
> > *dst = (pixel)((left[y] + 3 * *dst + 2) >> 2);
> >- dst += dststride;
> > }
> > }
> >
> >@@ -134,13 +129,11 @@
> > }
> >
> > template<int width>
> >-void intra_pred_ang_c(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
> >+void intra_pred_ang_c1(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
> > {
> > // Map the mode index to main prediction direction and angle
> > int k, l;
> >- bool modeHor = (dirMode < 18);
> >- bool modeVer = !modeHor;
> >- int intraPredAngle = modeVer ? (int)dirMode - VER_IDX : modeHor ? -((int)dirMode - HOR_IDX) : 0;
> >+ int intraPredAngle = dirMode > 17 ? (int)dirMode - VER_IDX : -((int)dirMode - HOR_IDX);
> > int absAng = abs(intraPredAngle);
> > int signAng = intraPredAngle < 0 ? -1 : 1;
> >
> >@@ -154,27 +147,12 @@
> >
> > // Do angular predictions
> > {
> >- pixel* refMain;
> >- pixel* refSide;
> >-
> >- // Initialise the Main and Left reference array.
> >- if (intraPredAngle < 0)
> >+ pixel* refMain = refLeft;
> >+ pixel* refSide = refAbove;
> >+ if (dirMode > 17)
> > {
> >- refMain = (modeVer ? refAbove : refLeft); // + (width - 1);
> >- refSide = (modeVer ? refLeft : refAbove); // + (width - 1);
> >-
> >- // Extend the Main reference to the left.
> >- int invAngleSum = 128; // rounding for (shift by 8)
> >- for (k = -1; k > width * intraPredAngle >> 5; k--)
> >- {
> >- invAngleSum += invAngle;
> >- refMain[k] = refSide[invAngleSum >> 8];
> >- }
> >- }
> >- else
> >- {
> >- refMain = modeVer ? refAbove : refLeft;
> >- refSide = modeVer ? refLeft : refAbove;
> >+ refMain = refAbove;
> >+ refSide = refLeft;
> > }
> >
> > if (intraPredAngle == 0)
> >@@ -197,6 +175,18 @@
> > }
> > else
> > {
> >+ // Initialise the Main and Left reference array.
> >+ if (intraPredAngle < 0)
> >+ {
> >+ // Extend the Main reference to the left.
> >+ int invAngleSum = 128; // rounding for (shift by 8)
> >+ for (k = -1; k > width * intraPredAngle >> 5; k--)
> >+ {
> >+ invAngleSum += invAngle;
> >+ refMain[k] = refSide[invAngleSum >> 8];
> >+ }
> >+ }
> >+
> > int deltaPos = 0;
> > int deltaInt;
> > int deltaFract;
> >@@ -227,18 +217,24 @@
> > }
> > }
> > }
> >+ }
> >+}
> >
> >- // Flip the block if this is the horizontal mode
> >- if (modeHor)
> >+template<int width>
> >+void intra_pred_ang_c(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
> >+{
> >+ int k, l;
> >+ intra_pred_ang_c1<width>(dst, dstStride, refLeft, refAbove, dirMode, bFilter);
> >+
> >+ if (dirMode < 18)
> >+ {
> >+ for (k = 0; k < width - 1; k++)
> > {
> >- for (k = 0; k < width - 1; k++)
> >+ for (l = k + 1; l < width; l++)
> > {
> >- for (l = k + 1; l < width; l++)
> >- {
> >- pixel tmp = dst[k * dstStride + l];
> >- dst[k * dstStride + l] = dst[l * dstStride + k];
> >- dst[l * dstStride + k] = tmp;
> >- }
> >+ dst[l * dstStride + k] ^= dst[k * dstStride + l];
> >+ dst[k * dstStride + l] ^= dst[l * dstStride + k];
> >+ dst[l * dstStride + k] ^= dst[k * dstStride + l];
> > }
> > }
> > }
> >@@ -250,28 +246,22 @@
> > const int size = 1 << log2Size;
> > for (int mode = 2; mode <= 34; mode++)
> > {
> >- pixel *left = (g_intraFilterFlags[mode] & size ? left1 : left0);
> >- pixel *above = (g_intraFilterFlags[mode] & size ? above1 : above0);
> >+ pixel *left;
> >+ pixel *above;
> >+
> >+ if (g_intraFilterFlags[mode] & size)
> >+ {
> >+ left = left1;
> >+ above = above1;
> >+ }
> >+ else
> >+ {
> >+ left = left0;
> >+ above = above0;
> >+ }
> > pixel *out = dest + ((mode - 2) << (log2Size * 2));
> >
> >- intra_pred_ang_c<size>(out, size, left, above, mode, bLuma);
> >-
> >- // Optimize code don't flip buffer
> >- bool modeHor = (mode < 18);
> >-
> >- // transpose the block if this is a horizontal mode
> >- if (modeHor)
> >- {
> >- for (int k = 0; k < size - 1; k++)
> >- {
> >- for (int l = k + 1; l < size; l++)
> >- {
> >- pixel tmp = out[k * size + l];
> >- out[k * size + l] = out[l * size + k];
> >- out[l * size + k] = tmp;
> >- }
> >- }
> >- }
> >+ intra_pred_ang_c1<size>(out, size, left, above, mode, bLuma);
> > }
> > }
> > }
> >_______________________________________________
> >x265-devel mailing list
> >x265-devel at videolan.org
> >https://mailman.videolan.org/listinfo/x265-devel
>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141001/18258f98/attachment.html>
More information about the x265-devel
mailing list