[x265] [PATCH] A few small performance improvements for intrapred c code
chen
chenm003 at 163.com
Wed Oct 1 17:32:26 CEST 2014
We don't worry about the performance of the C model; it is for reference only.
I prefer clear code; it is easy to port to different platforms with an algorithm redesign.
At 2014-10-01 09:43:15,dtyx265 at gmail.com wrote:
># HG changeset patch
># User David T Yuen <dtyx265 at gmail.com>
># Date 1412127675 25200
># Node ID 353ccf9c2b2adc37cfb1bc7124fcca131599e0ad
># Parent 5a6845566d1492d29af29ecc0cf75d644994735c
>A few small performance improvements for intrapred c code
>
>diff -r 5a6845566d14 -r 353ccf9c2b2a source/common/intrapred.cpp
>--- a/source/common/intrapred.cpp Mon Sep 29 17:37:47 2014 -0500
>+++ b/source/common/intrapred.cpp Tue Sep 30 18:41:15 2014 -0700
>@@ -35,10 +35,6 @@
> for (w = 0; w < width; w++)
> {
> sum += above[w];
>- }
>-
>- for (w = 0; w < width; w++)
>- {
> sum += left[w];
> }
>
>@@ -57,11 +53,10 @@
> dst[x] = (pixel)((above[x] + 3 * dst[x] + 2) >> 2);
> }
>
>- dst += dststride;
> for (int y = 1; y < size; y++)
> {
>+ dst += dststride;
> *dst = (pixel)((left[y] + 3 * *dst + 2) >> 2);
>- dst += dststride;
> }
> }
>
>@@ -134,13 +129,11 @@
> }
>
> template<int width>
>-void intra_pred_ang_c(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
>+void intra_pred_ang_c1(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
> {
> // Map the mode index to main prediction direction and angle
> int k, l;
>- bool modeHor = (dirMode < 18);
>- bool modeVer = !modeHor;
>- int intraPredAngle = modeVer ? (int)dirMode - VER_IDX : modeHor ? -((int)dirMode - HOR_IDX) : 0;
>+ int intraPredAngle = dirMode > 17 ? (int)dirMode - VER_IDX : -((int)dirMode - HOR_IDX);
> int absAng = abs(intraPredAngle);
> int signAng = intraPredAngle < 0 ? -1 : 1;
>
>@@ -154,27 +147,12 @@
>
> // Do angular predictions
> {
>- pixel* refMain;
>- pixel* refSide;
>-
>- // Initialise the Main and Left reference array.
>- if (intraPredAngle < 0)
>+ pixel* refMain = refLeft;
>+ pixel* refSide = refAbove;
>+ if (dirMode > 17)
> {
>- refMain = (modeVer ? refAbove : refLeft); // + (width - 1);
>- refSide = (modeVer ? refLeft : refAbove); // + (width - 1);
>-
>- // Extend the Main reference to the left.
>- int invAngleSum = 128; // rounding for (shift by 8)
>- for (k = -1; k > width * intraPredAngle >> 5; k--)
>- {
>- invAngleSum += invAngle;
>- refMain[k] = refSide[invAngleSum >> 8];
>- }
>- }
>- else
>- {
>- refMain = modeVer ? refAbove : refLeft;
>- refSide = modeVer ? refLeft : refAbove;
>+ refMain = refAbove;
>+ refSide = refLeft;
> }
>
> if (intraPredAngle == 0)
>@@ -197,6 +175,18 @@
> }
> else
> {
>+ // Initialise the Main and Left reference array.
>+ if (intraPredAngle < 0)
>+ {
>+ // Extend the Main reference to the left.
>+ int invAngleSum = 128; // rounding for (shift by 8)
>+ for (k = -1; k > width * intraPredAngle >> 5; k--)
>+ {
>+ invAngleSum += invAngle;
>+ refMain[k] = refSide[invAngleSum >> 8];
>+ }
>+ }
>+
> int deltaPos = 0;
> int deltaInt;
> int deltaFract;
>@@ -227,18 +217,24 @@
> }
> }
> }
>+ }
>+}
>
>- // Flip the block if this is the horizontal mode
>- if (modeHor)
>+template<int width>
>+void intra_pred_ang_c(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
>+{
>+ int k, l;
>+ intra_pred_ang_c1<width>(dst, dstStride, refLeft, refAbove, dirMode, bFilter);
>+
>+ if (dirMode < 18)
>+ {
>+ for (k = 0; k < width - 1; k++)
> {
>- for (k = 0; k < width - 1; k++)
>+ for (l = k + 1; l < width; l++)
> {
>- for (l = k + 1; l < width; l++)
>- {
>- pixel tmp = dst[k * dstStride + l];
>- dst[k * dstStride + l] = dst[l * dstStride + k];
>- dst[l * dstStride + k] = tmp;
>- }
>+ dst[l * dstStride + k] ^= dst[k * dstStride + l];
>+ dst[k * dstStride + l] ^= dst[l * dstStride + k];
>+ dst[l * dstStride + k] ^= dst[k * dstStride + l];
> }
> }
> }
>@@ -250,28 +246,22 @@
> const int size = 1 << log2Size;
> for (int mode = 2; mode <= 34; mode++)
> {
>- pixel *left = (g_intraFilterFlags[mode] & size ? left1 : left0);
>- pixel *above = (g_intraFilterFlags[mode] & size ? above1 : above0);
>+ pixel *left;
>+ pixel *above;
>+
>+ if (g_intraFilterFlags[mode] & size)
>+ {
>+ left = left1;
>+ above = above1;
>+ }
>+ else
>+ {
>+ left = left0;
>+ above = above0;
>+ }
> pixel *out = dest + ((mode - 2) << (log2Size * 2));
>
>- intra_pred_ang_c<size>(out, size, left, above, mode, bLuma);
>-
>- // Optimize code don't flip buffer
>- bool modeHor = (mode < 18);
>-
>- // transpose the block if this is a horizontal mode
>- if (modeHor)
>- {
>- for (int k = 0; k < size - 1; k++)
>- {
>- for (int l = k + 1; l < size; l++)
>- {
>- pixel tmp = out[k * size + l];
>- out[k * size + l] = out[l * size + k];
>- out[l * size + k] = tmp;
>- }
>- }
>- }
>+ intra_pred_ang_c1<size>(out, size, left, above, mode, bLuma);
> }
> }
> }
>_______________________________________________
>x265-devel mailing list
>x265-devel at videolan.org
>https://mailman.videolan.org/listinfo/x265-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20141001/768c3b40/attachment-0001.html>
More information about the x265-devel
mailing list