[x265] [PATCH] slicetype : intra cost estimates
Steve Borho
steve at borho.org
Mon Aug 12 13:27:39 CEST 2013
On Mon, Aug 12, 2013 at 5:56 AM, <gopu at multicorewareinc.com> wrote:
> # HG changeset patch
> # User ggopu
> # Date 1376304994 -19800
> # Node ID 666b6206628994d42c826afa9b01e7499b0d150c
> # Parent 8438cad92049281833caa951cc48f6d90c7434eb
> slicetype : intra cost estimates
>
> diff -r 8438cad92049 -r 666b62066289 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Sun Aug 11 21:41:12 2013 -0700
> +++ b/source/encoder/slicetype.cpp Mon Aug 12 16:26:34 2013 +0530
> @@ -36,9 +36,9 @@
> // taking any of the threading changes because we will eventually use the
> x265
> // thread pool and wavefront processing.
>
> -#define QP_BD_OFFSET (6*(X265_DEPTH-8))
> +#define QP_BD_OFFSET (6 * (X265_DEPTH - 8))
> // arbitrary, but low because SATD scores are 1/4 normal
> -#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
> +#define X264_LOOKAHEAD_QP (12 + QP_BD_OFFSET)
>
This macro should be named X265_LOOKAHEAD_QP (x265, not x264).
>
> // Under Construction
> #if defined(_MSC_VER)
> @@ -47,7 +47,6 @@
> #endif
>
> namespace x265 {
> -
> struct Lookahead
> {
> MotionEstimate me;
> @@ -58,7 +57,7 @@
>
> TComList<TComPic*> inputQueue; // input pictures in order
> received
> TComList<TComPic*> outputQueue; // pictures to be encoded, in
> encode order
> -
> +
> Lookahead(int _frameQueueSize)
> {
> me.setQP(X264_LOOKAHEAD_QP, 1.0);
> @@ -66,6 +65,7 @@
> frameQueueSize = _frameQueueSize;
> frames = new LookaheadFrame*[frameQueueSize];
> }
> +
> ~Lookahead()
> {
> if (frames)
> @@ -76,20 +76,21 @@
> int estimateCUCost(int cux, int cuy, int p0, int p1, int b, int
> do_search[2]);
> };
>
> -static inline int16_t x265_median( int16_t a, int16_t b, int16_t c )
> +static inline int16_t x265_median(int16_t a, int16_t b, int16_t c)
> {
> - int16_t t = (a-b)&((a-b)>>31);
> + int16_t t = (a - b) & ((a - b) >> 31);
> +
> a -= t;
> b += t;
> - b -= (b-c)&((b-c)>>31);
> - b += (a-b)&((a-b)>>31);
> + b -= (b - c) & ((b - c) >> 31);
> + b += (a - b) & ((a - b) >> 31);
> return b;
> }
>
> -static inline void x265_median_mv( MV &dst, MV a, MV b, MV c )
> +static inline void x265_median_mv(MV &dst, MV a, MV b, MV c)
> {
> - dst.x = x265_median( a.x, b.x, c.x );
> - dst.y = x265_median( a.y, b.y, c.y );
> + dst.x = x265_median(a.x, b.x, c.x);
> + dst.y = x265_median(a.y, b.y, c.y);
> }
>
> int Lookahead::estimateFrameCost(int p0, int p1, int b, int bIntraPenalty)
> @@ -163,17 +164,18 @@
> const int cu_size = g_maxCUWidth / 2;
> const int pel_offset = cu_size * cux + cu_size * cuy * stride;
> const int merange = 16;
> +
> me.setSourcePU(pel_offset, cu_size, cu_size);
>
> - MV (*fenc_mvs[2]) = { &fenc->lowresMvs[0][b - p0 - 1][cu_xy],
> &fenc->lowresMvs[1][p1 - b - 1][cu_xy] };
> - int (*fenc_costs[2]) = { &fenc->lowresMvCosts[0][b - p0 - 1][cu_xy],
> &fenc->lowresMvCosts[1][p1 - b - 1][cu_xy] };
> + MV(*fenc_mvs[2]) = { &fenc->lowresMvs[0][b - p0 - 1][cu_xy],
> &fenc->lowresMvs[1][p1 - b - 1][cu_xy] };
> + int(*fenc_costs[2]) = { &fenc->lowresMvCosts[0][b - p0 - 1][cu_xy],
> &fenc->lowresMvCosts[1][p1 - b - 1][cu_xy] };
>
> MV mvmin, mvmax;
> // TODO: calculate search extents
>
> for (int i = 0; i < 2; i++)
> {
> - if (!do_search[i])
> + if (!do_search[i])
> continue;
>
> int numc = 0;
> @@ -199,7 +201,7 @@
> mvp = mvc[0];
> else
> {
> - x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]);
> + x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]);
> }
>
>
The uncrustify changes should be done in a separate patch.
> *fenc_costs[i] = me.motionEstimate(i ? fref1 : fref0, mvmin,
> mvmax, mvp, numc, mvc, merange, *fenc_mvs[i]);
> @@ -209,7 +211,69 @@
> // TODO: add bidir
> }
>
> - // TODO: copy intra SATD cost analysis here (DC + planar + all-angs)
> + UInt width = fenc->cuWidth;
>
Isn't cuWidth the width of the frame in CUs? You already have a cu_size
variable in this function.
> + UInt numModesAvailable = 35; //total number of Intra modes
> + Int nLog2SizeMinus2 = g_convertToBit[width];
> + x265::pixelcmp_t sa8d = x265::primitives.sa8d[nLog2SizeMinus2];
>
drop everything between here
> + UInt64 CandCostList[FAST_UDI_MAX_RDMODE_NUM];
> + UInt numModesForFullRD = 5; // Currently set as 5 at default
> + UInt CandNum;
> + UInt partOffset = 0;
> +
> + assert(numModesForFullRD < numModesAvailable);
> +
> + for (UInt i = 0; i < numModesForFullRD; i++)
> + {
> + CandCostList[i] = MAX_UINT;
> + }
> +
> + ALIGN_VAR_32(pixel, buffer[64 * 64]); // current cu buffer
> + primitives.blockcpy_pp(fenc->cuWidth, fenc->cuHeight, buffer,
> FENC_STRIDE, fenc->m_lumaPlane[0][0] + pel_offset, fenc->m_lumaStride);
>
and here, and use the m_me.fenc aligned copy of the block
> + pixel *pAbove0 = fenc->m_lumaPlane[0][0] + pel_offset -
> fenc->m_lumaStride;
> + pixel *pAbove1 = fenc->m_lumaPlane[0][0] + pel_offset -
> fenc->cuHeight;
> + pixel *pLeft0 = fenc->m_lumaPlane[0][0] + pel_offset +
> fenc->m_lumaStride;
> + pixel *pLeft1 = fenc->m_lumaPlane[0][0] + pel_offset + fenc->cuWidth;
>
>
I suspect these need to be separately allocated buffers on the stack. Min
Chen should review this.
> + CandNum = 0;
> + UInt modeCosts[35];
>
drop modeCosts and CandNum, see below.
> + // 33 Angle modes once
> + ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
> + ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
> +
> + if (fenc->cuWidth <= 32)
> + {
>
wrong variable again, and cu_size is always <= 32
> + // 1
> + primitives.intra_pred_dc(pAbove0 + 1, pLeft0 + 1, buffer, stride,
> width, (width <= 16));
> + modeCosts[DC_IDX] = sa8d(fenc->m_lumaPlane[0][0],
> fenc->m_lumaStride, buffer, fenc->stride);
> +
> + // 0
> + pixel *above = pAbove0;
> + pixel *left = pLeft0;
> + if (width >= 8 && width <= 32)
> + {
> + above = pAbove1;
> + left = pLeft1;
> + }
> + primitives.intra_pred_planar((pixel*)above + 1, (pixel*)left + 1,
> buffer, fenc->stride, width);
> + modeCosts[PLANAR_IDX] = sa8d(fenc->m_lumaPlane[0][0],
> fenc->m_lumaStride, buffer, fenc->stride);
> +
> + // Transpose NxN
> + x265::primitives.transpose[nLog2SizeMinus2](buf_trans,
> (pixel*)fenc, stride);
> +
> + x265::primitives.intra_pred_allangs[nLog2SizeMinus2](tmp,
> pAbove0, pLeft0, pAbove1, pLeft1, (width <= 16));
> +
> + // TODO: We need SATD_x4 here
> + for (UInt mode = 2; mode < numModesAvailable; mode++)
> + {
> + bool modeHor = (mode < 18);
> + pixel *cmp = (modeHor ? buf_trans : fenc->m_lumaPlane[0][0]);
> + intptr_t srcStride = (modeHor ? width : stride);
> + modeCosts[mode] = sa8d(cmp, srcStride, &tmp[(mode - 2) *
> (width * width)], width);
> + }
>
Don't keep an array of costs; simply remember the lowest SATD cost of them all.
> + }
> +
> return 0;
> }
>
> @@ -622,6 +686,7 @@
> (dst)[2] = &(src)[2][i_pel_offset]; \
> (dst)[3] = &(src)[3][i_pel_offset]; \
> }
> +
> #define LOAD_WPELS_LUMA(dst, src) \
> (dst) = &(src)[i_pel_offset];
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> http://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/private/x265-devel/attachments/20130812/5076e72e/attachment.html>
More information about the x265-devel
mailing list