[x265] [PATCH] slicetype : intra cost estimates

Steve Borho steve at borho.org
Mon Aug 12 13:27:39 CEST 2013


On Mon, Aug 12, 2013 at 5:56 AM, <gopu at multicorewareinc.com> wrote:

> # HG changeset patch
> # User ggopu
> # Date 1376304994 -19800
> # Node ID 666b6206628994d42c826afa9b01e7499b0d150c
> # Parent  8438cad92049281833caa951cc48f6d90c7434eb
> slicetype : intra cost estimates
>
> diff -r 8438cad92049 -r 666b62066289 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp      Sun Aug 11 21:41:12 2013 -0700
> +++ b/source/encoder/slicetype.cpp      Mon Aug 12 16:26:34 2013 +0530
> @@ -36,9 +36,9 @@
>  // taking any of the threading changes because we will eventually use the
> x265
>  // thread pool and wavefront processing.
>
> -#define QP_BD_OFFSET (6*(X265_DEPTH-8))
> +#define QP_BD_OFFSET (6 * (X265_DEPTH - 8))
>  // arbitrary, but low because SATD scores are 1/4 normal
> -#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
> +#define X264_LOOKAHEAD_QP (12 + QP_BD_OFFSET)
>

This should be renamed to X265_LOOKAHEAD_QP.


>
>  // Under Construction
>  #if defined(_MSC_VER)
> @@ -47,7 +47,6 @@
>  #endif
>
>  namespace x265 {
> -
>  struct Lookahead
>  {
>      MotionEstimate   me;
> @@ -58,7 +57,7 @@
>
>      TComList<TComPic*> inputQueue;       // input pictures in order
> received
>      TComList<TComPic*> outputQueue;      // pictures to be encoded, in
> encode order
> -
> +
>      Lookahead(int _frameQueueSize)
>      {
>          me.setQP(X264_LOOKAHEAD_QP, 1.0);
> @@ -66,6 +65,7 @@
>          frameQueueSize = _frameQueueSize;
>          frames = new LookaheadFrame*[frameQueueSize];
>      }
> +
>      ~Lookahead()
>      {
>          if (frames)
> @@ -76,20 +76,21 @@
>      int estimateCUCost(int cux, int cuy, int p0, int p1, int b, int
> do_search[2]);
>  };
>
> -static inline int16_t x265_median( int16_t a, int16_t b, int16_t c )
> +static inline int16_t x265_median(int16_t a, int16_t b, int16_t c)
>  {
> -    int16_t t = (a-b)&((a-b)>>31);
> +    int16_t t = (a - b) & ((a - b) >> 31);
> +
>      a -= t;
>      b += t;
> -    b -= (b-c)&((b-c)>>31);
> -    b += (a-b)&((a-b)>>31);
> +    b -= (b - c) & ((b - c) >> 31);
> +    b += (a - b) & ((a - b) >> 31);
>      return b;
>  }
>
> -static inline void x265_median_mv( MV &dst, MV a, MV b, MV c )
> +static inline void x265_median_mv(MV &dst, MV a, MV b, MV c)
>  {
> -    dst.x = x265_median( a.x, b.x, c.x );
> -    dst.y = x265_median( a.y, b.y, c.y );
> +    dst.x = x265_median(a.x, b.x, c.x);
> +    dst.y = x265_median(a.y, b.y, c.y);
>  }
>
>  int Lookahead::estimateFrameCost(int p0, int p1, int b, int bIntraPenalty)
> @@ -163,17 +164,18 @@
>      const int cu_size = g_maxCUWidth / 2;
>      const int pel_offset = cu_size * cux + cu_size * cuy * stride;
>      const int merange = 16;
> +
>      me.setSourcePU(pel_offset, cu_size, cu_size);
>
> -    MV (*fenc_mvs[2]) = { &fenc->lowresMvs[0][b - p0 - 1][cu_xy],
> &fenc->lowresMvs[1][p1 - b - 1][cu_xy] };
> -    int (*fenc_costs[2]) = { &fenc->lowresMvCosts[0][b - p0 - 1][cu_xy],
> &fenc->lowresMvCosts[1][p1 - b - 1][cu_xy] };
> +    MV(*fenc_mvs[2]) = { &fenc->lowresMvs[0][b - p0 - 1][cu_xy],
> &fenc->lowresMvs[1][p1 - b - 1][cu_xy] };
> +    int(*fenc_costs[2]) = { &fenc->lowresMvCosts[0][b - p0 - 1][cu_xy],
> &fenc->lowresMvCosts[1][p1 - b - 1][cu_xy] };
>
>      MV mvmin, mvmax;
>      // TODO: calculate search extents
>
>      for (int i = 0; i < 2; i++)
>      {
> -       if (!do_search[i])
> +        if (!do_search[i])
>              continue;
>
>          int numc = 0;
> @@ -199,7 +201,7 @@
>              mvp = mvc[0];
>          else
>          {
> -            x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]);
> +            x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]);
>          }
>
>
Uncrustify (formatting-only) changes should be done in a separate patch.


>          *fenc_costs[i] = me.motionEstimate(i ? fref1 : fref0, mvmin,
> mvmax, mvp, numc, mvc, merange, *fenc_mvs[i]);
> @@ -209,7 +211,69 @@
>          // TODO: add bidir
>      }
>
> -    // TODO: copy intra SATD cost analysis here (DC + planar + all-angs)
> +    UInt width       = fenc->cuWidth;
>

Isn't cuWidth the width of the frame in CUs?  You already have a cu_size
variable in this function.


> +    UInt numModesAvailable = 35; //total number of Intra modes
> +    Int nLog2SizeMinus2 = g_convertToBit[width];
> +    x265::pixelcmp_t sa8d = x265::primitives.sa8d[nLog2SizeMinus2];
>

Drop everything between here...


> +    UInt64 CandCostList[FAST_UDI_MAX_RDMODE_NUM];
> +    UInt numModesForFullRD = 5; // Currently set as 5 at default
> +    UInt CandNum;
> +    UInt partOffset = 0;
> +
> +    assert(numModesForFullRD < numModesAvailable);
> +
> +    for (UInt i = 0; i < numModesForFullRD; i++)
> +    {
> +        CandCostList[i] = MAX_UINT;
> +    }
> +
> +    ALIGN_VAR_32(pixel, buffer[64 * 64]); // current cu buffer
> +    primitives.blockcpy_pp(fenc->cuWidth, fenc->cuHeight, buffer,
> FENC_STRIDE, fenc->m_lumaPlane[0][0] + pel_offset, fenc->m_lumaStride);
>

...and here, and use the m_me.fenc aligned copy of the block instead.


> +    pixel *pAbove0 = fenc->m_lumaPlane[0][0] + pel_offset -
> fenc->m_lumaStride;
> +    pixel *pAbove1 = fenc->m_lumaPlane[0][0] + pel_offset -
> fenc->cuHeight;
> +    pixel *pLeft0  = fenc->m_lumaPlane[0][0] + pel_offset +
> fenc->m_lumaStride;
> +    pixel *pLeft1  = fenc->m_lumaPlane[0][0] + pel_offset + fenc->cuWidth;
>
>
I suspect these need to be separately allocated buffers on the stack.  Min
Chen should review this.


> +    CandNum = 0;
> +    UInt modeCosts[35];
>

Drop modeCosts and CandNum; see below.


> +    // 33 Angle modes once
> +    ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
> +    ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
> +
> +    if (fenc->cuWidth <= 32)
> +    {
>

Wrong variable again; also, cu_size is always <= 32, so this check is unnecessary.


> +        // 1
> +        primitives.intra_pred_dc(pAbove0 + 1, pLeft0 + 1, buffer, stride,
> width, (width <= 16));
> +        modeCosts[DC_IDX] = sa8d(fenc->m_lumaPlane[0][0],
> fenc->m_lumaStride, buffer, fenc->stride);
> +
> +        // 0
> +        pixel *above = pAbove0;
> +        pixel *left  = pLeft0;
> +        if (width >= 8 && width <= 32)
> +        {
> +            above = pAbove1;
> +            left  = pLeft1;
> +        }
> +        primitives.intra_pred_planar((pixel*)above + 1, (pixel*)left + 1,
> buffer, fenc->stride, width);
> +        modeCosts[PLANAR_IDX] = sa8d(fenc->m_lumaPlane[0][0],
> fenc->m_lumaStride, buffer, fenc->stride);
> +
> +        // Transpose NxN
> +        x265::primitives.transpose[nLog2SizeMinus2](buf_trans,
> (pixel*)fenc, stride);
> +
> +        x265::primitives.intra_pred_allangs[nLog2SizeMinus2](tmp,
> pAbove0, pLeft0, pAbove1, pLeft1, (width <= 16));
> +
> +        // TODO: We need SATD_x4 here
> +        for (UInt mode = 2; mode < numModesAvailable; mode++)
> +        {
> +            bool modeHor = (mode < 18);
> +            pixel *cmp = (modeHor ? buf_trans : fenc->m_lumaPlane[0][0]);
> +            intptr_t srcStride = (modeHor ? width : stride);
> +            modeCosts[mode] = sa8d(cmp, srcStride, &tmp[(mode - 2) *
> (width * width)], width);
> +        }
>

Don't keep an array of costs; simply remember the lowest SATD cost of them all.


> +    }
> +
>      return 0;
>  }
>
> @@ -622,6 +686,7 @@
>          (dst)[2] = &(src)[2][i_pel_offset]; \
>          (dst)[3] = &(src)[3][i_pel_offset]; \
>      }
> +
>  #define LOAD_WPELS_LUMA(dst, src) \
>      (dst) = &(src)[i_pel_offset];
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> http://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/private/x265-devel/attachments/20130812/5076e72e/attachment.html>


More information about the x265-devel mailing list