[x265] [PATCH 1 of 2] Pulling x264 weight decision into/for x265 lookahead

Steve Borho steve at borho.org
Thu Nov 14 02:29:27 CET 2013


On Wed, Nov 13, 2013 at 6:40 AM, <shazeb at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
> # Date 1384345982 -19800
> #      Wed Nov 13 18:03:02 2013 +0530
> # Node ID 213808a2069d21c49a4d5e99d71ad71b8af344b8
> # Parent  c4ca80d19105ccf1ba2ec14dd65915f2820a660d
> Pulling x264 weight decision into/for x265 lookahead
>
> diff -r c4ca80d19105 -r 213808a2069d source/Lib/TLibCommon/TComSlice.h
> --- a/source/Lib/TLibCommon/TComSlice.h Tue Nov 12 19:10:23 2013 +0530
> +++ b/source/Lib/TLibCommon/TComSlice.h Wed Nov 13 18:03:02 2013 +0530
> @@ -42,6 +42,7 @@
>  #include "TComRom.h"
>  #include "x265.h"  // NAL type enums
>  #include "piclist.h"
> +#include "common.h"
>
>  #include <cstring>
>  #include <assert.h>
> @@ -1256,6 +1257,20 @@
>
>      // Weighted prediction scaling values built from above parameters
> (bitdepth scaled):
>      int         w, o, offset, shift, round;
> +
> +    /* makes a non-h265 weight (i.e. fix7), into an h265 weight */
> +    void setFromWeightAndOffset( int weight_nonh264, int offset )
>

White-space: the x264 paren style (spaces inside the parentheses) is not the same as ours.


> +    {
> +        inputOffset = offset;
> +        log2WeightDenom = 7;
> +        inputWeight = weight_nonh264;
>

Drop the _nonh264 suffix from the weight parameter name.


> +        while( log2WeightDenom > 0 && (inputWeight > 127) )
> +        {
> +            log2WeightDenom--;
> +            inputWeight >>= 1;
> +        }
> +        inputWeight = X265_MIN( inputWeight, 127 );
> +    }
>  };
>
>  typedef WpScalingParam wpScalingParam;
> diff -r c4ca80d19105 -r 213808a2069d source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp      Tue Nov 12 19:10:23 2013 +0530
> +++ b/source/encoder/slicetype.cpp      Wed Nov 13 18:03:02 2013 +0530
> @@ -45,6 +45,14 @@
>
>  using namespace x265;
>
> +#define SET_WEIGHT(w, b, s, d, o)\
> +{\
> +    (w).inputWeight = (s);\
> +    (w).log2WeightDenom = (d);\
> +    (w).inputOffset = (o);\
> +    (w).bPresentFlag = b;\
> +}
> +
>  static inline int16_t median(int16_t a, int16_t b, int16_t c)
>  {
>      int16_t t = (a - b) & ((a - b) >> 31);
> @@ -190,16 +198,141 @@
>      return pic->m_lowres.satdCost;
>  }
>
> +static void mcWeight(pixel *dst, intptr_t dstStride, pixel *src, intptr_t
> srcStride,
> +                       const wpScalingParam *weight, int width, int
> height)
> +{
> +    int offset = weight->inputOffset << (X265_DEPTH - 8);
> +    int scale = weight->inputWeight;
> +    int denom = weight->log2WeightDenom;
> +    int correction = (IF_INTERNAL_PREC - X265_DEPTH);
>

Should these terms be moved into the primitive itself? This function feels
like it should not exist.

Is the correction term part of the actual weight algorithm?


> +    if (denom >= 1)
> +    {
> +        primitives.weightpUniPixel(src, dst, srcStride, dstStride, width,
> height, scale, (1<<(denom - 1 + correction)), (denom + correction), offset);
> +    }
> +    else
> +    {
> +        primitives.weightpUniPixel(src, dst, srcStride, dstStride, width,
> height, scale, 0 + correction, 0 + correction, offset);
> +    }
> +}
> +
> +unsigned int Lookahead::weightCostLuma(int b, pixel *src, wpScalingParam
> *w)
> +{
> +    Lowres *fenc = frames[b];
> +    unsigned int cost = 0;
> +    int stride = fenc->lumaStride;
> +    int lines = fenc->lines;
> +    int width = fenc->width;
> +    pixel *fenc_plane = fenc->lowresPlane[0];
> +    ALIGN_VAR_16( pixel, buf[8*8]);
> +    int pixoff = 0;
> +    int mb = 0;
> +
> +    if (w)
> +    {
> +        for (int y = 0; y < lines; y += 8, pixoff = y * stride)
> +            for (int x = 0; x < width; x += 8, mb++, pixoff += 8)
> +            {
> +                // TO DO prepare full weighted plane
> +                mcWeight(buf, 8, &src[pixoff], stride, w, 8, 8);
> +                int cmp = primitives.satd[LUMA_8x8]( buf, 8,
> &fenc_plane[pixoff], stride );
> +                cost += X265_MIN( cmp, fenc->intraCost[mb] );
> +            }
> +    }
> +    else
> +        for (int y = 0; y < lines; y += 8, pixoff = y * stride)
> +            for (int x = 0; x < width; x += 8, mb++, pixoff += 8)
> +            {
> +                int cmp = primitives.satd[LUMA_8x8](&src[pixoff], stride,
> &fenc_plane[pixoff], stride);
> +                cost += X265_MIN(cmp, fenc->intraCost[mb]);
> +            }
> +    x265_emms();
> +    return cost;
> +}
> +
> +void Lookahead::weightsAnalyse(int b, int p0, int b_lookahead,
> wpScalingParam* weights)
> +{
>

Drop the b_lookahead argument from weightsAnalyse().


> +    Lowres *fenc, *ref;
> +    fenc = frames[b];
> +    ref  = frames[p0];
> +    /* epsilon is chosen to require at least a numerator of 127 (with
> denominator = 128) */
> +    const float epsilon = 1.f/128.f;
> +    SET_WEIGHT( weights[0], 0, 1, 0, 0 );
> +    float guess_scale, fenc_mean, ref_mean;
> +    guess_scale = sqrtf( (float) fenc->wp_ssd[0] / ref->wp_ssd[0]);
> +    fenc_mean = (float)fenc->wp_sum[0] / (fenc->lines * fenc->width) / (1
> << (X265_DEPTH - 8));
> +    ref_mean  = (float) ref->wp_sum[0] / (fenc->lines * fenc->width) / (1
> << (X265_DEPTH - 8));
> +
> +     /* Don't check chroma in lookahead, or if there wasn't a luma
> weight. */
> +    int minoff = 0, minscale, mindenom;
> +    unsigned int minscore = 0, origscore = 1;
> +    int found = 0;
> +
> +    //early termination
>

Add a space after the //.


> +    if( fabsf( ref_mean - fenc_mean ) < 0.5f && fabsf( 1.f - guess_scale
> ) < epsilon )
>

white-space


> +    {
> +        SET_WEIGHT( *weights, 0, 1, 0, 0 );
> +        return;
> +    }
> +
> +    weights->setFromWeightAndOffset( (int)( guess_scale * 128 + 0.5), 0 );
> +
> +    mindenom = weights->log2WeightDenom;
> +    minscale = weights->inputWeight;
> +
> +    pixel *mcbuf = NULL;
> +    if (!fenc->bIntraCalculated)
> +    {
> +        estimateFrameCost(b,b,b,0);
> +    }
> +    mcbuf = frames[p0]->lowresPlane[0];
> +    origscore = minscore = weightCostLuma( b, mcbuf, NULL );
> +
> +    if( !minscore )
>

There are still lots of white-space issues in this function.


> +        return;
> +
> +    unsigned int s=0;
> +    int cur_scale = minscale;
> +    int cur_offset = (int) (fenc_mean - ref_mean * cur_scale / (1 <<
> mindenom) + 0.5f * b_lookahead);
> +    if( cur_offset < - 128 || cur_offset > 127 )
> +    {
> +        /* Rescale considering the constraints on cur_offset. We do it in
> this order
> +            * because scale has a much wider range than offset (because
> of denom), so
> +            * it should almost never need to be clamped. */
> +        cur_offset = Clip3( -128, 127, cur_offset );
> +        cur_scale = (int) ((1 << mindenom) * (fenc_mean - cur_offset) /
> ref_mean + 0.5f);
> +        cur_scale = Clip3( 0, 127, cur_scale );
> +    }
> +    SET_WEIGHT(*weights, 1, cur_scale, mindenom, cur_offset);
> +    s = weightCostLuma(b, mcbuf, weights);
> +    COPY4_IF_LT( minscore, s, minscale, cur_scale, minoff, cur_offset,
> found, 1 );
>

The x265_emms() call that follows is redundant.


> +    x265_emms();
> +
> +    /* Use a smaller denominator if possible */
> +    while( mindenom > 0 && !(minscale&1) )
> +    {
> +        mindenom--;
> +        minscale >>= 1;
> +    }
> +
> +    if( !found || (minscale == 1 << mindenom && minoff == 0) ||
> (float)minscore / origscore > 0.998f )
> +    {
> +        SET_WEIGHT( *weights, 0, 1, 0, 0 );
> +        return;
> +    }
> +    else
> +    {
> +        SET_WEIGHT( *weights, 1, minscale, mindenom, minoff );
> +    }
> +}
> +
>  #define NUM_CUS (widthInCU > 2 && heightInCU > 2 ? (widthInCU - 2) *
> (heightInCU - 2) : widthInCU * heightInCU)
>
>  int Lookahead::estimateFrameCost(int p0, int p1, int b, bool
> bIntraPenalty)
>  {
>      int score = 0;
>      Lowres *fenc = frames[b];
> -
> -    curb = b;
> -    curp0 = p0;
> -    curp1 = p1;
> +    wpScalingParam wp;
> +    wp.bPresentFlag = false;
>
>      if (fenc->costEst[b - p0][p1 - b] >= 0 && fenc->rowSatds[b - p0][p1 -
> b][0] != -1)
>          score = fenc->costEst[b - p0][p1 - b];
> @@ -209,9 +342,21 @@
>          bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x ==
> 0x7FFF;
>          bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x ==
> 0x7FFF;
>
> -        if (bDoSearch[0]) fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
> +        if (bDoSearch[0])
> +        {
> +            if( cfg->param.bEnableWeightedPred && b==p1)
> +            {
> +                weightsAnalyse(b, p0, 1, &wp);
> +            }
> +            bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x
> == 0x7FFF;
> +            bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x
> == 0x7FFF;
>

The two lines above should be removed.


> +            fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
> +        }
>          if (bDoSearch[1]) fenc->lowresMvs[1][p1 - b - 1][0].x = 0;
>
> +        curb = b;
> +        curp0 = p0;
> +        curp1 = p1;
>

The three lines above should be removed.


>          fenc->costEst[b - p0][p1 - b] = 0;
>          fenc->costEstAq[b - p0][p1 - b] = 0;
>          // TODO: use lowres MVs as motion candidates in full-res search
> @@ -613,14 +758,6 @@
>              } */
>          }
>
> -        /* Analyse for weighted P frames
> -        if (!h->param.rc.b_stat_read &&
> h->lookahead->next.list[bframes]->i_type == X264_TYPE_P
> -            && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE)
> -        {
> -            x265_emms();
> -            x264_weights_analyse(h, h->lookahead->next.list[bframes],
> h->lookahead->last_nonb, 0);
> -        }*/
> -
>          /* dequeue all frames from inputQueue that are about to be
> enqueued
>           * in the output queue.  The order is important because TComPic
> can
>           * only be in one list at a time */
> diff -r c4ca80d19105 -r 213808a2069d source/encoder/slicetype.h
> --- a/source/encoder/slicetype.h        Tue Nov 12 19:10:23 2013 +0530
> +++ b/source/encoder/slicetype.h        Wed Nov 13 18:03:02 2013 +0530
> @@ -47,11 +47,13 @@
>      int                 costIntra;      // Estimated Intra cost for all
> CUs in a row
>      int                 costIntraAq;    // Estimated weighted Aq Intra
> cost for all CUs in a row
>      int                 intraMbs;       // Number of Intra CUs
> +    TEncCfg             *cfg;
>
>      Lowres** frames;
>      int widthInCU;
>      int heightInCU;
>      int merange;
> +    Lowres *weightedRef;
>
>      LookaheadRow()
>      {
> @@ -82,6 +84,9 @@
>      int              widthInCU;       // width of lowres frame in
> downscale CUs
>      int              heightInCU;      // height of lowres frame in
> downscale CUs
>
> +    Lowres weightedRef;
> +    int numWRefs;
> +
>      PicList inputQueue;  // input pictures in order received
>      PicList outputQueue; // pictures to be encoded, in encode order
>
> @@ -110,6 +115,11 @@
>      int slicetypePathCost(char *path, int threshold);
>
>      void processRow(int row);
> +
> +    void weightsAnalyse(int b, int p0, int b_lookahead, wpScalingParam
> *w);
> +    unsigned int weightCostLuma(int b, pixel *src, wpScalingParam *w);
> +    pixel* weightCostInit(int b, int p0, pixel *dest);
> +    int x265_weight_slice_header_cost(wpScalingParam *w, int b_chroma);
>

This last function (x265_weight_slice_header_cost) doesn't exist.


>  };
>  }
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>



-- 
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131113/58243eb2/attachment-0001.html>


More information about the x265-devel mailing list