[x265] [PATCH 1 of 2] Pulling x264 weight decision into/for x265 lookahead
Shazeb Khan
shazeb at multicorewareinc.com
Thu Nov 14 21:07:05 CET 2013
On Wed, Nov 13, 2013 at 7:29 PM, Steve Borho <steve at borho.org> wrote:
>
>
>
> On Wed, Nov 13, 2013 at 6:40 AM, <shazeb at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
>> # Date 1384345982 -19800
>> # Wed Nov 13 18:03:02 2013 +0530
>> # Node ID 213808a2069d21c49a4d5e99d71ad71b8af344b8
>> # Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d
>> Pulling x264 weight decision into/for x265 lookahead
>>
>> diff -r c4ca80d19105 -r 213808a2069d source/Lib/TLibCommon/TComSlice.h
>> --- a/source/Lib/TLibCommon/TComSlice.h Tue Nov 12 19:10:23 2013 +0530
>> +++ b/source/Lib/TLibCommon/TComSlice.h Wed Nov 13 18:03:02 2013 +0530
>> @@ -42,6 +42,7 @@
>> #include "TComRom.h"
>> #include "x265.h" // NAL type enums
>> #include "piclist.h"
>> +#include "common.h"
>>
>> #include <cstring>
>> #include <assert.h>
>> @@ -1256,6 +1257,20 @@
>>
>> // Weighted prediction scaling values built from above parameters
>> (bitdepth scaled):
>> int w, o, offset, shift, round;
>> +
>> + /* makes a non-h265 weight (i.e. fix7), into an h265 weight */
>> + void setFromWeightAndOffset( int weight_nonh264, int offset )
>>
>
> white-space; x264 paren style is not the same as ours
>
>
>> + {
>> + inputOffset = offset;
>> + log2WeightDenom = 7;
>> + inputWeight = weight_nonh264;
>>
>
> drop the _nonh264 suffix
>
>
>> + while( log2WeightDenom > 0 && (inputWeight > 127) )
>> + {
>> + log2WeightDenom--;
>> + inputWeight >>= 1;
>> + }
>> + inputWeight = X265_MIN( inputWeight, 127 );
>> + }
>> };
>>
>> typedef WpScalingParam wpScalingParam;
>> diff -r c4ca80d19105 -r 213808a2069d source/encoder/slicetype.cpp
>> --- a/source/encoder/slicetype.cpp Tue Nov 12 19:10:23 2013 +0530
>> +++ b/source/encoder/slicetype.cpp Wed Nov 13 18:03:02 2013 +0530
>> @@ -45,6 +45,14 @@
>>
>> using namespace x265;
>>
>> +#define SET_WEIGHT(w, b, s, d, o)\
>> +{\
>> + (w).inputWeight = (s);\
>> + (w).log2WeightDenom = (d);\
>> + (w).inputOffset = (o);\
>> + (w).bPresentFlag = b;\
>> +}
>> +
>> static inline int16_t median(int16_t a, int16_t b, int16_t c)
>> {
>> int16_t t = (a - b) & ((a - b) >> 31);
>> @@ -190,16 +198,141 @@
>> return pic->m_lowres.satdCost;
>> }
>>
>> +static void mcWeight(pixel *dst, intptr_t dstStride, pixel *src,
>> intptr_t srcStride,
>> + const wpScalingParam *weight, int width, int
>> height)
>> +{
>> + int offset = weight->inputOffset << (X265_DEPTH - 8);
>> + int scale = weight->inputWeight;
>> + int denom = weight->log2WeightDenom;
>> + int correction = (IF_INTERNAL_PREC - X265_DEPTH);
>>
>
> should these terms be moved into the primitive itself? this function
> feels like it should not exist.
>
> is the correction term part of the actual weight algorithm?
>
The correction term is only used to adjust the round and shift values so that
the weight primitive, which simulates the pixel-to-short conversion, can be
reused in its existing form.
>
>
>> + if (denom >= 1)
>> + {
>> + primitives.weightpUniPixel(src, dst, srcStride, dstStride,
>> width, height, scale, (1<<(denom - 1 + correction)), (denom + correction),
>> offset);
>> + }
>> + else
>> + {
>> + primitives.weightpUniPixel(src, dst, srcStride, dstStride,
>> width, height, scale, 0 + correction, 0 + correction, offset);
>> + }
>> +}
>> +
>> +unsigned int Lookahead::weightCostLuma(int b, pixel *src, wpScalingParam
>> *w)
>> +{
>> + Lowres *fenc = frames[b];
>> + unsigned int cost = 0;
>> + int stride = fenc->lumaStride;
>> + int lines = fenc->lines;
>> + int width = fenc->width;
>> + pixel *fenc_plane = fenc->lowresPlane[0];
>> + ALIGN_VAR_16( pixel, buf[8*8]);
>> + int pixoff = 0;
>> + int mb = 0;
>> +
>> + if (w)
>> + {
>> + for (int y = 0; y < lines; y += 8, pixoff = y * stride)
>> + for (int x = 0; x < width; x += 8, mb++, pixoff += 8)
>> + {
>> + // TO DO prepare full weighted plane
>> + mcWeight(buf, 8, &src[pixoff], stride, w, 8, 8);
>> + int cmp = primitives.satd[LUMA_8x8]( buf, 8,
>> &fenc_plane[pixoff], stride );
>> + cost += X265_MIN( cmp, fenc->intraCost[mb] );
>> + }
>> + }
>> + else
>> + for (int y = 0; y < lines; y += 8, pixoff = y * stride)
>> + for (int x = 0; x < width; x += 8, mb++, pixoff += 8)
>> + {
>> + int cmp = primitives.satd[LUMA_8x8](&src[pixoff],
>> stride, &fenc_plane[pixoff], stride);
>> + cost += X265_MIN(cmp, fenc->intraCost[mb]);
>> + }
>> + x265_emms();
>> + return cost;
>> +}
>> +
>> +void Lookahead::weightsAnalyse(int b, int p0, int b_lookahead,
>> wpScalingParam* weights)
>> +{
>>
>
> drop the b_lookahead argument
>
>
>> + Lowres *fenc, *ref;
>> + fenc = frames[b];
>> + ref = frames[p0];
>> + /* epsilon is chosen to require at least a numerator of 127 (with
>> denominator = 128) */
>> + const float epsilon = 1.f/128.f;
>> + SET_WEIGHT( weights[0], 0, 1, 0, 0 );
>> + float guess_scale, fenc_mean, ref_mean;
>> + guess_scale = sqrtf( (float) fenc->wp_ssd[0] / ref->wp_ssd[0]);
>> + fenc_mean = (float)fenc->wp_sum[0] / (fenc->lines * fenc->width) /
>> (1 << (X265_DEPTH - 8));
>> + ref_mean = (float) ref->wp_sum[0] / (fenc->lines * fenc->width) /
>> (1 << (X265_DEPTH - 8));
>> +
>> + /* Don't check chroma in lookahead, or if there wasn't a luma
>> weight. */
>> + int minoff = 0, minscale, mindenom;
>> + unsigned int minscore = 0, origscore = 1;
>> + int found = 0;
>> +
>> + //early termination
>>
>
> space after //
>
>
>> + if( fabsf( ref_mean - fenc_mean ) < 0.5f && fabsf( 1.f - guess_scale
>> ) < epsilon )
>>
>
> white-space
>
>
>> + {
>> + SET_WEIGHT( *weights, 0, 1, 0, 0 );
>> + return;
>> + }
>> +
>> + weights->setFromWeightAndOffset( (int)( guess_scale * 128 + 0.5), 0
>> );
>> +
>> + mindenom = weights->log2WeightDenom;
>> + minscale = weights->inputWeight;
>> +
>> + pixel *mcbuf = NULL;
>> + if (!fenc->bIntraCalculated)
>> + {
>> + estimateFrameCost(b,b,b,0);
>> + }
>> + mcbuf = frames[p0]->lowresPlane[0];
>> + origscore = minscore = weightCostLuma( b, mcbuf, NULL );
>> +
>> + if( !minscore )
>>
>
> lots of white-space issues in this function still
>
>
>> + return;
>> +
>> + unsigned int s=0;
>> + int cur_scale = minscale;
>> + int cur_offset = (int) (fenc_mean - ref_mean * cur_scale / (1 <<
>> mindenom) + 0.5f * b_lookahead);
>> + if( cur_offset < - 128 || cur_offset > 127 )
>> + {
>> + /* Rescale considering the constraints on cur_offset. We do it
>> in this order
>> + * because scale has a much wider range than offset (because
>> of denom), so
>> + * it should almost never need to be clamped. */
>> + cur_offset = Clip3( -128, 127, cur_offset );
>> + cur_scale = (int) ((1 << mindenom) * (fenc_mean - cur_offset) /
>> ref_mean + 0.5f);
>> + cur_scale = Clip3( 0, 127, cur_scale );
>> + }
>> + SET_WEIGHT(*weights, 1, cur_scale, mindenom, cur_offset);
>> + s = weightCostLuma(b, mcbuf, weights);
>> + COPY4_IF_LT( minscore, s, minscale, cur_scale, minoff, cur_offset,
>> found, 1 );
>>
>
> this emms is redundant
>
>
>> + x265_emms();
>> +
>> + /* Use a smaller denominator if possible */
>> + while( mindenom > 0 && !(minscale&1) )
>> + {
>> + mindenom--;
>> + minscale >>= 1;
>> + }
>> +
>> + if( !found || (minscale == 1 << mindenom && minoff == 0) ||
>> (float)minscore / origscore > 0.998f )
>> + {
>> + SET_WEIGHT( *weights, 0, 1, 0, 0 );
>> + return;
>> + }
>> + else
>> + {
>> + SET_WEIGHT( *weights, 1, minscale, mindenom, minoff );
>> + }
>> +}
>> +
>> #define NUM_CUS (widthInCU > 2 && heightInCU > 2 ? (widthInCU - 2) *
>> (heightInCU - 2) : widthInCU * heightInCU)
>>
>> int Lookahead::estimateFrameCost(int p0, int p1, int b, bool
>> bIntraPenalty)
>> {
>> int score = 0;
>> Lowres *fenc = frames[b];
>> -
>> - curb = b;
>> - curp0 = p0;
>> - curp1 = p1;
>> + wpScalingParam wp;
>> + wp.bPresentFlag = false;
>>
>> if (fenc->costEst[b - p0][p1 - b] >= 0 && fenc->rowSatds[b - p0][p1
>> - b][0] != -1)
>> score = fenc->costEst[b - p0][p1 - b];
>> @@ -209,9 +342,21 @@
>> bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x ==
>> 0x7FFF;
>> bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x ==
>> 0x7FFF;
>>
>> - if (bDoSearch[0]) fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
>> + if (bDoSearch[0])
>> + {
>> + if( cfg->param.bEnableWeightedPred && b==p1)
>> + {
>> + weightsAnalyse(b, p0, 1, &wp);
>> + }
>> + bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 -
>> 1][0].x == 0x7FFF;
>> + bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b -
>> 1][0].x == 0x7FFF;
>>
>
> the above two lines should be removed
>
The weightsAnalyse function makes a recursive call to estimateFrameCost,
which alters the bDoSearch, curb, curp0 and curp1 variables; hence they need
to be initialized here, after that call.
>
>> + fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
>> + }
>> if (bDoSearch[1]) fenc->lowresMvs[1][p1 - b - 1][0].x = 0;
>>
>> + curb = b;
>> + curp0 = p0;
>> + curp1 = p1;
>>
>
> the above three lines should be removed
>
>
>> fenc->costEst[b - p0][p1 - b] = 0;
>> fenc->costEstAq[b - p0][p1 - b] = 0;
>> // TODO: use lowres MVs as motion candidates in full-res search
>> @@ -613,14 +758,6 @@
>> } */
>> }
>>
>> - /* Analyse for weighted P frames
>> - if (!h->param.rc.b_stat_read &&
>> h->lookahead->next.list[bframes]->i_type == X264_TYPE_P
>> - && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE)
>> - {
>> - x265_emms();
>> - x264_weights_analyse(h, h->lookahead->next.list[bframes],
>> h->lookahead->last_nonb, 0);
>> - }*/
>> -
>> /* dequeue all frames from inputQueue that are about to be
>> enqueued
>> * in the output queue. The order is important because TComPic
>> can
>> * only be in one list at a time */
>> diff -r c4ca80d19105 -r 213808a2069d source/encoder/slicetype.h
>> --- a/source/encoder/slicetype.h Tue Nov 12 19:10:23 2013 +0530
>> +++ b/source/encoder/slicetype.h Wed Nov 13 18:03:02 2013 +0530
>> @@ -47,11 +47,13 @@
>> int costIntra; // Estimated Intra cost for all
>> CUs in a row
>> int costIntraAq; // Estimated weighted Aq Intra
>> cost for all CUs in a row
>> int intraMbs; // Number of Intra CUs
>> + TEncCfg *cfg;
>>
>> Lowres** frames;
>> int widthInCU;
>> int heightInCU;
>> int merange;
>> + Lowres *weightedRef;
>>
>> LookaheadRow()
>> {
>> @@ -82,6 +84,9 @@
>> int widthInCU; // width of lowres frame in
>> downscale CUs
>> int heightInCU; // height of lowres frame in
>> downscale CUs
>>
>> + Lowres weightedRef;
>> + int numWRefs;
>> +
>> PicList inputQueue; // input pictures in order received
>> PicList outputQueue; // pictures to be encoded, in encode order
>>
>> @@ -110,6 +115,11 @@
>> int slicetypePathCost(char *path, int threshold);
>>
>> void processRow(int row);
>> +
>> + void weightsAnalyse(int b, int p0, int b_lookahead, wpScalingParam
>> *w);
>> + unsigned int weightCostLuma(int b, pixel *src, wpScalingParam *w);
>> + pixel* weightCostInit(int b, int p0, pixel *dest);
>> + int x265_weight_slice_header_cost(wpScalingParam *w, int b_chroma);
>>
>
> this last function doesn't exist
>
>
>> };
>> }
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
>
> --
> Steve Borho
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131114/88b9b34c/attachment-0001.html>
More information about the x265-devel
mailing list