[x265] [PATCH 1 of 2] Pulling x264 weight decision into/for x265 lookahead
Steve Borho
steve at borho.org
Fri Nov 15 06:29:05 CET 2013
On Thu, Nov 14, 2013 at 2:07 PM, Shazeb Khan <shazeb at multicorewareinc.com> wrote:
>
>
>
> On Wed, Nov 13, 2013 at 7:29 PM, Steve Borho <steve at borho.org> wrote:
>
>>
>>
>>
>> On Wed, Nov 13, 2013 at 6:40 AM, <shazeb at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
>>> # Date 1384345982 -19800
>>> # Wed Nov 13 18:03:02 2013 +0530
>>> # Node ID 213808a2069d21c49a4d5e99d71ad71b8af344b8
>>> # Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d
>>> Pulling x264 weight decision into/for x265 lookahead
>>>
>>> diff -r c4ca80d19105 -r 213808a2069d source/Lib/TLibCommon/TComSlice.h
>>> --- a/source/Lib/TLibCommon/TComSlice.h Tue Nov 12 19:10:23 2013 +0530
>>> +++ b/source/Lib/TLibCommon/TComSlice.h Wed Nov 13 18:03:02 2013 +0530
>>> @@ -42,6 +42,7 @@
>>> #include "TComRom.h"
>>> #include "x265.h" // NAL type enums
>>> #include "piclist.h"
>>> +#include "common.h"
>>>
>>> #include <cstring>
>>> #include <assert.h>
>>> @@ -1256,6 +1257,20 @@
>>>
>>> // Weighted prediction scaling values built from above parameters
>>> (bitdepth scaled):
>>> int w, o, offset, shift, round;
>>> +
>>> + /* makes a non-h265 weight (i.e. fix7), into an h265 weight */
>>> + void setFromWeightAndOffset( int weight_nonh264, int offset )
>>>
>>
>> white-space; x264 paren style is not the same as ours
>>
>>
>>> + {
>>> + inputOffset = offset;
>>> + log2WeightDenom = 7;
>>> + inputWeight = weight_nonh264;
>>>
>>
>> drop the _nonh264 suffix
>>
>>
>>> + while( log2WeightDenom > 0 && (inputWeight > 127) )
>>> + {
>>> + log2WeightDenom--;
>>> + inputWeight >>= 1;
>>> + }
>>> + inputWeight = X265_MIN( inputWeight, 127 );
>>> + }
>>> };
>>>
>>> typedef WpScalingParam wpScalingParam;
>>> diff -r c4ca80d19105 -r 213808a2069d source/encoder/slicetype.cpp
>>> --- a/source/encoder/slicetype.cpp Tue Nov 12 19:10:23 2013 +0530
>>> +++ b/source/encoder/slicetype.cpp Wed Nov 13 18:03:02 2013 +0530
>>> @@ -45,6 +45,14 @@
>>>
>>> using namespace x265;
>>>
>>> +#define SET_WEIGHT(w, b, s, d, o)\
>>> +{\
>>> + (w).inputWeight = (s);\
>>> + (w).log2WeightDenom = (d);\
>>> + (w).inputOffset = (o);\
>>> + (w).bPresentFlag = b;\
>>> +}
>>> +
>>> static inline int16_t median(int16_t a, int16_t b, int16_t c)
>>> {
>>> int16_t t = (a - b) & ((a - b) >> 31);
>>> @@ -190,16 +198,141 @@
>>> return pic->m_lowres.satdCost;
>>> }
>>>
>>> +static void mcWeight(pixel *dst, intptr_t dstStride, pixel *src,
>>> intptr_t srcStride,
>>> + const wpScalingParam *weight, int width, int
>>> height)
>>> +{
>>> + int offset = weight->inputOffset << (X265_DEPTH - 8);
>>> + int scale = weight->inputWeight;
>>> + int denom = weight->log2WeightDenom;
>>> + int correction = (IF_INTERNAL_PREC - X265_DEPTH);
>>>
>>
>> should these terms be moved into the primitive itself? this function
>> feels like it should not exist.
>>
>> is the correction term part of the actual weight algorithm?
>>
>
> The correction term is just used to adjust round and shift so that the weight
> primitive, which simulates pixel-to-short conversion, can be reused in its
> existing form.
>
>
>>
>>
>>> + if (denom >= 1)
>>> + {
>>> + primitives.weightpUniPixel(src, dst, srcStride, dstStride,
>>> width, height, scale, (1<<(denom - 1 + correction)), (denom + correction),
>>> offset);
>>> + }
>>> + else
>>> + {
>>> + primitives.weightpUniPixel(src, dst, srcStride, dstStride,
>>> width, height, scale, 0 + correction, 0 + correction, offset);
>>> + }
>>> +}
>>> +
>>> +unsigned int Lookahead::weightCostLuma(int b, pixel *src,
>>> wpScalingParam *w)
>>> +{
>>> + Lowres *fenc = frames[b];
>>> + unsigned int cost = 0;
>>> + int stride = fenc->lumaStride;
>>> + int lines = fenc->lines;
>>> + int width = fenc->width;
>>> + pixel *fenc_plane = fenc->lowresPlane[0];
>>> + ALIGN_VAR_16( pixel, buf[8*8]);
>>> + int pixoff = 0;
>>> + int mb = 0;
>>> +
>>> + if (w)
>>> + {
>>> + for (int y = 0; y < lines; y += 8, pixoff = y * stride)
>>> + for (int x = 0; x < width; x += 8, mb++, pixoff += 8)
>>> + {
>>> + // TO DO prepare full weighted plane
>>> + mcWeight(buf, 8, &src[pixoff], stride, w, 8, 8);
>>> + int cmp = primitives.satd[LUMA_8x8]( buf, 8,
>>> &fenc_plane[pixoff], stride );
>>> + cost += X265_MIN( cmp, fenc->intraCost[mb] );
>>> + }
>>> + }
>>> + else
>>> + for (int y = 0; y < lines; y += 8, pixoff = y * stride)
>>> + for (int x = 0; x < width; x += 8, mb++, pixoff += 8)
>>> + {
>>> + int cmp = primitives.satd[LUMA_8x8](&src[pixoff],
>>> stride, &fenc_plane[pixoff], stride);
>>> + cost += X265_MIN(cmp, fenc->intraCost[mb]);
>>> + }
>>> + x265_emms();
>>> + return cost;
>>> +}
>>> +
>>> +void Lookahead::weightsAnalyse(int b, int p0, int b_lookahead,
>>> wpScalingParam* weights)
>>> +{
>>>
>>
>> drop the b_lookahead argument
>>
>>
>>> + Lowres *fenc, *ref;
>>> + fenc = frames[b];
>>> + ref = frames[p0];
>>> + /* epsilon is chosen to require at least a numerator of 127 (with
>>> denominator = 128) */
>>> + const float epsilon = 1.f/128.f;
>>> + SET_WEIGHT( weights[0], 0, 1, 0, 0 );
>>> + float guess_scale, fenc_mean, ref_mean;
>>> + guess_scale = sqrtf( (float) fenc->wp_ssd[0] / ref->wp_ssd[0]);
>>> + fenc_mean = (float)fenc->wp_sum[0] / (fenc->lines * fenc->width) /
>>> (1 << (X265_DEPTH - 8));
>>> + ref_mean = (float) ref->wp_sum[0] / (fenc->lines * fenc->width) /
>>> (1 << (X265_DEPTH - 8));
>>> +
>>> + /* Don't check chroma in lookahead, or if there wasn't a luma
>>> weight. */
>>> + int minoff = 0, minscale, mindenom;
>>> + unsigned int minscore = 0, origscore = 1;
>>> + int found = 0;
>>> +
>>> + //early termination
>>>
>>
>> space after //
>>
>>
>>> + if( fabsf( ref_mean - fenc_mean ) < 0.5f && fabsf( 1.f -
>>> guess_scale ) < epsilon )
>>>
>>
>> white-space
>>
>>
>>> + {
>>> + SET_WEIGHT( *weights, 0, 1, 0, 0 );
>>> + return;
>>> + }
>>> +
>>> + weights->setFromWeightAndOffset( (int)( guess_scale * 128 + 0.5), 0
>>> );
>>> +
>>> + mindenom = weights->log2WeightDenom;
>>> + minscale = weights->inputWeight;
>>> +
>>> + pixel *mcbuf = NULL;
>>> + if (!fenc->bIntraCalculated)
>>> + {
>>> + estimateFrameCost(b,b,b,0);
>>> + }
>>> + mcbuf = frames[p0]->lowresPlane[0];
>>> + origscore = minscore = weightCostLuma( b, mcbuf, NULL );
>>> +
>>> + if( !minscore )
>>>
>>
>> lots of white-space issues in this function still
>>
>>
>>> + return;
>>> +
>>> + unsigned int s=0;
>>> + int cur_scale = minscale;
>>> + int cur_offset = (int) (fenc_mean - ref_mean * cur_scale / (1 <<
>>> mindenom) + 0.5f * b_lookahead);
>>> + if( cur_offset < - 128 || cur_offset > 127 )
>>> + {
>>> + /* Rescale considering the constraints on cur_offset. We do it
>>> in this order
>>> + * because scale has a much wider range than offset (because
>>> of denom), so
>>> + * it should almost never need to be clamped. */
>>> + cur_offset = Clip3( -128, 127, cur_offset );
>>> + cur_scale = (int) ((1 << mindenom) * (fenc_mean - cur_offset) /
>>> ref_mean + 0.5f);
>>> + cur_scale = Clip3( 0, 127, cur_scale );
>>> + }
>>> + SET_WEIGHT(*weights, 1, cur_scale, mindenom, cur_offset);
>>> + s = weightCostLuma(b, mcbuf, weights);
>>> + COPY4_IF_LT( minscore, s, minscale, cur_scale, minoff, cur_offset,
>>> found, 1 );
>>>
>>
>> this emms is redundant
>>
>>
>>> + x265_emms();
>>> +
>>> + /* Use a smaller denominator if possible */
>>> + while( mindenom > 0 && !(minscale&1) )
>>> + {
>>> + mindenom--;
>>> + minscale >>= 1;
>>> + }
>>> +
>>> + if( !found || (minscale == 1 << mindenom && minoff == 0) ||
>>> (float)minscore / origscore > 0.998f )
>>> + {
>>> + SET_WEIGHT( *weights, 0, 1, 0, 0 );
>>> + return;
>>> + }
>>> + else
>>> + {
>>> + SET_WEIGHT( *weights, 1, minscale, mindenom, minoff );
>>> + }
>>> +}
>>> +
>>> #define NUM_CUS (widthInCU > 2 && heightInCU > 2 ? (widthInCU - 2) *
>>> (heightInCU - 2) : widthInCU * heightInCU)
>>>
>>> int Lookahead::estimateFrameCost(int p0, int p1, int b, bool
>>> bIntraPenalty)
>>> {
>>> int score = 0;
>>> Lowres *fenc = frames[b];
>>> -
>>> - curb = b;
>>> - curp0 = p0;
>>> - curp1 = p1;
>>> + wpScalingParam wp;
>>> + wp.bPresentFlag = false;
>>>
>>> if (fenc->costEst[b - p0][p1 - b] >= 0 && fenc->rowSatds[b - p0][p1
>>> - b][0] != -1)
>>> score = fenc->costEst[b - p0][p1 - b];
>>> @@ -209,9 +342,21 @@
>>> bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x
>>> == 0x7FFF;
>>> bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x
>>> == 0x7FFF;
>>>
>>> - if (bDoSearch[0]) fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
>>> + if (bDoSearch[0])
>>> + {
>>> + if( cfg->param.bEnableWeightedPred && b==p1)
>>> + {
>>> + weightsAnalyse(b, p0, 1, &wp);
>>> + }
>>> + bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 -
>>> 1][0].x == 0x7FFF;
>>> + bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b -
>>> 1][0].x == 0x7FFF;
>>>
>>
>> the above two lines should be removed
>>
>
> The weightsAnalyse function makes a recursive call to estimateFrameCost,
> which alters the bDoSearch, curb, curp0, and curp1 variables; hence they need
> to be re-initialized here.
>
I didn't realize those were turned into member vars when we threaded the
lookahead. Yuck.
My suggestion would be to move the weightp analysis higher, then:
if (cfg->param.bEnableWeightedPred && b == p1 && b != p0 &&
fenc->lowresMvs[0][b - p0 - 1][0].x == 0x7FFF)
weightsAnalyse(...);
bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x == 0x7FFF;
bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x == 0x7FFF;
--
Steve
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131114/c7051931/attachment-0001.html>
More information about the x265-devel
mailing list