[x265] [PATCH 1 of 2] Pulling x264 weight decision into/for x265 lookahead
shazeb at multicorewareinc.com
shazeb at multicorewareinc.com
Wed Nov 13 13:40:18 CET 2013
# HG changeset patch
# User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
# Date 1384345982 -19800
# Wed Nov 13 18:03:02 2013 +0530
# Node ID 213808a2069d21c49a4d5e99d71ad71b8af344b8
# Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d
Pulling x264 weight decision into/for x265 lookahead
diff -r c4ca80d19105 -r 213808a2069d source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h Tue Nov 12 19:10:23 2013 +0530
+++ b/source/Lib/TLibCommon/TComSlice.h Wed Nov 13 18:03:02 2013 +0530
@@ -42,6 +42,7 @@
#include "TComRom.h"
#include "x265.h" // NAL type enums
#include "piclist.h"
+#include "common.h"
#include <cstring>
#include <assert.h>
@@ -1256,6 +1257,20 @@
// Weighted prediction scaling values built from above parameters (bitdepth scaled):
int w, o, offset, shift, round;
+
+    /* Converts a fix7 weight (log2 denominator of 7) and an offset into the
+     * inputWeight / log2WeightDenom / inputOffset fields of this struct. */
+    void setFromWeightAndOffset( int weight_nonh264, int offset )
+    {
+        inputOffset = offset;
+        inputWeight = weight_nonh264;
+
+        /* Halve the weight, dropping one bit of denominator each time, until it
+         * is at most 127 or the denominator reaches 0. */
+        for( log2WeightDenom = 7; log2WeightDenom > 0 && (inputWeight > 127); log2WeightDenom-- )
+        {
+            inputWeight >>= 1;
+        }
+
+        /* Anything still above 127 once the denominator is exhausted is clamped. */
+        inputWeight = X265_MIN( inputWeight, 127 );
+    }
};
typedef WpScalingParam wpScalingParam;
diff -r c4ca80d19105 -r 213808a2069d source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Tue Nov 12 19:10:23 2013 +0530
+++ b/source/encoder/slicetype.cpp Wed Nov 13 18:03:02 2013 +0530
@@ -45,6 +45,14 @@
using namespace x265;
+/* Fills the weighted-prediction fields of 'w': presence flag b, weight s,
+ * log2 denominator d and offset o.  The body is wrapped in do/while(0) so the
+ * macro expands to a single statement (usable in an unbraced if/else), and
+ * every argument is parenthesised so expression arguments expand correctly.
+ * Call sites must end with a semicolon. */
+#define SET_WEIGHT(w, b, s, d, o)\
+    do {\
+        (w).inputWeight = (s);\
+        (w).log2WeightDenom = (d);\
+        (w).inputOffset = (o);\
+        (w).bPresentFlag = (b);\
+    } while (0)
+
static inline int16_t median(int16_t a, int16_t b, int16_t c)
{
int16_t t = (a - b) & ((a - b) >> 31);
@@ -190,16 +198,141 @@
return pic->m_lowres.satdCost;
}
+/* Runs primitives.weightpUniPixel over a width x height block, reading from
+ * src and writing to dst, with the weight, denominator and offset taken from
+ * 'weight'. */
+static void mcWeight(pixel *dst, intptr_t dstStride, pixel *src, intptr_t srcStride,
+                     const wpScalingParam *weight, int width, int height)
+{
+    int wpOffset = weight->inputOffset << (X265_DEPTH - 8);
+    int wpScale = weight->inputWeight;
+    int log2Denom = weight->log2WeightDenom;
+    /* Extra precision term added to the last two numeric arguments below
+     * (presumably the primitive's rounding and shift parameters - confirm). */
+    int precShift = (IF_INTERNAL_PREC - X265_DEPTH);
+    int roundArg, shiftArg;
+
+    if (denomIsPositive(log2Denom))
+    {
+        roundArg = (1 << (log2Denom - 1 + precShift));
+        shiftArg = (log2Denom + precShift);
+    }
+    else
+    {
+        /* NOTE(review): without a denominator the rounding argument is the
+         * precision term itself rather than a power of two; this looks
+         * unintended - confirm against weightpUniPixel. */
+        roundArg = 0 + precShift;
+        shiftArg = 0 + precShift;
+    }
+
+    primitives.weightpUniPixel(src, dst, srcStride, dstStride, width, height,
+                               wpScale, roundArg, shiftArg, wpOffset);
+}
+
+/* Accumulates a cost for predicting frame b's lowres luma plane from 'src',
+ * one 8x8 block at a time.  When w is non-NULL each source block is first
+ * weighted into a scratch buffer via mcWeight; otherwise src is compared
+ * directly.  Every block contributes the smaller of its 8x8 SATD and the
+ * frame's stored intraCost entry for that block. */
+unsigned int Lookahead::weightCostLuma(int b, pixel *src, wpScalingParam *w)
+{
+    Lowres *fenc = frames[b];
+    int stride = fenc->lumaStride;
+    int lines = fenc->lines;
+    int width = fenc->width;
+    pixel *fencPlane = fenc->lowresPlane[0];
+    ALIGN_VAR_16( pixel, buf[8*8]);
+    unsigned int cost = 0;
+    int mb = 0;
+
+    for (int y = 0; y < lines; y += 8)
+    {
+        for (int x = 0; x < width; x += 8, mb++)
+        {
+            /* offset of this block's top-left pixel in both planes */
+            int pixoff = y * stride + x;
+            int cmp;
+            if (w)
+            {
+                // TO DO prepare full weighted plane
+                mcWeight(buf, 8, &src[pixoff], stride, w, 8, 8);
+                cmp = primitives.satd[LUMA_8x8]( buf, 8, &fencPlane[pixoff], stride );
+            }
+            else
+            {
+                cmp = primitives.satd[LUMA_8x8](&src[pixoff], stride, &fencPlane[pixoff], stride);
+            }
+            cost += X265_MIN( cmp, fenc->intraCost[mb] );
+        }
+    }
+
+    x265_emms();
+    return cost;
+}
+
+/* Decides whether frame b should use an explicit luma weight when predicted
+ * from reference frame p0, and stores the decision in weights[0].
+ *
+ *   b, p0        indices into frames[] of the current and reference frame
+ *   b_lookahead  only used as a rounding term (0.5 * b_lookahead) when the
+ *                offset is derived
+ *   weights      output; bPresentFlag is 0 unless the weighted cost beats
+ *                the unweighted cost by the threshold checked at the end
+ *
+ * A scale is guessed from the ratio of the two frames' wp_ssd values and an
+ * offset from their means; the candidate is then scored with weightCostLuma
+ * against the unweighted score. */
+void Lookahead::weightsAnalyse(int b, int p0, int b_lookahead, wpScalingParam* weights)
+{
+    Lowres *fenc, *ref;
+    fenc = frames[b];
+    ref = frames[p0];
+    /* epsilon is chosen to require at least a numerator of 127 (with denominator = 128) */
+    const float epsilon = 1.f/128.f;
+    SET_WEIGHT( weights[0], 0, 1, 0, 0 );
+    float guess_scale, fenc_mean, ref_mean;
+    /* Guard against a zero reference SSD: in that case both terms are bumped
+     * by one so the ratio stays finite instead of becoming inf/NaN (the same
+     * guard x264's weight analysis uses). */
+    float fenc_var = (float)(fenc->wp_ssd[0] + !ref->wp_ssd[0]);
+    float ref_var = (float)(ref->wp_ssd[0] + !ref->wp_ssd[0]);
+    guess_scale = sqrtf( fenc_var / ref_var );
+    fenc_mean = (float)fenc->wp_sum[0] / (fenc->lines * fenc->width) / (1 << (X265_DEPTH - 8));
+    ref_mean = (float) ref->wp_sum[0] / (fenc->lines * fenc->width) / (1 << (X265_DEPTH - 8));
+
+    /* Only plane 0 (luma) is analysed in this function. */
+    int minoff = 0, minscale, mindenom;
+    unsigned int minscore = 0, origscore = 1;
+    int found = 0;
+
+    //early termination: means and scale are close enough that no weight is needed
+    if( fabsf( ref_mean - fenc_mean ) < 0.5f && fabsf( 1.f - guess_scale ) < epsilon )
+    {
+        SET_WEIGHT( *weights, 0, 1, 0, 0 );
+        return;
+    }
+
+    weights->setFromWeightAndOffset( (int)( guess_scale * 128 + 0.5), 0 );
+
+    mindenom = weights->log2WeightDenom;
+    minscale = weights->inputWeight;
+
+    pixel *mcbuf = NULL;
+    /* weightCostLuma reads fenc->intraCost; presumably this call fills it in
+     * when it has not been computed yet - confirm. */
+    if (!fenc->bIntraCalculated)
+    {
+        estimateFrameCost(b,b,b,0);
+    }
+    mcbuf = frames[p0]->lowresPlane[0];
+    /* baseline: cost of the unweighted reference */
+    origscore = minscore = weightCostLuma( b, mcbuf, NULL );
+
+    if( !minscore )
+        return;
+
+    unsigned int s=0;
+    int cur_scale = minscale;
+    int cur_offset = (int) (fenc_mean - ref_mean * cur_scale / (1 << mindenom) + 0.5f * b_lookahead);
+    if( cur_offset < - 128 || cur_offset > 127 )
+    {
+        /* Rescale considering the constraints on cur_offset. We do it in this order
+         * because scale has a much wider range than offset (because of denom), so
+         * it should almost never need to be clamped. */
+        cur_offset = Clip3( -128, 127, cur_offset );
+        cur_scale = (int) ((1 << mindenom) * (fenc_mean - cur_offset) / ref_mean + 0.5f);
+        cur_scale = Clip3( 0, 127, cur_scale );
+    }
+    SET_WEIGHT(*weights, 1, cur_scale, mindenom, cur_offset);
+    s = weightCostLuma(b, mcbuf, weights);
+    COPY4_IF_LT( minscore, s, minscale, cur_scale, minoff, cur_offset, found, 1 );
+    x265_emms();
+
+    /* Use a smaller denominator if possible */
+    while( mindenom > 0 && !(minscale&1) )
+    {
+        mindenom--;
+        minscale >>= 1;
+    }
+
+    /* Reject the weight when nothing better was found, when it reduces to the
+     * identity (scale == 1 << denom, offset 0), or when the gain is under 0.2%. */
+    if( !found || (minscale == 1 << mindenom && minoff == 0) || (float)minscore / origscore > 0.998f )
+    {
+        SET_WEIGHT( *weights, 0, 1, 0, 0 );
+        return;
+    }
+    else
+    {
+        SET_WEIGHT( *weights, 1, minscale, mindenom, minoff );
+    }
+}
+
#define NUM_CUS (widthInCU > 2 && heightInCU > 2 ? (widthInCU - 2) * (heightInCU - 2) : widthInCU * heightInCU)
int Lookahead::estimateFrameCost(int p0, int p1, int b, bool bIntraPenalty)
{
int score = 0;
Lowres *fenc = frames[b];
-
- curb = b;
- curp0 = p0;
- curp1 = p1;
+ wpScalingParam wp;
+ wp.bPresentFlag = false;
if (fenc->costEst[b - p0][p1 - b] >= 0 && fenc->rowSatds[b - p0][p1 - b][0] != -1)
score = fenc->costEst[b - p0][p1 - b];
@@ -209,9 +342,21 @@
bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x == 0x7FFF;
bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x == 0x7FFF;
- if (bDoSearch[0]) fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
+ if (bDoSearch[0])
+ {
+ if( cfg->param.bEnableWeightedPred && b==p1)
+ {
+ weightsAnalyse(b, p0, 1, &wp);
+ }
+ bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x == 0x7FFF;
+ bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x == 0x7FFF;
+ fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
+ }
if (bDoSearch[1]) fenc->lowresMvs[1][p1 - b - 1][0].x = 0;
+ curb = b;
+ curp0 = p0;
+ curp1 = p1;
fenc->costEst[b - p0][p1 - b] = 0;
fenc->costEstAq[b - p0][p1 - b] = 0;
// TODO: use lowres MVs as motion candidates in full-res search
@@ -613,14 +758,6 @@
} */
}
- /* Analyse for weighted P frames
- if (!h->param.rc.b_stat_read && h->lookahead->next.list[bframes]->i_type == X264_TYPE_P
- && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE)
- {
- x265_emms();
- x264_weights_analyse(h, h->lookahead->next.list[bframes], h->lookahead->last_nonb, 0);
- }*/
-
/* dequeue all frames from inputQueue that are about to be enqueued
* in the output queue. The order is important because TComPic can
* only be in one list at a time */
diff -r c4ca80d19105 -r 213808a2069d source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Tue Nov 12 19:10:23 2013 +0530
+++ b/source/encoder/slicetype.h Wed Nov 13 18:03:02 2013 +0530
@@ -47,11 +47,13 @@
int costIntra; // Estimated Intra cost for all CUs in a row
int costIntraAq; // Estimated weighted Aq Intra cost for all CUs in a row
int intraMbs; // Number of Intra CUs
+ TEncCfg *cfg;
Lowres** frames;
int widthInCU;
int heightInCU;
int merange;
+ Lowres *weightedRef;
LookaheadRow()
{
@@ -82,6 +84,9 @@
int widthInCU; // width of lowres frame in downscale CUs
int heightInCU; // height of lowres frame in downscale CUs
+ Lowres weightedRef;
+ int numWRefs;
+
PicList inputQueue; // input pictures in order received
PicList outputQueue; // pictures to be encoded, in encode order
@@ -110,6 +115,11 @@
int slicetypePathCost(char *path, int threshold);
void processRow(int row);
+
+ void weightsAnalyse(int b, int p0, int b_lookahead, wpScalingParam *w);
+ unsigned int weightCostLuma(int b, pixel *src, wpScalingParam *w);
+ pixel* weightCostInit(int b, int p0, pixel *dest);
+ int x265_weight_slice_header_cost(wpScalingParam *w, int b_chroma);
};
}
More information about the x265-devel
mailing list