[x265] [PATCH 1 of 2] Pulling x264 weight decision into x265 lookahead

shazeb at multicorewareinc.com shazeb at multicorewareinc.com
Thu Nov 14 14:11:54 CET 2013


# HG changeset patch
# User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
# Date 1384432451 -19800
#      Thu Nov 14 18:04:11 2013 +0530
# Node ID dea83349f7865ee0c8a8b0572e9dbf771993fc14
# Parent  5683ee5b793cca5956f1e44e4e0bb3d6be70e942
Pulling x264 weight decision into x265 lookahead

diff -r 5683ee5b793c -r dea83349f786 source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h	Wed Nov 13 13:53:13 2013 +0000
+++ b/source/Lib/TLibCommon/TComSlice.h	Thu Nov 14 18:04:11 2013 +0530
@@ -42,6 +42,7 @@
 #include "TComRom.h"
 #include "x265.h"  // NAL type enums
 #include "piclist.h"
+#include "common.h"
 
 #include <cstring>
 #include <assert.h>
@@ -1256,6 +1257,20 @@
 
     // Weighted prediction scaling values built from above parameters (bitdepth scaled):
     int         w, o, offset, shift, round;
+
+    /* Converts a weight expressed over a denominator of 128 (1 << 7) into inputWeight / log2WeightDenom, halving both until the weight is at most 127 */
+    void setFromWeightAndOffset(int weight, int offset) // NOTE: the parameter 'offset' shadows the member 'offset' declared above
+    {
+        inputOffset = offset; // the offset is stored unchanged
+        log2WeightDenom = 7; // start from the input precision: denominator 1 << 7
+        inputWeight = weight;
+        while (log2WeightDenom > 0 && (inputWeight > 127)) // trade one bit of precision per step while the weight is above 127
+        {
+            log2WeightDenom--;
+            inputWeight >>= 1;
+        }
+        inputWeight = X265_MIN(inputWeight, 127); // still above 127 with the denominator exhausted: clamp
+    }
 };
 
 typedef WpScalingParam wpScalingParam;
diff -r 5683ee5b793c -r dea83349f786 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Wed Nov 13 13:53:13 2013 +0000
+++ b/source/encoder/slicetype.cpp	Thu Nov 14 18:04:11 2013 +0530
@@ -45,6 +45,14 @@
 
 using namespace x265;
 
+#define SET_WEIGHT(w, b, s, d, o) /* fills weight struct w: b = present flag, s = weight, d = log2 denominator, o = offset */ \
+    { \
+        (w).inputWeight = (s); \
+        (w).log2WeightDenom = (d); \
+        (w).inputOffset = (o); \
+        (w).bPresentFlag = (b); \
+    }
+
 static inline int16_t median(int16_t a, int16_t b, int16_t c)
 {
     int16_t t = (a - b) & ((a - b) >> 31);
@@ -190,6 +198,141 @@
     return pic->m_lowres.satdCost;
 }
 
+static void mcWeight(pixel *dst, intptr_t dstStride, pixel *src, intptr_t srcStride, // hands a width x height block of src to primitives.weightpUniPixel, which writes to dst
+                     const wpScalingParam *weight, int width, int height)
+{
+    int offset = weight->inputOffset << (X265_DEPTH - 8); // offset is shifted up by the bit depth in excess of 8
+    int scale = weight->inputWeight;
+    int denom = weight->log2WeightDenom;
+    int correction = (IF_INTERNAL_PREC - X265_DEPTH); // added to the shift-related arguments passed to the primitive below
+
+    if (denom >= 1)
+    {
+        primitives.weightpUniPixel(src, dst, srcStride, dstStride, width, height, scale, (1 << (denom - 1 + correction)), (denom + correction), offset); // presumably (round, shift): round = 1 << (shift - 1)
+    }
+    else
+    {
+        primitives.weightpUniPixel(src, dst, srcStride, dstStride, width, height, scale, 0 + correction, 0 + correction, offset); // NOTE(review): the same value 'correction' is passed in both positions, so the presumed round argument is not a power of two - confirm intended
+    }
+}
+
+unsigned int Lookahead::weightCostLuma(int b, pixel *src, wpScalingParam *w) // returns the sum over 8x8 blocks of min(satd(src, frames[b]), intraCost); src is weighted by w first when w is non-NULL
+{
+    Lowres *fenc = frames[b];
+    unsigned int cost = 0;
+    int stride = fenc->lumaStride; // also used to step through src - assumes src shares this stride, TODO confirm
+    int lines = fenc->lines;
+    int width = fenc->width;
+    pixel *fenc_plane = fenc->lowresPlane[0];
+
+    ALIGN_VAR_16(pixel, buf[8 * 8]); // scratch 8x8 block that receives the weighted src pixels
+    int pixoff = 0; // offset of the current 8x8 block, applied to both src and fenc_plane
+    int mb = 0; // running block index into fenc->intraCost
+
+    if (w)
+    {
+        for (int y = 0; y < lines; y += 8, pixoff = y * stride) // pixoff restarts at the first block of the next block row
+        {
+            for (int x = 0; x < width; x += 8, mb++, pixoff += 8)
+            {
+                // TODO: prepare a full weighted plane (blocks are currently weighted one at a time into buf)
+                mcWeight(buf, 8, &src[pixoff], stride, w, 8, 8);
+                int cmp = primitives.satd[LUMA_8x8](buf, 8, &fenc_plane[pixoff], stride);
+                cost += X265_MIN(cmp, fenc->intraCost[mb]); // a block never contributes more than its intra cost
+            }
+        }
+    }
+    else
+        for (int y = 0; y < lines; y += 8, pixoff = y * stride) // same walk as above, comparing src directly without weighting
+        {
+            for (int x = 0; x < width; x += 8, mb++, pixoff += 8)
+            {
+                int cmp = primitives.satd[LUMA_8x8](&src[pixoff], stride, &fenc_plane[pixoff], stride);
+                cost += X265_MIN(cmp, fenc->intraCost[mb]);
+            }
+        }
+
+    x265_emms();
+    return cost;
+}
+
+/* Estimates an explicit luma weight (scale, log2 denominator, offset) for predicting frame b from
+ * reference frame p0, adapted from the x264 lookahead weight decision. A guess derived from the
+ * frames' wp_ssd / wp_sum statistics is scored with weightCostLuma() and rejected when it is the
+ * identity weight or its cost exceeds 99.8% of the unweighted cost.
+ * NOTE: the decision is left in the local `w` only; nothing is stored or returned here. */
+void Lookahead::weightsAnalyse(int b, int p0)
+{
+    Lowres *fenc = frames[b];
+    Lowres *ref  = frames[p0];
+    /* epsilon is chosen to require at least a numerator of 127 (with denominator = 128) */
+    const float epsilon = 1.f / 128.f;
+    wpScalingParam w;
+    SET_WEIGHT(w, 0, 1, 0, 0);
+
+    /* A reference whose wp_ssd is zero gives no scale information: use unity instead of dividing by zero */
+    float guess_scale = ref->wp_ssd[0] ? sqrtf((float)fenc->wp_ssd[0] / ref->wp_ssd[0]) : 1.f;
+    float fenc_mean = (float)fenc->wp_sum[0] / (fenc->lines * fenc->width) / (1 << (X265_DEPTH - 8));
+    float ref_mean  = (float)ref->wp_sum[0] / (fenc->lines * fenc->width) / (1 << (X265_DEPTH - 8));
+
+    /* Only plane 0 (luma) is analysed here */
+    int minoff = 0, minscale, mindenom;
+    unsigned int minscore = 0, origscore = 1;
+    int found = 0;
+
+    /* Early termination: means and scale are already close enough, w stays "no weight" */
+    if (fabsf(ref_mean - fenc_mean) < 0.5f && fabsf(1.f - guess_scale) < epsilon)
+        return;
+
+    w.setFromWeightAndOffset((int)(guess_scale * 128 + 0.5), 0);
+    mindenom = w.log2WeightDenom;
+    minscale = w.inputWeight;
+
+    /* weightCostLuma() reads fenc->intraCost; presumably the b,b,b estimate is what fills it */
+    if (!fenc->bIntraCalculated)
+        estimateFrameCost(b, b, b, 0);
+    pixel *mcbuf = frames[p0]->lowresPlane[0];
+    origscore = minscore = weightCostLuma(b, mcbuf, NULL);
+
+    if (!minscore)
+        return;
+
+    unsigned int s = 0;
+    int cur_scale = minscale;
+    int cur_offset = (int)(fenc_mean - ref_mean * cur_scale / (1 << mindenom) + 0.5f);
+    if (cur_offset < -128 || cur_offset > 127)
+    {
+        /* Rescale considering the constraints on cur_offset. We do it in this order
+         * because scale has a much wider range than offset (because of denom), so
+         * it should almost never need to be clamped. */
+        cur_offset = Clip3(-128, 127, cur_offset);
+        if (ref_mean > 0.f) /* a zero mean cannot be rescaled against: keep the current scale */
+        {
+            /* clamp while still in float: with a tiny ref_mean the quotient can leave the int range */
+            float scaled = (1 << mindenom) * (fenc_mean - cur_offset) / ref_mean + 0.5f;
+            cur_scale = scaled <= 0.f ? 0 : (scaled >= 127.f ? 127 : (int)scaled);
+        }
+    }
+    SET_WEIGHT(w, 1, cur_scale, mindenom, cur_offset);
+    s = weightCostLuma(b, mcbuf, &w);
+    COPY4_IF_LT(minscore, s, minscale, cur_scale, minoff, cur_offset, found, 1);
+
+    /* Use a smaller denominator if possible */
+    while (mindenom > 0 && !(minscale & 1))
+    {
+        mindenom--;
+        minscale >>= 1;
+    }
+
+    /* Back to "no weight" when nothing beat the unweighted cost, the weight is the identity, or the gain is under 0.2% */
+    if (!found || (minscale == 1 << mindenom && minoff == 0) || (float)minscore / origscore > 0.998f)
+    {
+        SET_WEIGHT(w, 0, 1, 0, 0);
+        return;
+    }
+    SET_WEIGHT(w, 1, minscale, mindenom, minoff);
+}
+
 #define NUM_CUS (widthInCU > 2 && heightInCU > 2 ? (widthInCU - 2) * (heightInCU - 2) : widthInCU * heightInCU)
 
 int Lookahead::estimateFrameCost(int p0, int p1, int b, bool bIntraPenalty)
@@ -197,10 +340,6 @@
     int score = 0;
     Lowres *fenc = frames[b];
 
-    curb = b;
-    curp0 = p0;
-    curp1 = p1;
-
     if (fenc->costEst[b - p0][p1 - b] >= 0 && fenc->rowSatds[b - p0][p1 - b][0] != -1)
         score = fenc->costEst[b - p0][p1 - b];
     else
@@ -209,9 +348,22 @@
         bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x == 0x7FFF;
         bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x == 0x7FFF;
 
-        if (bDoSearch[0]) fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
+        if (bDoSearch[0])
+        {
+            if (cfg->param.bEnableWeightedPred && b == p1)
+            {
+                weightsAnalyse(b, p0);
+            }
+
+            bDoSearch[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x == 0x7FFF;
+            bDoSearch[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x == 0x7FFF;
+            fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
+        }
         if (bDoSearch[1]) fenc->lowresMvs[1][p1 - b - 1][0].x = 0;
 
+        curb = b;
+        curp0 = p0;
+        curp1 = p1;
         fenc->costEst[b - p0][p1 - b] = 0;
         fenc->costEstAq[b - p0][p1 - b] = 0;
         // TODO: use lowres MVs as motion candidates in full-res search
@@ -613,14 +765,6 @@
             } */
         }
 
-        /* Analyse for weighted P frames
-        if (!h->param.rc.b_stat_read && h->lookahead->next.list[bframes]->i_type == X264_TYPE_P
-            && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE)
-        {
-            x265_emms();
-            x264_weights_analyse(h, h->lookahead->next.list[bframes], h->lookahead->last_nonb, 0);
-        }*/
-
         /* dequeue all frames from inputQueue that are about to be enqueued
          * in the output queue.  The order is important because TComPic can
          * only be in one list at a time */
diff -r 5683ee5b793c -r dea83349f786 source/encoder/slicetype.h
--- a/source/encoder/slicetype.h	Wed Nov 13 13:53:13 2013 +0000
+++ b/source/encoder/slicetype.h	Thu Nov 14 18:04:11 2013 +0530
@@ -52,6 +52,7 @@
     int widthInCU;
     int heightInCU;
     int merange;
+    Lowres *weightedRef; // NOTE(review): not referenced anywhere else in this patch - presumably used by a follow-up change, confirm
 
     LookaheadRow()
     {
@@ -110,6 +111,10 @@
     int slicetypePathCost(char *path, int threshold);
 
     void processRow(int row);
+
+    void weightsAnalyse(int b, int p0); // luma weight decision for frames[b] predicted from frames[p0]
+    unsigned int weightCostLuma(int b, pixel *src, wpScalingParam *w); // cost of predicting frames[b] from src; w may be NULL for no weighting
+    pixel* weightCostInit(int b, int p0, pixel *dest); // NOTE(review): no definition is visible in this patch - confirm where it is implemented
 };
 }
 


More information about the x265-devel mailing list