[x265] [PATCH] Slicetype: Modified Lookahead structure and Added slicetype_cu_cost()

Thu Aug 8 13:33:54 CEST 2013

# HG changeset patch
# User ggopu
# Date 1375961599 -19800
# Node ID fe327a9a7a1b695b2363eecd3dc1c8939f303c6c
# Parent  33b0a6829d01735e137b0aadb1b276928a1d8fa4
Slicetype: Modified Lookahead structure and Added slicetype_cu_cost()

diff -r 33b0a6829d01 -r fe327a9a7a1b source/common/lookahead.h

--- a/source/common/lookahead.h	Thu Aug 08 16:19:22 2013 +0530
+++ b/source/common/lookahead.h	Thu Aug 08 17:03:19 2013 +0530
@@ -24,11 +24,13 @@
 #include "x265.h"
 #include "common.h"
 #include "mv.h"
+#include "reference.h"
 
 namespace x265 {
 class ReferencePlanes;
 
 #define X265_BFRAME_MAX 16
+#define FDEC_STRIDE 32
 
 struct LookaheadFrame : public ReferencePlanes
 {
@@ -45,6 +47,8 @@
     uint16_t(*lowresCosts[X265_BFRAME_MAX + 2][X265_BFRAME_MAX + 2]);
     int      *lowresMvCosts[2][X265_BFRAME_MAX + 1];
     MV       *lowresMvs[2][X265_BFRAME_MAX + 1];
+    int      cuWidth;
+    int      cuHeight;
 };
 
 struct Lookahead
@@ -57,5 +61,4 @@
     TComList<TComPic*> inputQueue;      // input pictures in order received
     TComList<TComPic*> outputQueue;     // pictures to be encoded, in encode order
 };
-
 }
diff -r 33b0a6829d01 -r fe327a9a7a1b source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Thu Aug 08 16:19:22 2013 +0530
+++ b/source/encoder/slicetype.cpp	Thu Aug 08 17:03:19 2013 +0530
@@ -1,12 +1,10 @@
 /*****************************************************************************
- * slicetype.c: lookahead analysis
- *****************************************************************************
- * Copyright (C) 2005-2013 x264 project
+ * Copyright (C) 2013 x265 project
  *
- * Authors: Jason Garrett-Glaser <darkshikari at gmail.com>
- *          Loren Merritt <lorenm at u.washington.edu>
- *          Dylan Yudaken <dyudaken at gmail.com>
- *
+ * Authors: Steve Borho <steve at borho.org>
+ *          Gopu Govindaswamy <gopu at multicorewareinc.com>
+ *          Mandar Gurav <mandar at multicorewareinc.com>
+ * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -22,27 +20,149 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  *
  * This program is also available under a commercial proprietary license.
- * For more information, contact us at licensing at x264.com.
+ * For more information, contact us at licensing at multicorewareinc.com.
  *****************************************************************************/
 
-// Short history:
-//
-// This file was originally borrowed from x264 source tree circa Dec 4, 2012
-// with x264 bug fixes applied from Dec 11th and Jan 8th 2013.  But without
-// taking any of the threading changes because we will eventually use the x265
-// thread pool and wavefront processing.
+#include "x265.h"
+#include "lookahead.h"
+#include "primitives.h"
 
-#include "common/common.h"
-#include "macroblock.h"
-#include "me.h"
+int slicetype_frame_cost(x265::LookaheadFrame **frames, int p0, int p1, int b, int bIntraPenalty)
+{
+    int score = 0;
+    int do_search[2];
+    x265::LookaheadFrame *fenc;
 
+    fenc = frames[b];
+
+    /* Currently defaults to 0; this should eventually come from param->bframebias */
+    int bframe_bias = 0;
+
+    if (fenc->costEst[b - p0][p1 - b] >= 0 && fenc->rowSatds[b - p0][p1 - b][0] != -1)
+        score = fenc->costEst[b - p0][p1 - b];
+    else
+    {
+        int dist_scale_factor = 128;
+
+        /* For each list, check to see whether we have lowres motion-searched this reference frame before. */
+        do_search[0] = b != p0 && fenc->lowresMvs[0][b - p0 - 1][0].x == 0x7FFF;
+        do_search[1] = b != p1 && fenc->lowresMvs[1][p1 - b - 1][0].x == 0x7FFF;
+
+        if (do_search[0])
+        {
+            fenc->lowresMvs[0][b - p0 - 1][0].x = 0;
+        }
+
+        if (do_search[1]) fenc->lowresMvs[1][p1 - b - 1][0].x = 0;
+
+        if (p1 != p0)
+            dist_scale_factor = (((b - p0) << 8) + ((p1 - p0) >> 1)) / (p1 - p0);
+
+        fenc->costEst[b - p0][p1 - b] = 0;
+        fenc->costEst[b - p0][p1 - b] = 0;
+
+        /* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
+        * This considerably improves MV prediction overall. */
+
+        /* The edge mbs seem to reduce the predictive quality of the
+        * whole frame's score, but are needed for a spatial distribution. */
+
+        for (int i = fenc->cuWidth - 1; i >= 0; i--)
+        {
+            for (int j = fenc->cuHeight - 1; j >= 0; j--)
+            {
+                slicetype_cu_cost(frames, p0, p1, b, dist_scale_factor, do_search);
+            }
+        }
+
+        score = fenc->costEst[b - p0][p1 - b];
+
+        if (b != p1) /* TODO: verify the magic number 120, taken from x264 (NOTE: x264 adds the B-frame bias to this divisor, not to the result) */
+            score = (uint64_t)score * 100 / (120) + bframe_bias;
+
+        fenc->costEst[b - p0][p1 - b] = score;
+        x265_emms();
+    }
+
+    if (bIntraPenalty)
+    {
+        // arbitrary penalty for I-blocks after B-frames
+        int nmb = fenc->cuWidth * fenc->cuHeight;
+        score += (uint64_t)score * fenc->intraMbs[b - p0] / (nmb * 8);
+    }
+    return score;
+}
+
+int slicetype_cu_cost(x265::LookaheadFrame **frames, int p0, int p1, int b, int dist_scale_factor, int do_search[2])
+{
+    x265::LookaheadFrame *fref0 = frames[p0];
+    x265::LookaheadFrame *fref1 = frames[p1];
+    x265::LookaheadFrame *fenc  = frames[b];
+
+    /* TODO: clarify how the motion vector should be initialized in this function */
+    x265::MV *pmv;
+
+    const int b_bidir = (b < p1);
+    const int cu_x = pmv->x;
+    const int cu_y = pmv->y;
+
+    const int cu_stride = fenc->cuWidth;
+    const int cu_xy = cu_x + cu_y * cu_stride;
+    const int stride = fenc->stride;
+    const int pel_offset = 8 * (cu_x + cu_y * stride);
+
+    x265::MV(*fenc_mvs[2][2]) = { &fenc->lowresMvs[0][b - p0 - 1][cu_xy], &fenc->lowresMvs[1][p1 - b - 1][cu_xy] };
+
+    int(*fenc_costs[2]) = { &fenc->lowresMvCosts[0][b - p0 - 1][cu_xy], &fenc->lowresMvCosts[1][p1 - b - 1][cu_xy] };
+
+    int b_frame_score_mb = (cu_x > 0 && cu_x < fenc->cuWidth - 1 &&
+                            cu_y > 0 && cu_y < fenc->cuHeight - 1) ||
+        fenc->cuWidth <= 2 || fenc->cuHeight <= 2;
+
+    ALIGN_VAR_16(pixel, *pix1, [9 * FDEC_STRIDE]);
+    pixel *pix2 = pix1 + 8;
+    int i_bcost = 0;
+    int list_used = 0;
+
+    /* A small, arbitrary bias to avoid VBV problems caused by zero-residual lookahead blocks. */
+    int lowres_penalty = 4;
+    pixel *buffer[4];
+    buffer[0] = fenc->buffer[0];
+
+    /* TODO: needs confirmation -- x264 copies a square 8x8 block here; we do the same with 32x32, although our maximum CU size is 64x64 */
+    x265::primitives.blockcpy_pp(32, 32, buffer[0], FENC_STRIDE, fenc->m_lumaPlane[0][pel_offset], stride);
+
+    if (p0 == p1)
+        goto lowres_intra_mb;
+
+lowres_intra_mb:
+
+    /* TODO: determine how to obtain this lambda */
+    int lambda;
+
+    ALIGN_VAR_16(pixel, edge, [36]);
+    pixel *pix = &pix1[8 + FDEC_STRIDE - 1];
+    pixel *src = fenc->m_lumaPlane[0][pel_offset - 1];
+    const int intra_penalty = 5 * lambda;
+    int satds[3];
+
+    memcpy(pix - FDEC_STRIDE, src - stride, 17 * sizeof(pixel));
+    for (int i = 0; i < 32; i++)
+    {
+        pix[i * FDEC_STRIDE] = src[i * stride];
+    }
+
+    pix++;
+
+    //TODO: Calculate the x3_satd costs
+
+    return 0;
+}
+
+#if 0
 // Indexed by pic_struct values
 static const uint8_t delta_tfi_divisor[10] = { 0, 2, 1, 1, 2, 2, 3, 3, 4, 6 };
 
-static int x264_slicetype_frame_cost(x264_t *h, x264_mb_analysis_t *a,
-                                     x264_frame_t **frames, int p0, int p1, int b,
-                                     int b_intra_penalty);
-
 static void x264_lowres_context_init(x264_t *h, x264_mb_analysis_t *a)
 {
     a->i_qp = X264_LOOKAHEAD_QP;
@@ -677,103 +797,6 @@
      (h->mb.i_mb_width - 2) * (h->mb.i_mb_height - 2) : \
      h->mb.i_mb_width * h->mb.i_mb_height)
 
-static int x264_slicetype_frame_cost(x264_t *h, x264_mb_analysis_t *a,
-                                     x264_frame_t **frames, int p0, int p1, int b,
-                                     int b_intra_penalty)
-{
-    int i_score = 0;
-    int do_search[2];
-    const x264_weight_t *w = x264_weight_none;
-
-    /* Check whether we already evaluated this frame
-     * If we have tried this frame as P, then we have also tried
-     * the preceding frames as B. (is this still true?) */
-    /* Also check that we already calculated the row SATDs for the current frame. */
-    if (frames[b]->i_cost_est[b - p0][p1 - b] >= 0 && (!h->param.rc.i_vbv_buffer_size || frames[b]->i_row_satds[b - p0][p1 - b][0] != -1))
-        i_score = frames[b]->i_cost_est[b - p0][p1 - b];
-    else
-    {
-        int dist_scale_factor = 128;
-        int *row_satd = frames[b]->i_row_satds[b - p0][p1 - b];
-        int *row_satd_intra = frames[b]->i_row_satds[0][0];
-
-        /* For each list, check to see whether we have lowres motion-searched this reference frame before. */
-        do_search[0] = b != p0 && frames[b]->lowres_mvs[0][b - p0 - 1][0][0] == 0x7FFF;
-        do_search[1] = b != p1 && frames[b]->lowres_mvs[1][p1 - b - 1][0][0] == 0x7FFF;
-        if (do_search[0])
-        {
-            if (h->param.analyse.i_weighted_pred && b == p1)
-            {
-                x264_emms();
-                x264_weights_analyse(h, frames[b], frames[p0], 1);
-                w = frames[b]->weight[0];
-            }
-            frames[b]->lowres_mvs[0][b - p0 - 1][0][0] = 0;
-        }
-        if (do_search[1]) frames[b]->lowres_mvs[1][p1 - b - 1][0][0] = 0;
-
-        if (b == p1)
-            frames[b]->i_intra_mbs[b - p0] = 0;
-        if (!frames[b]->b_intra_calculated)
-        {
-            frames[b]->i_cost_est[0][0] = 0;
-            frames[b]->i_cost_est_aq[0][0] = 0;
-        }
-        if (p1 != p0)
-            dist_scale_factor = (((b - p0) << 8) + ((p1 - p0) >> 1)) / (p1 - p0);
-
-        frames[b]->i_cost_est[b - p0][p1 - b] = 0;
-        frames[b]->i_cost_est_aq[b - p0][p1 - b] = 0;
-
-        /* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
-         * This considerably improves MV prediction overall. */
-
-        /* The edge mbs seem to reduce the predictive quality of the
-         * whole frame's score, but are needed for a spatial distribution. */
-        if (h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
-            h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2)
-        {
-            for (h->mb.i_mb_y = h->mb.i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y--)
-            {
-                row_satd[h->mb.i_mb_y] = 0;
-                if (!frames[b]->b_intra_calculated)
-                    row_satd_intra[h->mb.i_mb_y] = 0;
-                for (h->mb.i_mb_x = h->mb.i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x--)
-                {
-                    x264_slicetype_mb_cost(h, a, frames, p0, p1, b, dist_scale_factor, do_search, w);
-                }
-            }
-        }
-        else
-        {
-            for (h->mb.i_mb_y = h->mb.i_mb_height - 2; h->mb.i_mb_y >= 1; h->mb.i_mb_y--)
-            {
-                for (h->mb.i_mb_x = h->mb.i_mb_width - 2; h->mb.i_mb_x >= 1; h->mb.i_mb_x--)
-                {
-                    x264_slicetype_mb_cost(h, a, frames, p0, p1, b, dist_scale_factor, do_search, w);
-                }
-            }
-        }
-
-        i_score = frames[b]->i_cost_est[b - p0][p1 - b];
-        if (b != p1)
-            i_score = (uint64_t)i_score * 100 / (120 + h->param.i_bframe_bias);
-        else
-            frames[b]->b_intra_calculated = 1;
-
-        frames[b]->i_cost_est[b - p0][p1 - b] = i_score;
-        x264_emms();
-    }
-
-    if (b_intra_penalty)
-    {
-        // arbitrary penalty for I-blocks after B-frames
-        int nmb = NUM_MBS;
-        i_score += (uint64_t)i_score * frames[b]->i_intra_mbs[b - p0] / (nmb * 8);
-    }
-    return i_score;
-}
-
 static void x264_macroblock_tree_finish(x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance)
 {
     int fps_factor = round(CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256);
@@ -1624,3 +1647,5 @@
             h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
     }
 }
+
+#endif // if 0