[x265-commits] [x265] cli: change --keyint short option to capital I, add min-k...

Steve Borho steve at borho.org
Wed Feb 5 00:09:08 CET 2014


details:   http://hg.videolan.org/x265/rev/2beb0bfb9503
branches:  
changeset: 6002:2beb0bfb9503
user:      Steve Borho <steve at borho.org>
date:      Tue Feb 04 12:17:11 2014 -0600
description:
cli: change --keyint short option to capital I, add min-keyint, [no-]shortcut

All these options now match x264's command line features
Subject: [x265] common: move malloc/free macros to our header

details:   http://hg.videolan.org/x265/rev/0bd4e7603ea1
branches:  
changeset: 6003:0bd4e7603ea1
user:      Steve Borho <steve at borho.org>
date:      Tue Feb 04 13:23:05 2014 -0600
description:
common: move malloc/free macros to our header
Subject: [x265] wavefront: rename bitmaps for clarity, add more comments

details:   http://hg.videolan.org/x265/rev/23d30a6d4db1
branches:  
changeset: 6004:23d30a6d4db1
user:      Steve Borho <steve at borho.org>
date:      Tue Feb 04 13:24:03 2014 -0600
description:
wavefront: rename bitmaps for clarity, add more comments
Subject: [x265] slicetype: weight the extended lowres frame prior to MC cost estimate

details:   http://hg.videolan.org/x265/rev/592675e50c29
branches:  
changeset: 6005:592675e50c29
user:      Steve Borho <steve at borho.org>
date:      Tue Feb 04 13:38:09 2014 -0600
description:
slicetype: weight the extended lowres frame prior to MC cost estimate

valgrind spotted this problem where the top and bottom padded areas of the
lowres reference frame were not weighted into the weightedRef buffer, so any
lowres MVs used for MC that referenced past the top or bottom picture boundary
would access uninitialized pixels.
Subject: [x265] ipfilterharness: fix loop bounds, caused crashes in 16bpp

details:   http://hg.videolan.org/x265/rev/c16c7b8416ee
branches:  
changeset: 6006:c16c7b8416ee
user:      Steve Borho <steve at borho.org>
date:      Tue Feb 04 16:06:55 2014 -0600
description:
ipfilterharness: fix loop bounds, caused crashes in 16bpp
Subject: [x265] lowres: initialize downscale planes immediately after alloc

details:   http://hg.videolan.org/x265/rev/8d2b20447b8a
branches:  
changeset: 6007:8d2b20447b8a
user:      Steve Borho <steve at borho.org>
date:      Tue Feb 04 16:43:44 2014 -0600
description:
lowres: initialize downscale planes immediately after alloc

Valgrind was reporting potential uninitialized memory being used by the lowres
weightp cost estimate function.  It is weighting the entire padded luma plane
instead of weighting just the real pixels and then extending them.  The problem
is that the buffer stride is wider than (width + padw*2).  We round up the
stride to the nearest multiple of 32 so the row starts are well aligned, and
those pixels at the very right edge of the buffer were never written to.  They
should never be used by the encoder, but the weight_pp primitive does try to
weight them; and the last step of the weight function is a clip, and that
conditional move is what triggered the valgrind warning.

diffstat:

 source/Lib/TLibCommon/CommonDef.h |   4 ---
 source/common/common.cpp          |   6 ++++-
 source/common/common.h            |   2 +
 source/common/lowres.cpp          |   5 +++-
 source/common/wavefront.cpp       |  40 +++++++++++++++++++-------------------
 source/common/wavefront.h         |  32 ++++++++++++++++++++++--------
 source/encoder/slicetype.cpp      |  11 +++++----
 source/encoder/slicetype.h        |   2 +-
 source/test/ipfilterharness.cpp   |   2 +-
 source/x265.cpp                   |  13 +++++++++--
 10 files changed, 72 insertions(+), 45 deletions(-)

diffs (truncated from 368 to 300 lines):

diff -r 4be0ca7b4448 -r 8d2b20447b8a source/Lib/TLibCommon/CommonDef.h
--- a/source/Lib/TLibCommon/CommonDef.h	Tue Feb 04 11:28:17 2014 -0600
+++ b/source/Lib/TLibCommon/CommonDef.h	Tue Feb 04 16:43:44 2014 -0600
@@ -138,10 +138,6 @@
 
 #define NOT_VALID                  -1
 
-// for use in HM, replaces old xMalloc/xFree macros
-#define X265_MALLOC(type, count)    (type*)x265_malloc(sizeof(type) * (count))
-#define X265_FREE(ptr)              x265_free(ptr)
-
 // ====================================================================================================================
 // Coding tool configuration
 // ====================================================================================================================
diff -r 4be0ca7b4448 -r 8d2b20447b8a source/common/common.cpp
--- a/source/common/common.cpp	Tue Feb 04 11:28:17 2014 -0600
+++ b/source/common/common.cpp	Tue Feb 04 16:43:44 2014 -0600
@@ -586,7 +586,7 @@ void x265_print_params(x265_param *param
 
     x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge         : %s / %d / %d / %d\n",
              x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
-    x265_log(param, X265_LOG_INFO, "Keyframe min / max                  : %d / %d\n", param->keyframeMin, param->keyframeMax);
+    x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut       : %d / %d / %d\n", param->keyframeMin, param->keyframeMax, param->scenecutThreshold);
     switch (param->rc.rateControlMode)
     {
     case X265_RC_ABR:
@@ -696,7 +696,9 @@ int x265_param_parse(x265_param *p, cons
     OPT("strong-intra-smoothing") p->bEnableStrongIntraSmoothing = bvalue;
     OPT("constrained-intra") p->bEnableConstrainedIntra = bvalue;
     OPT("open-gop") p->bOpenGOP = bvalue;
+    OPT("scenecut") p->scenecutThreshold = atoi(value);
     OPT("keyint") p->keyframeMax = atoi(value);
+    OPT("min-keyint") p->keyframeMin = atoi(value);
     OPT("rc-lookahead") p->lookaheadDepth = atoi(value);
     OPT("bframes") p->bframes = atoi(value);
     OPT("bframe-bias") p->bFrameBias = atoi(value);
@@ -779,6 +781,8 @@ char *x265_param2string(x265_param *p)
     BOOL(p->bEnableConstrainedIntra, "constrained-intra");
     BOOL(p->bOpenGOP, "open-gop");
     s += sprintf(s, " keyint=%d", p->keyframeMax);
+    s += sprintf(s, " min-keyint=%d", p->keyframeMin);
+    s += sprintf(s, " scenecut=%d", p->scenecutThreshold);
     s += sprintf(s, " rc-lookahead=%d", p->lookaheadDepth);
     s += sprintf(s, " bframes=%d", p->bframes);
     s += sprintf(s, " bframe-bias=%d", p->bFrameBias);
diff -r 4be0ca7b4448 -r 8d2b20447b8a source/common/common.h
--- a/source/common/common.h	Tue Feb 04 11:28:17 2014 -0600
+++ b/source/common/common.h	Tue Feb 04 16:43:44 2014 -0600
@@ -72,6 +72,8 @@
 #define MAX_NAL_UNITS 5
 #define MIN_FIFO_SIZE 1000
 
+#define X265_MALLOC(type, count)    (type*)x265_malloc(sizeof(type) * (count))
+#define X265_FREE(ptr)              x265_free(ptr)
 #define CHECKED_MALLOC(var, type, count) \
     { \
         var = (type*)x265_malloc(sizeof(type) * (count)); \
diff -r 4be0ca7b4448 -r 8d2b20447b8a source/common/lowres.cpp
--- a/source/common/lowres.cpp	Tue Feb 04 11:28:17 2014 -0600
+++ b/source/common/lowres.cpp	Tue Feb 04 16:43:44 2014 -0600
@@ -54,9 +54,12 @@ void Lowres::create(TComPicYuv *orig, in
     propagateCost = (uint16_t*)x265_malloc(sizeof(uint16_t) * cuCount);
 
     /* allocate lowres buffers */
+    size_t planesize = lumaStride * (lines + 2 * orig->getLumaMarginY());
     for (int i = 0; i < 4; i++)
     {
-        buffer[i] = X265_MALLOC(pixel, lumaStride * (lines + 2 * orig->getLumaMarginY()));
+        buffer[i] = X265_MALLOC(pixel, planesize);
+        /* initialize the whole buffer to prevent valgrind warnings on right edge */
+        memset(buffer[i], 0, sizeof(pixel) * planesize);
     }
 
     int padoffset = lumaStride * orig->getLumaMarginY() + orig->getLumaMarginX();
diff -r 4be0ca7b4448 -r 8d2b20447b8a source/common/wavefront.cpp
--- a/source/common/wavefront.cpp	Tue Feb 04 11:28:17 2014 -0600
+++ b/source/common/wavefront.cpp	Tue Feb 04 16:43:44 2014 -0600
@@ -38,15 +38,15 @@ bool WaveFront::init(int numRows)
     if (m_pool)
     {
         m_numWords = (numRows + 63) >> 6;
-        m_queuedBitmap = (uint64_t*)x265_malloc(sizeof(uint64_t) * m_numWords);
-        if (m_queuedBitmap)
-            memset((void*)m_queuedBitmap, 0, sizeof(uint64_t) * m_numWords);
+        m_internalDependencyBitmap = X265_MALLOC(uint64_t, m_numWords);
+        if (m_internalDependencyBitmap)
+            memset((void*)m_internalDependencyBitmap, 0, sizeof(uint64_t) * m_numWords);
 
-        m_enableBitmap = (uint64_t*)x265_malloc(sizeof(uint64_t) * m_numWords);
-        if (m_enableBitmap)
-            memset((void*)m_enableBitmap, 0, sizeof(uint64_t) * m_numWords);
+        m_externalDependencyBitmap = X265_MALLOC(uint64_t, m_numWords);
+        if (m_externalDependencyBitmap)
+            memset((void*)m_externalDependencyBitmap, 0, sizeof(uint64_t) * m_numWords);
 
-        return m_queuedBitmap && m_enableBitmap;
+        return m_internalDependencyBitmap && m_externalDependencyBitmap;
     }
 
     return false;
@@ -54,13 +54,13 @@ bool WaveFront::init(int numRows)
 
 WaveFront::~WaveFront()
 {
-    x265_free((void*)m_queuedBitmap);
-    x265_free((void*)m_enableBitmap);
+    x265_free((void*)m_internalDependencyBitmap);
+    x265_free((void*)m_externalDependencyBitmap);
 }
 
 void WaveFront::clearEnabledRowMask()
 {
-    memset((void*)m_enableBitmap, 0, sizeof(uint64_t) * m_numWords);
+    memset((void*)m_externalDependencyBitmap, 0, sizeof(uint64_t) * m_numWords);
 }
 
 void WaveFront::enqueueRow(int row)
@@ -69,7 +69,7 @@ void WaveFront::enqueueRow(int row)
     uint64_t bit = 1LL << (row & 63);
 
     assert(row < m_numRows);
-    ATOMIC_OR(&m_queuedBitmap[row >> 6], bit);
+    ATOMIC_OR(&m_internalDependencyBitmap[row >> 6], bit);
     m_pool->pokeIdleThread();
 }
 
@@ -79,12 +79,12 @@ void WaveFront::enableRow(int row)
     uint64_t bit = 1LL << (row & 63);
 
     assert(row < m_numRows);
-    ATOMIC_OR(&m_enableBitmap[row >> 6], bit);
+    ATOMIC_OR(&m_externalDependencyBitmap[row >> 6], bit);
 }
 
 void WaveFront::enableAllRows()
 {
-    memset((void*)m_enableBitmap, ~0, sizeof(uint64_t) * m_numWords);
+    memset((void*)m_externalDependencyBitmap, ~0, sizeof(uint64_t) * m_numWords);
 }
 
 bool WaveFront::checkHigherPriorityRow(int curRow)
@@ -95,12 +95,12 @@ bool WaveFront::checkHigherPriorityRow(i
     // Check full bitmap words before curRow
     for (int i = 0; i < fullwords; i++)
     {
-        if (m_queuedBitmap[i] & m_enableBitmap[i])
+        if (m_internalDependencyBitmap[i] & m_externalDependencyBitmap[i])
             return true;
     }
 
     // check the partially masked bitmap word of curRow
-    if (m_queuedBitmap[fullwords] & m_enableBitmap[fullwords] & mask)
+    if (m_internalDependencyBitmap[fullwords] & m_externalDependencyBitmap[fullwords] & mask)
         return true;
     return false;
 }
@@ -112,22 +112,22 @@ bool WaveFront::findJob()
     // thread safe
     for (int w = 0; w < m_numWords; w++)
     {
-        uint64_t oldval = m_queuedBitmap[w];
-        while (oldval & m_enableBitmap[w])
+        uint64_t oldval = m_internalDependencyBitmap[w];
+        while (oldval & m_externalDependencyBitmap[w])
         {
-            uint64_t mask = oldval & m_enableBitmap[w];
+            uint64_t mask = oldval & m_externalDependencyBitmap[w];
 
             CTZ64(id, mask);
 
             uint64_t newval = oldval & ~(1LL << id);
-            if (ATOMIC_CAS(&m_queuedBitmap[w], oldval, newval) == oldval)
+            if (ATOMIC_CAS(&m_internalDependencyBitmap[w], oldval, newval) == oldval)
             {
                 // we cleared the bit, process row
                 processRow(w * 64 + id);
                 return true;
             }
             // some other thread cleared the bit, try another bit
-            oldval = m_queuedBitmap[w];
+            oldval = m_internalDependencyBitmap[w];
         }
     }
 
diff -r 4be0ca7b4448 -r 8d2b20447b8a source/common/wavefront.h
--- a/source/common/wavefront.h	Tue Feb 04 11:28:17 2014 -0600
+++ b/source/common/wavefront.h	Tue Feb 04 16:43:44 2014 -0600
@@ -38,9 +38,13 @@ class WaveFront : public JobProvider
 {
 private:
 
-    // bitmap of rows queued for processing, uses atomic intrinsics
-    uint64_t volatile *m_queuedBitmap;
-    uint64_t volatile *m_enableBitmap;
+    // bitmaps of rows queued for processing, uses atomic intrinsics
+
+    // Dependencies are categorized as internal and external. Internal dependencies
+    // are caused by neighbor block availability.  External dependencies are generally
+    // reference frame reconstructed pixels being available.
+    uint64_t volatile *m_internalDependencyBitmap;
+    uint64_t volatile *m_externalDependencyBitmap;
 
     // number of words in the bitmap
     int m_numWords;
@@ -49,21 +53,31 @@ private:
 
 public:
 
-    WaveFront(ThreadPool *pool) : JobProvider(pool), m_queuedBitmap(0), m_enableBitmap(0) {}
+    WaveFront(ThreadPool *pool)
+        : JobProvider(pool)
+        , m_internalDependencyBitmap(0)
+        , m_externalDependencyBitmap(0)
+    {}
 
     virtual ~WaveFront();
 
     // If returns false, the frame must be encoded in series.
     bool init(int numRows);
 
-    // Enqueue a row to be processed. A worker thread will later call ProcessRow(row)
+    // Enqueue a row to be processed (mark its internal dependencies as resolved).
+    // A worker thread will later call processRow(row).
     // This provider must be enqueued in the pool before enqueuing a row
     void enqueueRow(int row);
 
+    // Mark the row's external dependencies as being resolved
     void enableRow(int row);
 
+    // Mark all row external dependencies as being resolved. Some wavefront
+    // implementations (lookahead, for instance) have no recon pixel dependencies.
     void enableAllRows();
 
+    // Mark all rows as having external dependencies which must be
+    // resolved before each row may proceed.
     void clearEnabledRowMask();
 
     // WaveFront's implementation of JobProvider::findJob. Consults
@@ -71,13 +85,13 @@ public:
     // or returns false
     bool findJob();
 
+    // Start or resume encode processing of this row, must be implemented by
+    // derived classes.
+    virtual void processRow(int row) = 0;
+
     // Returns true if a row above curRow is available for processing.  The processRow()
     // method may call this function periodically and voluntarily exit
     bool checkHigherPriorityRow(int curRow);
-
-    // Start or resume encode processing of this row, must be implemented by
-    // derived classes.
-    virtual void processRow(int row) = 0;
 };
 } // end namespace x265
 
diff -r 4be0ca7b4448 -r 8d2b20447b8a source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Tue Feb 04 11:28:17 2014 -0600
+++ b/source/encoder/slicetype.cpp	Tue Feb 04 16:43:44 2014 -0600
@@ -1153,9 +1153,11 @@ int64_t CostEstimate::estimateFrameCost(
     return score;
 }
 
-uint32_t CostEstimate::weightCostLuma(Lowres **frames, int b, pixel *src, wpScalingParam *wp)
+uint32_t CostEstimate::weightCostLuma(Lowres **frames, int b, int p0, wpScalingParam *wp)
 {
     Lowres *fenc = frames[b];
+    Lowres *ref  = frames[p0];
+    pixel *src = ref->fpelPlane;
     int stride = fenc->lumaStride;
 
     if (wp)
@@ -1166,7 +1168,7 @@ uint32_t CostEstimate::weightCostLuma(Lo
         int correction = IF_INTERNAL_PREC - X265_DEPTH;
 
         // Adding (IF_INTERNAL_PREC - X265_DEPTH) to cancel effect of pixel to short conversion inside the primitive
-        primitives.weight_pp(src, weightedRef.fpelPlane, stride, stride, stride, fenc->lines,
+        primitives.weight_pp(ref->buffer[0], wbuffer[0], stride, stride, stride, paddedLines,
                              scale, (1 << (denom - 1 + correction)), denom + correction, offset);
         src = weightedRef.fpelPlane;
     }
@@ -1219,8 +1221,7 @@ void CostEstimate::weightsAnalyse(Lowres
     mindenom = w.log2WeightDenom;
     minscale = w.inputWeight;
 
-    pixel *mcbuf = frames[p0]->fpelPlane;
-    origscore = minscore = weightCostLuma(frames, b, mcbuf, NULL);
+    origscore = minscore = weightCostLuma(frames, b, p0, NULL);
 
     if (!minscore)
         return;
@@ -1238,7 +1239,7 @@ void CostEstimate::weightsAnalyse(Lowres
         curScale = Clip3(0, 127, curScale);
     }
     SET_WEIGHT(w, 1, curScale, mindenom, curOffset);
-    s = weightCostLuma(frames, b, mcbuf, &w);
+    s = weightCostLuma(frames, b, p0, &w);
     COPY4_IF_LT(minscore, s, minscale, curScale, minoff, curOffset, found, 1);
 
     /* Use a smaller denominator if possible */


More information about the x265-commits mailing list