[x265-commits] [x265] motion: fixup weightp - use unweighted reference pixels as interpolation source

Steve Borho steve at borho.org
Thu Nov 7 08:45:47 CET 2013


details:   http://hg.videolan.org/x265/rev/93cccbe49a93
branches:  
changeset: 4914:93cccbe49a93
user:      Steve Borho <steve at borho.org>
date:      Wed Nov 06 19:49:38 2013 -0600
description:
motion: fixup weightp - use unweighted reference pixels as interpolation source
Subject: [x265] api: simplistic auto-determination of frame thread count

details:   http://hg.videolan.org/x265/rev/90d9c1067f50
branches:  
changeset: 4915:90d9c1067f50
user:      Steve Borho <steve at borho.org>
date:      Wed Nov 06 21:38:18 2013 -0600
description:
api: simplistic auto-determination of frame thread count
Subject: [x265] api: output x265_picture.bitDepth should reflect actual internal bitdepth

details:   http://hg.videolan.org/x265/rev/0ab509a661c7
branches:  
changeset: 4916:0ab509a661c7
user:      Steve Borho <steve at borho.org>
date:      Thu Nov 07 00:16:48 2013 -0600
description:
api: output x265_picture.bitDepth should reflect actual internal bitdepth

And not sizeof(pixel) * 8.
Subject: [x265] tcompicyuv: fix for copyFromPicture() when HIGH_BIT_DEPTH is enabled, sizeof(Pel)=2 and pic.bitDepth=8

details:   http://hg.videolan.org/x265/rev/ed1b1a7b0b38
branches:  
changeset: 4917:ed1b1a7b0b38
user:      Gopu Govindaswamy <gopu at multicorewareinc.com>
date:      Thu Nov 07 13:05:53 2013 +0530
description:
tcompicyuv: fix for copyFromPicture() when HIGH_BIT_DEPTH is enabled, sizeof(Pel)=2 and pic.bitDepth=8

diffstat:

 source/Lib/TLibCommon/TComPicYuv.cpp |  87 ++++++++++++++++++++++++++++++++++++
 source/common/common.cpp             |   6 +-
 source/encoder/encoder.cpp           |  26 ++++++++--
 source/encoder/motion.cpp            |   4 +-
 source/x265.cpp                      |   2 +-
 source/x265.h                        |   4 +-
 6 files changed, 116 insertions(+), 13 deletions(-)

diffs (230 lines):

diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp	Wed Nov 06 17:51:53 2013 -0600
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp	Thu Nov 07 13:05:53 2013 +0530
@@ -423,6 +423,93 @@ void TComPicYuv::copyFromPicture(const x
             }
         }
     }
+    else if(pic.bitDepth == 8)
+    {
+        uint8_t *y = (uint8_t*)pic.planes[0];
+        uint8_t *u = (uint8_t*)pic.planes[1];
+        uint8_t *v = (uint8_t*)pic.planes[2];
+
+        /* width and height - without padsize */
+        int width = m_picWidth - padx;
+        int height = m_picHeight - pady;
+
+        // Manually copy pixels to up-size them
+        for (int r = 0; r < height; r++)
+        {
+            for (int c = 0; c < width; c++)
+            {
+                Y[c] = (Pel)y[c];
+            }
+
+            Y += getStride();
+            y += pic.stride[0];
+        }
+
+        for (int r = 0; r < height >> m_vChromaShift; r++)
+        {
+            for (int c = 0; c < width >> m_hChromaShift; c++)
+            {
+                U[c] = (Pel)u[c];
+                V[c] = (Pel)v[c];
+            }
+
+            U += getCStride();
+            V += getCStride();
+            u += pic.stride[1];
+            v += pic.stride[2];
+        }
+
+        /* Extend the right if width is not multiple of minimum CU size */
+
+        if (padx)
+        {
+            Y = getLumaAddr();
+            U = getCbAddr();
+            V = getCrAddr();
+
+            for (int r = 0; r < height; r++)
+            {
+                for (int x = 0; x < padx; x++)
+                {
+                    Y[width + x] = Y[width - 1];
+                }
+
+                Y += getStride();
+            }
+
+            for (int r = 0; r < height >> m_vChromaShift; r++)
+            {
+                for (int x = 0; x < padx >> m_hChromaShift; x++)
+                {
+                    U[(width >> m_hChromaShift) + x] = U[(width >> m_hChromaShift) - 1];
+                    V[(width >> m_hChromaShift) + x] = V[(width >> m_hChromaShift) - 1];
+                }
+
+                U += getCStride();
+                V += getCStride();
+            }
+        }
+
+        /* extend the bottom if height is not multiple of the minimum CU size */
+        if (pady)
+        {
+            width = m_picWidth;
+            Y = getLumaAddr() + (height - 1) * getStride();
+            U = getCbAddr() + ((height >> m_vChromaShift) - 1) * getCStride();
+            V = getCrAddr() + ((height >> m_vChromaShift) - 1) * getCStride();
+
+            for (uint32_t i = 1; i <= pady; i++)
+            {
+                memcpy(Y + i * getStride(), Y, width * sizeof(Pel));
+            }
+
+            for (uint32_t j = 1; j <= pady >> m_vChromaShift; j++)
+            {
+                memcpy(U + j * getCStride(), U, (width >> m_hChromaShift) * sizeof(Pel));
+                memcpy(V + j * getCStride(), V, (width >> m_hChromaShift) * sizeof(Pel));
+            }
+        }
+    }
     else
 #endif // if HIGH_BIT_DEPTH
     {
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/common/common.cpp
--- a/source/common/common.cpp	Wed Nov 06 17:51:53 2013 -0600
+++ b/source/common/common.cpp	Thu Nov 07 13:05:53 2013 +0530
@@ -141,7 +141,7 @@ void x265_param_default(x265_param *para
     /* Applying non-zero default values to all elements in the param structure */
     param->logLevel = X265_LOG_INFO;
     param->bEnableWavefront = 1;
-    param->frameNumThreads = 1;
+    param->frameNumThreads = 0;
     param->inputBitDepth = 8;
     param->sourceCsp = X265_CSP_I420;
 
@@ -444,8 +444,8 @@ int x265_check_params(x265_param *param)
           "Search Range must be less than 32768");
     CHECK(param->keyframeMax < 0,
           "Keyframe interval must be 0 (auto) 1 (intra-only) or greater than 1");
-    CHECK(param->frameNumThreads <= 0,
-          "frameNumThreads (--frame-threads) must be 1 or higher");
+    CHECK(param->frameNumThreads < 0,
+          "frameNumThreads (--frame-threads) must be 0 or higher");
     CHECK(param->cbQpOffset < -12, "Min. Chroma Cb QP Offset is -12");
     CHECK(param->cbQpOffset >  12, "Max. Chroma Cb QP Offset is  12");
     CHECK(param->crQpOffset < -12, "Min. Chroma Cr QP Offset is -12");
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Wed Nov 06 17:51:53 2013 -0600
+++ b/source/encoder/encoder.cpp	Thu Nov 07 13:05:53 2013 +0530
@@ -255,7 +255,7 @@ int Encoder::encode(bool flush, const x2
         {
             TComPicYuv *recpic = out->getPicYuvRec();
             pic_out->poc = out->getSlice()->getPOC();
-            pic_out->bitDepth = sizeof(Pel) * 8;
+            pic_out->bitDepth = X265_DEPTH;
             pic_out->userData = out->m_userData;
             pic_out->pts = out->m_pts;
             switch (out->getSlice()->getSliceType())
@@ -1024,11 +1024,26 @@ void Encoder::configure(x265_param *_par
         _param->poolNumThreads = 1;
 
     setThreadPool(ThreadPool::allocThreadPool(_param->poolNumThreads));
-    int actual = ThreadPool::getThreadPool()->getThreadCount();
-    if (actual > 1)
+    int poolThreadCount = ThreadPool::getThreadPool()->getThreadCount();
+    int rows = (_param->sourceHeight + _param->maxCUSize - 1) / _param->maxCUSize;
+
+    if (_param->frameNumThreads == 0)
     {
-        x265_log(_param, X265_LOG_INFO, "WPP streams / pool / frames  : %d / %d / %d\n",
-                 (_param->sourceHeight + _param->maxCUSize - 1) / _param->maxCUSize, actual, _param->frameNumThreads);
+        // auto-detect frame threads
+        if (poolThreadCount > 32)
+            _param->frameNumThreads = 6;  // dual-socket 10-core IvyBridge or higher
+        else if (poolThreadCount >= 16)
+            _param->frameNumThreads = 5;  // 8 HT cores, or dual socket
+        else if (poolThreadCount >= 12)
+            _param->frameNumThreads = 3;  // 6 HT cores
+        else if (poolThreadCount >= 4)
+            _param->frameNumThreads = 2;  // Dual or Quad core
+        else
+            _param->frameNumThreads = 1;
+    }
+    if (poolThreadCount > 1)
+    {
+        x265_log(_param, X265_LOG_INFO, "WPP streams / pool / frames  : %d / %d / %d\n", rows, poolThreadCount, _param->frameNumThreads);
     }
     else if (_param->frameNumThreads > 1)
     {
@@ -1044,7 +1059,6 @@ void Encoder::configure(x265_param *_par
     {
         x265_log(_param, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n");
     }
-
     if (!_param->keyframeMin)
     {
         _param->keyframeMin = _param->keyframeMax;
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Wed Nov 06 17:51:53 2013 -0600
+++ b/source/encoder/motion.cpp	Thu Nov 07 13:05:53 2013 +0530
@@ -1147,12 +1147,12 @@ me_hex2:
 
 int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
 {
-    pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
     int xFrac = qmv.x & 0x3;
     int yFrac = qmv.y & 0x3;
 
     if ((yFrac | xFrac) == 0)
     {
+        pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
         return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
     }
     else
@@ -1162,6 +1162,7 @@ int MotionEstimate::subpelCompare(Refere
             int shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
             int shift = ref->shift + shiftNum;
             int round = shift ? (1 << (shift - 1)) : 0;
+            pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
 
             if (yFrac == 0)
             {
@@ -1184,6 +1185,7 @@ int MotionEstimate::subpelCompare(Refere
         }
         else
         {
+            pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
             if (yFrac == 0)
             {
                 primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/x265.cpp
--- a/source/x265.cpp	Wed Nov 06 17:51:53 2013 -0600
+++ b/source/x265.cpp	Thu Nov 07 13:05:53 2013 +0530
@@ -266,7 +266,7 @@ void CLIOptions::showHelp(x265_param *pa
     H0("   --threads                     Number of threads for thread pool (0: detect CPU core count, default)\n");
     H0("-p/--preset                      ultrafast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n");
     H0("-t/--tune                        Tune the settings for a particular type of source or situation\n");
-    H0("-F/--frame-threads               Number of concurrently encoded frames. Default %d\n", param->frameNumThreads);
+    H0("-F/--frame-threads               Number of concurrently encoded frames. 0: auto-determined by core count\n");
     H0("   --log                         Logging level 0:ERROR 1:WARNING 2:INFO 3:DEBUG -1:NONE. Default %d\n", param->logLevel);
     H0("   --csv                         Comma separated log file, log level >= 3 frame log, else one line per run\n");
     H0("   --y4m                         Parse input stream as YUV4MPEG2 regardless of file extension\n");
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/x265.h
--- a/source/x265.h	Wed Nov 06 17:51:53 2013 -0600
+++ b/source/x265.h	Thu Nov 07 13:05:53 2013 +0530
@@ -289,8 +289,8 @@ typedef struct x265_param
 {
     int       logLevel;
     int       bEnableWavefront;                ///< enable wavefront parallel processing
-    int       poolNumThreads;                  ///< number of threads to allocate for thread pool
-    int       frameNumThreads;                 ///< number of concurrently encoded frames
+    int       poolNumThreads;                  ///< number of threads to allocate for thread pool, 0 implies auto-detection (default)
+    int       frameNumThreads;                 ///< number of concurrently encoded frames, 0 implies auto-detection (default)
     const char *csvfn;                         ///< csv log filename. logLevel >= 3 is frame logging, else one line per run
 
     // source specification


More information about the x265-commits mailing list