[x265-commits] [x265] motion: fixup weightp - use unweighted reference pixels as interpolation source
Steve Borho
steve at borho.org
Thu Nov 7 08:45:47 CET 2013
details: http://hg.videolan.org/x265/rev/93cccbe49a93
branches:
changeset: 4914:93cccbe49a93
user: Steve Borho <steve at borho.org>
date: Wed Nov 06 19:49:38 2013 -0600
description:
motion: fixup weightp - use unweighted reference pixels as interpolation source
Subject: [x265] api: simplistic auto-determination of frame thread count
details: http://hg.videolan.org/x265/rev/90d9c1067f50
branches:
changeset: 4915:90d9c1067f50
user: Steve Borho <steve at borho.org>
date: Wed Nov 06 21:38:18 2013 -0600
description:
api: simplistic auto-determination of frame thread count
Subject: [x265] api: output x265_picture.bitDepth should reflect actual internal bitdepth
details: http://hg.videolan.org/x265/rev/0ab509a661c7
branches:
changeset: 4916:0ab509a661c7
user: Steve Borho <steve at borho.org>
date: Thu Nov 07 00:16:48 2013 -0600
description:
api: output x265_picture.bitDepth should reflect actual internal bitdepth
It now reports X265_DEPTH rather than sizeof(Pel) * 8.
Subject: [x265] tcompicyuv: fix for copyFromPicture() when HIGH_BIT_DEPTH is enabled, sizeof(Pel)=2 and pic.bitDepth=8
details: http://hg.videolan.org/x265/rev/ed1b1a7b0b38
branches:
changeset: 4917:ed1b1a7b0b38
user: Gopu Govindaswamy <gopu at multicorewareinc.com>
date: Thu Nov 07 13:05:53 2013 +0530
description:
tcompicyuv: fix for copyFromPicture() when HIGH_BIT_DEPTH is enabled, sizeof(Pel)=2 and pic.bitDepth=8
diffstat:
source/Lib/TLibCommon/TComPicYuv.cpp | 87 ++++++++++++++++++++++++++++++++++++
source/common/common.cpp | 6 +-
source/encoder/encoder.cpp | 26 ++++++++--
source/encoder/motion.cpp | 4 +-
source/x265.cpp | 2 +-
source/x265.h | 4 +-
6 files changed, 116 insertions(+), 13 deletions(-)
diffs (230 lines):
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/Lib/TLibCommon/TComPicYuv.cpp
--- a/source/Lib/TLibCommon/TComPicYuv.cpp Wed Nov 06 17:51:53 2013 -0600
+++ b/source/Lib/TLibCommon/TComPicYuv.cpp Thu Nov 07 13:05:53 2013 +0530
@@ -423,6 +423,93 @@ void TComPicYuv::copyFromPicture(const x
}
}
}
+ else if(pic.bitDepth == 8)
+ {
+ uint8_t *y = (uint8_t*)pic.planes[0];
+ uint8_t *u = (uint8_t*)pic.planes[1];
+ uint8_t *v = (uint8_t*)pic.planes[2];
+
+ /* width and height - without padsize */
+ int width = m_picWidth - padx;
+ int height = m_picHeight - pady;
+
+ // Manually copy pixels to up-size them
+ for (int r = 0; r < height; r++)
+ {
+ for (int c = 0; c < width; c++)
+ {
+ Y[c] = (Pel)y[c];
+ }
+
+ Y += getStride();
+ y += pic.stride[0];
+ }
+
+ for (int r = 0; r < height >> m_vChromaShift; r++)
+ {
+ for (int c = 0; c < width >> m_hChromaShift; c++)
+ {
+ U[c] = (Pel)u[c];
+ V[c] = (Pel)v[c];
+ }
+
+ U += getCStride();
+ V += getCStride();
+ u += pic.stride[1];
+ v += pic.stride[2];
+ }
+
+ /* Extend the right if width is not multiple of minimum CU size */
+
+ if (padx)
+ {
+ Y = getLumaAddr();
+ U = getCbAddr();
+ V = getCrAddr();
+
+ for (int r = 0; r < height; r++)
+ {
+ for (int x = 0; x < padx; x++)
+ {
+ Y[width + x] = Y[width - 1];
+ }
+
+ Y += getStride();
+ }
+
+ for (int r = 0; r < height >> m_vChromaShift; r++)
+ {
+ for (int x = 0; x < padx >> m_hChromaShift; x++)
+ {
+ U[(width >> m_hChromaShift) + x] = U[(width >> m_hChromaShift) - 1];
+ V[(width >> m_hChromaShift) + x] = V[(width >> m_hChromaShift) - 1];
+ }
+
+ U += getCStride();
+ V += getCStride();
+ }
+ }
+
+ /* extend the bottom if height is not multiple of the minimum CU size */
+ if (pady)
+ {
+ width = m_picWidth;
+ Y = getLumaAddr() + (height - 1) * getStride();
+ U = getCbAddr() + ((height >> m_vChromaShift) - 1) * getCStride();
+ V = getCrAddr() + ((height >> m_vChromaShift) - 1) * getCStride();
+
+ for (uint32_t i = 1; i <= pady; i++)
+ {
+ memcpy(Y + i * getStride(), Y, width * sizeof(Pel));
+ }
+
+ for (uint32_t j = 1; j <= pady >> m_vChromaShift; j++)
+ {
+ memcpy(U + j * getCStride(), U, (width >> m_hChromaShift) * sizeof(Pel));
+ memcpy(V + j * getCStride(), V, (width >> m_hChromaShift) * sizeof(Pel));
+ }
+ }
+ }
else
#endif // if HIGH_BIT_DEPTH
{
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/common/common.cpp
--- a/source/common/common.cpp Wed Nov 06 17:51:53 2013 -0600
+++ b/source/common/common.cpp Thu Nov 07 13:05:53 2013 +0530
@@ -141,7 +141,7 @@ void x265_param_default(x265_param *para
/* Applying non-zero default values to all elements in the param structure */
param->logLevel = X265_LOG_INFO;
param->bEnableWavefront = 1;
- param->frameNumThreads = 1;
+ param->frameNumThreads = 0;
param->inputBitDepth = 8;
param->sourceCsp = X265_CSP_I420;
@@ -444,8 +444,8 @@ int x265_check_params(x265_param *param)
"Search Range must be less than 32768");
CHECK(param->keyframeMax < 0,
"Keyframe interval must be 0 (auto) 1 (intra-only) or greater than 1");
- CHECK(param->frameNumThreads <= 0,
- "frameNumThreads (--frame-threads) must be 1 or higher");
+ CHECK(param->frameNumThreads < 0,
+ "frameNumThreads (--frame-threads) must be 0 or higher");
CHECK(param->cbQpOffset < -12, "Min. Chroma Cb QP Offset is -12");
CHECK(param->cbQpOffset > 12, "Max. Chroma Cb QP Offset is 12");
CHECK(param->crQpOffset < -12, "Min. Chroma Cr QP Offset is -12");
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Wed Nov 06 17:51:53 2013 -0600
+++ b/source/encoder/encoder.cpp Thu Nov 07 13:05:53 2013 +0530
@@ -255,7 +255,7 @@ int Encoder::encode(bool flush, const x2
{
TComPicYuv *recpic = out->getPicYuvRec();
pic_out->poc = out->getSlice()->getPOC();
- pic_out->bitDepth = sizeof(Pel) * 8;
+ pic_out->bitDepth = X265_DEPTH;
pic_out->userData = out->m_userData;
pic_out->pts = out->m_pts;
switch (out->getSlice()->getSliceType())
@@ -1024,11 +1024,26 @@ void Encoder::configure(x265_param *_par
_param->poolNumThreads = 1;
setThreadPool(ThreadPool::allocThreadPool(_param->poolNumThreads));
- int actual = ThreadPool::getThreadPool()->getThreadCount();
- if (actual > 1)
+ int poolThreadCount = ThreadPool::getThreadPool()->getThreadCount();
+ int rows = (_param->sourceHeight + _param->maxCUSize - 1) / _param->maxCUSize;
+
+ if (_param->frameNumThreads == 0)
{
- x265_log(_param, X265_LOG_INFO, "WPP streams / pool / frames : %d / %d / %d\n",
- (_param->sourceHeight + _param->maxCUSize - 1) / _param->maxCUSize, actual, _param->frameNumThreads);
+ // auto-detect frame threads
+ if (poolThreadCount > 32)
+ _param->frameNumThreads = 6; // dual-socket 10-core IvyBridge or higher
+ else if (poolThreadCount >= 16)
+ _param->frameNumThreads = 5; // 8 HT cores, or dual socket
+ else if (poolThreadCount >= 12)
+ _param->frameNumThreads = 3; // 6 HT cores
+ else if (poolThreadCount >= 4)
+ _param->frameNumThreads = 2; // Dual or Quad core
+ else
+ _param->frameNumThreads = 1;
+ }
+ if (poolThreadCount > 1)
+ {
+ x265_log(_param, X265_LOG_INFO, "WPP streams / pool / frames : %d / %d / %d\n", rows, poolThreadCount, _param->frameNumThreads);
}
else if (_param->frameNumThreads > 1)
{
@@ -1044,7 +1059,6 @@ void Encoder::configure(x265_param *_par
{
x265_log(_param, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n");
}
-
if (!_param->keyframeMin)
{
_param->keyframeMin = _param->keyframeMax;
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Wed Nov 06 17:51:53 2013 -0600
+++ b/source/encoder/motion.cpp Thu Nov 07 13:05:53 2013 +0530
@@ -1147,12 +1147,12 @@ me_hex2:
int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
{
- pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
int xFrac = qmv.x & 0x3;
int yFrac = qmv.y & 0x3;
if ((yFrac | xFrac) == 0)
{
+ pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
}
else
@@ -1162,6 +1162,7 @@ int MotionEstimate::subpelCompare(Refere
int shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
int shift = ref->shift + shiftNum;
int round = shift ? (1 << (shift - 1)) : 0;
+ pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
if (yFrac == 0)
{
@@ -1184,6 +1185,7 @@ int MotionEstimate::subpelCompare(Refere
}
else
{
+ pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
if (yFrac == 0)
{
primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/x265.cpp
--- a/source/x265.cpp Wed Nov 06 17:51:53 2013 -0600
+++ b/source/x265.cpp Thu Nov 07 13:05:53 2013 +0530
@@ -266,7 +266,7 @@ void CLIOptions::showHelp(x265_param *pa
H0(" --threads Number of threads for thread pool (0: detect CPU core count, default)\n");
H0("-p/--preset ultrafast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n");
H0("-t/--tune Tune the settings for a particular type of source or situation\n");
- H0("-F/--frame-threads Number of concurrently encoded frames. Default %d\n", param->frameNumThreads);
+ H0("-F/--frame-threads Number of concurrently encoded frames. 0: auto-determined by core count\n");
H0(" --log Logging level 0:ERROR 1:WARNING 2:INFO 3:DEBUG -1:NONE. Default %d\n", param->logLevel);
H0(" --csv Comma separated log file, log level >= 3 frame log, else one line per run\n");
H0(" --y4m Parse input stream as YUV4MPEG2 regardless of file extension\n");
diff -r 60f78cbfacc8 -r ed1b1a7b0b38 source/x265.h
--- a/source/x265.h Wed Nov 06 17:51:53 2013 -0600
+++ b/source/x265.h Thu Nov 07 13:05:53 2013 +0530
@@ -289,8 +289,8 @@ typedef struct x265_param
{
int logLevel;
int bEnableWavefront; ///< enable wavefront parallel processing
- int poolNumThreads; ///< number of threads to allocate for thread pool
- int frameNumThreads; ///< number of concurrently encoded frames
+ int poolNumThreads; ///< number of threads to allocate for thread pool, 0 implies auto-detection (default)
+ int frameNumThreads; ///< number of concurrently encoded frames, 0 implies auto-detection (default)
const char *csvfn; ///< csv log filename. logLevel >= 3 is frame logging, else one line per run
// source specification
More information about the x265-commits
mailing list