[x265-commits] [x265] docs: cleanup restructured text for --pools
Steve Borho
steve at borho.org
Tue Feb 24 22:45:51 CET 2015
details: http://hg.videolan.org/x265/rev/7252c10278a1
branches:
changeset: 9405:7252c10278a1
user: Steve Borho <steve at borho.org>
date: Mon Feb 23 23:11:49 2015 -0600
description:
docs: cleanup restructured text for --pools
Subject: [x265] entropy: nit - initialize m_pad
details: http://hg.videolan.org/x265/rev/ba32f5d14a8b
branches:
changeset: 9406:ba32f5d14a8b
user: Steve Borho <steve at borho.org>
date: Tue Feb 24 12:45:56 2015 -0600
description:
entropy: nit - initialize m_pad
The variable is never used, but it looks conspicuous in a debugger when it is
never initialized.
Subject: [x265] threadpool: only mention NUMA node in pool message if multiple are present
details: http://hg.videolan.org/x265/rev/282903f583a3
branches:
changeset: 9407:282903f583a3
user: Steve Borho <steve at borho.org>
date: Tue Feb 24 13:33:40 2015 -0600
description:
threadpool: only mention NUMA node in pool message if multiple are present
We don't want to confuse the large number of users who have single-socket systems.
Subject: [x265] slicetype: fencIntra[] alignment is implied by 8x8 size of prediction[]
details: http://hg.videolan.org/x265/rev/0d2ebfdc61e9
branches:
changeset: 9408:0d2ebfdc61e9
user: Steve Borho <steve at borho.org>
date: Tue Feb 24 13:36:32 2015 -0600
description:
slicetype: fencIntra[] alignment is implied by 8x8 size of prediction[]
Subject: [x265] slicetype: nit
details: http://hg.videolan.org/x265/rev/0cea19ff76d2
branches:
changeset: 9409:0cea19ff76d2
user: Steve Borho <steve at borho.org>
date: Tue Feb 24 13:50:36 2015 -0600
description:
slicetype: nit
Subject: [x265] slicetype: fix I slice cost estimates
details: http://hg.videolan.org/x265/rev/55713d567fc2
branches:
changeset: 9410:55713d567fc2
user: Steve Borho <steve at borho.org>
date: Tue Feb 24 13:50:29 2015 -0600
description:
slicetype: fix I slice cost estimates
Subject: [x265] slicetype: zero row-satds prior to cooperative (slice based) cost estimate
details: http://hg.videolan.org/x265/rev/8ba297f59e48
branches:
changeset: 9411:8ba297f59e48
user: Steve Borho <steve at borho.org>
date: Tue Feb 24 14:00:37 2015 -0600
description:
slicetype: zero row-satds prior to cooperative (slice based) cost estimate
diffstat:
doc/reST/cli.rst | 30 +++++++++++++++---------------
source/common/threadpool.cpp | 2 +-
source/encoder/entropy.cpp | 1 +
source/encoder/slicetype.cpp | 26 +++++++++++++++++++++-----
4 files changed, 38 insertions(+), 21 deletions(-)
diffs (141 lines):
diff -r 8986b6e427b2 -r 8ba297f59e48 doc/reST/cli.rst
--- a/doc/reST/cli.rst Mon Feb 23 22:42:02 2015 -0600
+++ b/doc/reST/cli.rst Tue Feb 24 14:00:37 2015 -0600
@@ -184,16 +184,17 @@ Performance Options
'-' is a special value indicating no cores on the node, same as '0'
example strings for a 4-node system::
- "" - default, unspecified, all numa nodes are used for thread pools
- "*" - same as default
- "none" - no thread pools are created, only frame parallelism possible
- "-" - same as "none"
- "10" - allocate one pool, using up to 10 cores on node 0
- "-,+" - allocate one pool, using all cores on node 1
- "+,-,+" - allocate two pools, using all cores on nodes 0 and 2
- "+,-,+,-" - allocate two pools, using all cores on nodes 0 and 2
- "-,*" - allocate three pools, using all cores on nodes 1, 2 and 3
- "8,8,8,8" - allocate four pools with up to 8 threads in each pool
+
+ "" - default, unspecified, all numa nodes are used for thread pools
+ "*" - same as default
+ "none" - no thread pools are created, only frame parallelism possible
+ "-" - same as "none"
+ "10" - allocate one pool, using up to 10 cores on node 0
+ "-,+" - allocate one pool, using all cores on node 1
+ "+,-,+" - allocate two pools, using all cores on nodes 0 and 2
+ "+,-,+,-" - allocate two pools, using all cores on nodes 0 and 2
+ "-,*" - allocate three pools, using all cores on nodes 1, 2 and 3
+ "8,8,8,8" - allocate four pools with up to 8 threads in each pool
The total number of threads will be determined by the number of threads
assigned to all nodes. The worker threads will each be given affinity for
@@ -211,11 +212,10 @@ Performance Options
64 logical CPU cores. But any given thread pool will always use at most
one NUMA node.
- Frame encoders are distributed between the available thread pools, and
- the encoder will never generate more thread pools than frameNumThreads
-
- Number of threads to allocate for the worker thread pool This pool
- is used for WPP and for distributed analysis and motion search:
+ Frame encoders are distributed between the available thread pools,
+ and the encoder will never generate more thread pools than
+ :option:`--frame-threads`. The pools are used for WPP and for
+ distributed analysis and motion search.
Default "", one thread is allocated per detected hardware thread
(logical CPU cores) and one thread pool per NUMA node.
diff -r 8986b6e427b2 -r 8ba297f59e48 source/common/threadpool.cpp
--- a/source/common/threadpool.cpp Mon Feb 23 22:42:02 2015 -0600
+++ b/source/common/threadpool.cpp Tue Feb 24 14:00:37 2015 -0600
@@ -321,7 +321,7 @@ ThreadPool* ThreadPool::allocThreadPools
numPools = 0;
return NULL;
}
- if (bNumaSupport)
+ if (numNumaNodes > 1)
x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads on NUMA node %d\n", i, cores, node);
else
x265_log(p, X265_LOG_INFO, "Thread pool created using %d threads\n", cores);
diff -r 8986b6e427b2 -r 8ba297f59e48 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Mon Feb 23 22:42:02 2015 -0600
+++ b/source/encoder/entropy.cpp Tue Feb 24 14:00:37 2015 -0600
@@ -43,6 +43,7 @@ Entropy::Entropy()
{
markValid();
m_fracBits = 0;
+ m_pad = 0;
X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
}
diff -r 8986b6e427b2 -r 8ba297f59e48 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Mon Feb 23 22:42:02 2015 -0600
+++ b/source/encoder/slicetype.cpp Tue Feb 24 14:00:37 2015 -0600
@@ -210,7 +210,7 @@ void LookaheadTLD::calcAdaptiveQuantFram
void LookaheadTLD::lowresIntraEstimate(Lowres& fenc)
{
ALIGN_VAR_32(pixel, prediction[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
- ALIGN_VAR_32(pixel, fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
+ pixel fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
pixel neighbours[2][X265_LOWRES_CU_SIZE * 4 + 1];
const int lookAheadLambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP];
@@ -224,6 +224,9 @@ void LookaheadTLD::lowresIntraEstimate(L
pixel *planar = (cuSize >= 8) ? neighbours[1] : neighbours[0];
pixelcmp_t satd = primitives.pu[sizeIdx].satd;
+ fenc.costEst[0][0] = 0;
+ fenc.costEstAq[0][0] = 0;
+
for (int cuY = 0; cuY < heightInCU; cuY++)
{
fenc.rowSatds[0][0][cuY] = 0;
@@ -239,7 +242,7 @@ void LookaheadTLD::lowresIntraEstimate(L
memcpy(neighbours[0], pixCur - 1 - fenc.lumaStride, (cuSize + 1) * sizeof(pixel));
for (int i = 1; i < cuSize + 1; i++)
- neighbours[0][i + cuSize2] = pixCur[-1 - fenc.lumaStride + i * fenc.lumaStride]; /* todo: fixme */
+ neighbours[0][i + cuSize2] = pixCur[-1 - fenc.lumaStride + i * fenc.lumaStride]; /* TODO: gcc warning */
for (int i = 0; i < cuSize; i++)
{
@@ -264,7 +267,7 @@ void LookaheadTLD::lowresIntraEstimate(L
uint32_t ilowmode = 0;
/* DC and planar */
- primitives.cu[sizeIdx].intra_pred[DC_IDX](prediction, cuSize, neighbours[0], 0, (cuSize <= 16));
+ primitives.cu[sizeIdx].intra_pred[DC_IDX](prediction, cuSize, neighbours[0], 0, cuSize <= 16);
cost = satd(fencIntra, cuSize, prediction, cuSize);
COPY2_IF_LT(icost, cost, ilowmode, DC_IDX);
@@ -306,8 +309,20 @@ void LookaheadTLD::lowresIntraEstimate(L
fenc.lowresCosts[0][0][cuXY] = (uint16_t)(X265_MIN(icost, LOWRES_COST_MASK) | (0 << LOWRES_COST_SHIFT));
fenc.intraCost[cuXY] = icost;
fenc.intraMode[cuXY] = (uint8_t)ilowmode;
- fenc.rowSatds[0][0][cuY] += icost;
- fenc.costEst[0][0] += icost;
+
+ /* do not include edge blocks in the frame cost estimates, they are not very accurate */
+ const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
+ cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
+
+ int icostAq = (bFrameScoreCU && fenc.invQscaleFactor) ? ((icost * fenc.invQscaleFactor[cuXY] + 128) >> 8) : icost;
+
+ if (bFrameScoreCU)
+ {
+ fenc.costEst[0][0] += icost;
+ fenc.costEstAq[0][0] += icostAq;
+ }
+
+ fenc.rowSatds[0][0][cuY] += icostAq;
}
}
}
@@ -1888,6 +1903,7 @@ int64_t CostEstimateGroup::estimateFrame
* going to need motion searches or bidir measurements */
memset(&m_slice, 0, sizeof(Slice) * m_lookahead.m_numCoopSlices);
+ memset(fenc->rowSatds, 0, sizeof(fenc->rowSatds[0]) * m_lookahead.m_heightInCU);
m_lock.acquire();
X265_CHECK(!m_batchMode, "single CostEstimateGroup instance cannot mix batch modes\n");
More information about the x265-commits mailing list