[x265-commits] [x265] weightp: non-trivial constructors and destructors should not be in headers
Steve Borho
steve at borho.org
Fri Feb 7 02:49:39 CET 2014
details: http://hg.videolan.org/x265/rev/d87b6e92c996
branches:
changeset: 6038:d87b6e92c996
user: Steve Borho <steve at borho.org>
date: Thu Feb 06 18:35:28 2014 -0600
description:
weightp: non-trivial constructors and destructors should not be in headers
Subject: [x265] weightp: do not blindly assume 4:2:0 chroma dimensions
details: http://hg.videolan.org/x265/rev/9bc4b7b1454e
branches:
changeset: 6039:9bc4b7b1454e
user: Steve Borho <steve at borho.org>
date: Thu Feb 06 18:36:02 2014 -0600
description:
weightp: do not blindly assume 4:2:0 chroma dimensions
Subject: [x265] weightp: don't use m_ prefix for non member variable
details: http://hg.videolan.org/x265/rev/8f025ee0a506
branches:
changeset: 6040:8f025ee0a506
user: Steve Borho <steve at borho.org>
date: Thu Feb 06 18:47:57 2014 -0600
description:
weightp: don't use m_ prefix for non member variable
Subject: [x265] nit
details: http://hg.videolan.org/x265/rev/1776b9a58585
branches:
changeset: 6041:1776b9a58585
user: Steve Borho <steve at borho.org>
date: Thu Feb 06 18:48:08 2014 -0600
description:
nit
Subject: [x265] Merge
details: http://hg.videolan.org/x265/rev/21d808d834c4
branches:
changeset: 6042:21d808d834c4
user: Steve Borho <steve at borho.org>
date: Thu Feb 06 19:24:41 2014 -0600
description:
Merge
Subject: [x265] weightp: remove useless m_dstStride variable
details: http://hg.videolan.org/x265/rev/c54271b906da
branches:
changeset: 6043:c54271b906da
user: Steve Borho <steve at borho.org>
date: Thu Feb 06 18:59:19 2014 -0600
description:
weightp: remove useless m_dstStride variable
diffstat:
source/Lib/TLibCommon/TComPicYuv.h | 2 +-
source/Lib/TLibEncoder/TEncCu.cpp | 4 -
source/common/common.cpp | 2 +-
source/common/threadpool.cpp | 135 +-
source/common/vec/intra-ssse3.cpp | 1245 -------------
source/common/x86/asm-primitives.cpp | 61 +-
source/common/x86/intrapred.h | 14 +
source/common/x86/intrapred8.asm | 3130 ++++++++++++++++++++++++++++++++-
source/common/x86/pixel-a.asm | 70 +-
source/common/x86/sad16-a.asm | 14 +-
source/encoder/compress.cpp | 2 -
source/encoder/slicetype.cpp | 42 +-
source/encoder/weightPrediction.cpp | 54 +-
source/encoder/weightPrediction.h | 38 +-
source/test/pixelharness.cpp | 252 +-
source/test/pixelharness.h | 6 +-
16 files changed, 3484 insertions(+), 1587 deletions(-)
diffs (truncated from 5720 to 300 lines):
diff -r fc90c9b265fd -r c54271b906da source/Lib/TLibCommon/TComPicYuv.h
--- a/source/Lib/TLibCommon/TComPicYuv.h Wed Feb 05 18:20:41 2014 -0600
+++ b/source/Lib/TLibCommon/TComPicYuv.h Thu Feb 06 18:59:19 2014 -0600
@@ -166,7 +166,7 @@ public:
void copyFromPicture(const x265_picture&, int32_t *pad);
}; // END CLASS DEFINITION TComPicYuv
-void updateChecksum(const Pel* plane, uint32_t& checksumVal, uint32_t height, uint32_t width, uint32_t stride, int row, uint32_t cu_Height);
+void updateChecksum(const Pel* plane, uint32_t& checksumVal, uint32_t height, uint32_t width, uint32_t stride, int row, uint32_t cuHeight);
void updateCRC(const Pel* plane, uint32_t& crcVal, uint32_t height, uint32_t width, uint32_t stride);
void crcFinish(uint32_t & crc, UChar digest[16]);
void checksumFinish(uint32_t & checksum, UChar digest[16]);
diff -r fc90c9b265fd -r c54271b906da source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Feb 05 18:20:41 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Thu Feb 06 18:59:19 2014 -0600
@@ -1395,8 +1395,6 @@ void TEncCu::xCheckRDCostIntra(TComDataC
m_search->estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], preCalcDistC, true);
- m_tmpRecoYuv[depth]->copyToPicLuma(outTempCU->getPic()->getPicYuvRec(), outTempCU->getAddr(), outTempCU->getZorderIdxInCU());
-
m_search->estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], preCalcDistC);
m_entropyCoder->resetBits();
@@ -1444,8 +1442,6 @@ void TEncCu::xCheckRDCostIntraInInter(TC
m_search->estIntraPredQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth],
preCalcDistC, bSeparateLumaChroma);
- m_tmpRecoYuv[depth]->copyToPicLuma(outTempCU->getPic()->getPicYuvRec(), outTempCU->getAddr(), outTempCU->getZorderIdxInCU());
-
m_search->estIntraPredChromaQT(outTempCU, m_origYuv[depth], m_tmpPredYuv[depth], m_tmpResiYuv[depth], m_tmpRecoYuv[depth], preCalcDistC);
m_entropyCoder->resetBits();
diff -r fc90c9b265fd -r c54271b906da source/common/common.cpp
--- a/source/common/common.cpp Wed Feb 05 18:20:41 2014 -0600
+++ b/source/common/common.cpp Thu Feb 06 18:59:19 2014 -0600
@@ -532,7 +532,7 @@ int x265_set_globals(x265_param *param)
static int once /* = 0 */;
- if (ATOMIC_CAS(&once, 0, 1) == 1)
+ if (ATOMIC_CAS32(&once, 0, 1) == 1)
{
if (param->maxCUSize != g_maxCUWidth)
{
diff -r fc90c9b265fd -r c54271b906da source/common/threadpool.cpp
--- a/source/common/threadpool.cpp Wed Feb 05 18:20:41 2014 -0600
+++ b/source/common/threadpool.cpp Thu Feb 06 18:59:19 2014 -0600
@@ -78,12 +78,8 @@ public:
virtual ~PoolThread() {}
void threadMain();
-
- static volatile uint64_t s_sleepMap;
};
-volatile uint64_t PoolThread::s_sleepMap /* = 0 */;
-
class ThreadPoolImpl : public ThreadPool
{
private:
@@ -91,7 +87,9 @@ private:
bool m_ok;
int m_referenceCount;
int m_numThreads;
+ int m_numSleepMapWords;
PoolThread *m_threads;
+ volatile uint64_t *m_sleepMap;
/* Lock for write access to the provider lists. Threads are
* always allowed to read m_firstProvider and follow the
@@ -119,6 +117,10 @@ public:
return this;
}
+ void markThreadAsleep(int id);
+
+ void waitForAllIdle();
+
int getThreadCount() const { return m_numThreads; }
void release();
@@ -166,8 +168,7 @@ void PoolThread::threadMain()
if (cur == NULL)
{
- uint64_t bit = 1LL << m_id;
- ATOMIC_OR(&s_sleepMap, bit);
+ m_pool.markThreadAsleep(m_id);
m_wakeEvent.wait();
}
}
@@ -175,19 +176,34 @@ void PoolThread::threadMain()
m_exited = true;
}
+void ThreadPoolImpl::markThreadAsleep(int id)
+{
+ int word = id >> 6;
+ uint64_t bit = 1LL << (id & 63);
+ ATOMIC_OR(&m_sleepMap[word], bit);
+}
+
void ThreadPoolImpl::pokeIdleThread()
{
- /* Find a bit in the sleeping thread bitmap and poke it awake */
- uint64_t oldval = PoolThread::s_sleepMap;
+ /* Find a bit in the sleeping thread bitmap and poke it awake, do
+ * not give up until a thread is awakened or all of them are awake */
+ for (int i = 0; i < m_numSleepMapWords; i++)
+ {
+ uint64_t oldval = m_sleepMap[i];
+ while (oldval)
+ {
+ unsigned long id;
+ CTZ64(id, oldval);
- if (oldval)
- {
- unsigned long id;
- CTZ64(id, oldval);
+ uint64_t newval = oldval & ~(1LL << id);
+ if (ATOMIC_CAS(&m_sleepMap[i], oldval, newval) == oldval)
+ {
+ m_threads[(i << 6) | id].poke();
+ return;
+ }
- uint64_t newval = oldval & ~(1LL << id);
- if (ATOMIC_CAS(&PoolThread::s_sleepMap, oldval, newval) == oldval)
- m_threads[id].poke();
+ oldval = m_sleepMap[i];
+ }
}
}
@@ -228,71 +244,80 @@ ThreadPoolImpl::ThreadPoolImpl(int numTh
{
if (numThreads == 0)
numThreads = get_cpu_count();
- numThreads = X265_MIN(64, numThreads); // do not overflow sleep map
+ m_numSleepMapWords = (numThreads + 63) >> 6;
+ m_sleepMap = X265_MALLOC(uint64_t, m_numSleepMapWords);
- char *buffer = new char[sizeof(PoolThread) * numThreads];
+ char *buffer = (char*)X265_MALLOC(PoolThread, numThreads);
m_threads = reinterpret_cast<PoolThread*>(buffer);
m_numThreads = numThreads;
- if (m_threads)
+ if (m_threads && m_sleepMap)
{
- uint64_t idlemap = 0;
+ for (int i = 0; i < m_numSleepMapWords; i++)
+ m_sleepMap[i] = 0;
m_ok = true;
- for (int i = 0; i < numThreads; i++)
+ int i;
+ for (i = 0; i < numThreads; i++)
{
new (buffer)PoolThread(*this, i);
buffer += sizeof(PoolThread);
- m_ok = m_ok && m_threads[i].start();
- idlemap |= (1LL << i);
+ if (!m_threads[i].start())
+ {
+ m_ok = false;
+ break;
+ }
}
- // Wait for threads to spin up and idle
- while (PoolThread::s_sleepMap != idlemap)
+ if (m_ok)
+ {
+ waitForAllIdle();
+ }
+ else
+ {
+ // stop threads that did start up
+ for (int j = 0; j < i; j++)
+ {
+ m_threads[j].poke();
+ m_threads[j].stop();
+ }
+ }
+ }
+}
+
+void ThreadPoolImpl::waitForAllIdle()
+{
+ if (!m_ok)
+ return;
+
+ int id = 0;
+ do
+ {
+ int word = id >> 6;
+ uint64_t bit = 1LL << (id & 63);
+ if (m_sleepMap[word] & bit)
+ {
+ id++;
+ }
+ else
{
GIVE_UP_TIME();
}
}
+ while (id < m_numThreads);
}
void ThreadPoolImpl::Stop()
{
if (m_ok)
{
- uint64_t idlemap = 0;
- for (int i = 0; i < m_numThreads; i++)
- {
- idlemap |= (1LL << i);
- }
-
- // wait for all threads to idle
- while (PoolThread::s_sleepMap != idlemap)
- {
- GIVE_UP_TIME();
- }
+ waitForAllIdle();
// set invalid flag, then wake them up so they exit their main func
m_ok = false;
for (int i = 0; i < m_numThreads; i++)
{
- pokeIdleThread();
- }
-
- int exited_count = 0;
- do
- {
- GIVE_UP_TIME();
- exited_count = 0;
- for (int i = 0; i < m_numThreads; i++)
- {
- exited_count += m_threads[i].isExited() ? 1 : 0;
- }
- }
- while (exited_count < m_numThreads);
-
- // join each thread to cleanup resources
- for (int i = 0; i < m_numThreads; i++)
- {
+ m_threads[i].poke();
m_threads[i].stop();
}
}
@@ -300,6 +325,8 @@ void ThreadPoolImpl::Stop()
ThreadPoolImpl::~ThreadPoolImpl()
{
+ X265_FREE((void*)m_sleepMap);
+
if (m_threads)
{
// cleanup thread handles
@@ -308,7 +335,7 @@ ThreadPoolImpl::~ThreadPoolImpl()
m_threads[i].~PoolThread();
}
- delete[] reinterpret_cast<char*>(m_threads);
+ X265_FREE(reinterpret_cast<char*>(m_threads));
}
}
diff -r fc90c9b265fd -r c54271b906da source/common/vec/intra-ssse3.cpp
--- a/source/common/vec/intra-ssse3.cpp Wed Feb 05 18:20:41 2014 -0600
+++ b/source/common/vec/intra-ssse3.cpp Thu Feb 06 18:59:19 2014 -0600
@@ -557,1249 +557,6 @@ void intraPredAng16x16(pixel* dst, intpt
#undef MB4
#undef CALC_BLND_8ROWS
-//32x32
-#define PREDANG_CALCROW_VER(X) \
- v_deltaPos = _mm_add_epi16(v_deltaPos, v_ipAngle); \
- v_deltaFract = _mm_and_si128(v_deltaPos, thirty1); \
- itmp = _mm_loadu_si128((__m128i const*)(refMain + 1 + (angAP[8 - (lookIdx)][(X)]))); \
- row11L = _mm_unpacklo_epi8(itmp, _mm_setzero_si128()); \
- row11H = _mm_unpackhi_epi8(itmp, _mm_setzero_si128()); \
-\
- itmp = _mm_loadu_si128((__m128i const*)(refMain + 1 + (angAP[8 - (lookIdx)][(X)] + 1))); \
- row12L = _mm_unpacklo_epi8(itmp, _mm_setzero_si128()); \
- row12H = _mm_unpackhi_epi8(itmp, _mm_setzero_si128()); \
-\
- it1 = _mm_sub_epi16(thirty2, v_deltaFract); \
- it2 = _mm_mullo_epi16(it1, row11L); \
- it3 = _mm_mullo_epi16(v_deltaFract, row12L); \
- it2 = _mm_add_epi16(it2, it3); \
- i16 = _mm_set1_epi16(16); \
- it2 = _mm_add_epi16(it2, i16); \
- row11L = _mm_srai_epi16(it2, 5); \
- it2 = _mm_mullo_epi16(it1, row11H); \
More information about the x265-commits mailing list