How about this patch?<br><br>On Saturday, April 9, 2016, Ximing Cheng <<a href="mailto:chengximing1989@gmail.com">chengximing1989@gmail.com</a>> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">With the two patches applied, I ran a simple test on my Windows PC several times and got the following results:<div><br></div><div>./x265.exe --fps 30 --input-res 1280x720 /d/project/sharp/lol.yuv -o test1.265<br></div><div><div>encoded 500 frames in 28.83s (17.34 fps), 1330.90 kb/s, Avg QP:34.94</div></div><div><br></div><div>./x265_withpatch.exe --fps 30 --input-res 1280x720 /d/project/sharp/lol.yuv -o test2.265<br></div><div>encoded 500 frames in 28.14s (17.77 fps), 1330.90 kb/s, Avg QP:34.94<br></div><div><br></div><div>So there is only about a 1% ~ 2% speedup on Windows in my test, but my test covers very few YUV sequences.</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Apr 8, 2016 at 6:07 PM, Deepthi Nandakumar <span dir="ltr"><<a href="javascript:_e(%7B%7D,'cvml','deepthi@multicorewareinc.com');" target="_blank">deepthi@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Thank you for the patches. The Slim RW locks should be a performance improvement from the documentation. I'm not so sure about the POSIX RWlocks though. Did you do any performance testing - does the change in behaviour of the reconRow and framefilter locks improve performance? </div><div class="gmail_extra"><div><div><br><div class="gmail_quote">On Sun, Apr 3, 2016 at 2:00 PM, Ximing Cheng <span dir="ltr"><<a href="javascript:_e(%7B%7D,'cvml','chengximing1989@foxmail.com');" target="_blank">chengximing1989@foxmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span># HG changeset patch<br>
# User Ximing Cheng <<a href="javascript:_e(%7B%7D,'cvml','ximingcheng@tencent.com');" target="_blank">ximingcheng@tencent.com</a>><br>
</span># Date 1459672199 -28800<br>
# Sun Apr 03 16:29:59 2016 +0800<br>
# Node ID 601877ef465c549efe24063afa0479a39e369010<br>
<span># Parent 5b01678f6fb4e89e23cd41295592a9aa5d51d4ba<br>
ThreadSafeInteger: change default lock into read write lock<br>
<br>
</span>diff -r 5b01678f6fb4 -r 601877ef465c source/common/frame.cpp<br>
<span>--- a/source/common/frame.cpp Sat Apr 02 19:08:49 2016 +0100<br>
</span>+++ b/source/common/frame.cpp Sun Apr 03 16:29:59 2016 +0800<br>
<span>@@ -30,6 +30,7 @@<br>
<br>
Frame::Frame()<br>
{<br>
+ m_reconRowCount.init();<br>
m_bChromaExtended = false;<br>
m_lowresInit = false;<br>
m_reconRowCount.set(0);<br>
@@ -55,6 +56,8 @@<br>
X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");<br>
m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1) / g_maxCUSize;<br>
m_reconColCount = new ThreadSafeInteger[m_numRows];<br>
+ for (int i = 0; i < m_numRows; i++)<br>
+ m_reconColCount[i].init(true);<br>
<br>
if (quantOffsets)<br>
{<br>
</span>diff -r 5b01678f6fb4 -r 601877ef465c source/common/threading.h<br>
<span>--- a/source/common/threading.h Sat Apr 02 19:08:49 2016 +0100<br>
</span>+++ b/source/common/threading.h Sun Apr 03 16:29:59 2016 +0800<br>
@@ -174,73 +174,127 @@<br>
<div><div> class ThreadSafeInteger<br>
{<br>
public:<br>
-<br>
- ThreadSafeInteger()<br>
+ /* useReadWriteLock is unused here; it exists only for compatibility with the pthread version */<br>
+ void init(bool useReadWriteLock = true)<br>
{<br>
+ /* disable MSVC warnings */<br>
+ (void)useReadWriteLock;<br>
m_val = 0;<br>
+ InitializeConditionVariable(&m_cv);<br>
+ /* Slim Reader/Writer (SRW) locks are supported on Windows Vista and later */<br>
+#if _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
InitializeCriticalSection(&m_cs);<br>
- InitializeConditionVariable(&m_cv);<br>
+#else<br>
+ InitializeSRWLock(&m_rwlock);<br>
+#endif<br>
}<br>
<br>
~ThreadSafeInteger()<br>
{<br>
+ /* SRW locks do not need to be explicitly destroyed */<br>
+#if defined(_WIN32_WINNT) && _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
DeleteCriticalSection(&m_cs);<br>
+#endif<br>
XP_CONDITION_VAR_FREE(&m_cv);<br>
}<br>
<br>
int waitForChange(int prev)<br>
{<br>
+#if _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
EnterCriticalSection(&m_cs);<br>
- if (m_val == prev)<br>
+ while (m_val == prev)<br>
SleepConditionVariableCS(&m_cv, &m_cs, INFINITE);<br>
LeaveCriticalSection(&m_cs);<br>
+#else<br>
+ AcquireSRWLockShared(&m_rwlock);<br>
+ while (m_val == prev)<br>
+ SleepConditionVariableSRW(&m_cv, &m_rwlock, INFINITE, CONDITION_VARIABLE_LOCKMODE_SHARED);<br>
+ ReleaseSRWLockShared(&m_rwlock);<br>
+#endif<br>
return m_val;<br>
}<br>
<br>
int get()<br>
{<br>
+#if _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
EnterCriticalSection(&m_cs);<br>
int ret = m_val;<br>
LeaveCriticalSection(&m_cs);<br>
+#else<br>
+ AcquireSRWLockShared(&m_rwlock);<br>
+ int ret = m_val;<br>
+ ReleaseSRWLockShared(&m_rwlock);<br>
+#endif<br>
return ret;<br>
}<br>
<br>
int getIncr(int n = 1)<br>
{<br>
+#if _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
EnterCriticalSection(&m_cs);<br>
int ret = m_val;<br>
m_val += n;<br>
LeaveCriticalSection(&m_cs);<br>
+#else<br>
+ AcquireSRWLockExclusive(&m_rwlock);<br>
+ int ret = m_val;<br>
+ m_val += n;<br>
+ ReleaseSRWLockExclusive(&m_rwlock);<br>
+#endif<br>
return ret;<br>
}<br>
<br>
void set(int newval)<br>
{<br>
+#if _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
EnterCriticalSection(&m_cs);<br>
m_val = newval;<br>
WakeAllConditionVariable(&m_cv);<br>
LeaveCriticalSection(&m_cs);<br>
+#else<br>
+ AcquireSRWLockExclusive(&m_rwlock);<br>
+ m_val = newval;<br>
+ WakeAllConditionVariable(&m_cv);<br>
+ ReleaseSRWLockExclusive(&m_rwlock);<br>
+#endif<br>
}<br>
<br>
void poke(void)<br>
{<br>
/* awaken all waiting threads, but make no change */<br>
+#if _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
EnterCriticalSection(&m_cs);<br>
WakeAllConditionVariable(&m_cv);<br>
LeaveCriticalSection(&m_cs);<br>
+#else<br>
+ AcquireSRWLockExclusive(&m_rwlock);<br>
+ WakeAllConditionVariable(&m_cv);<br>
+ ReleaseSRWLockExclusive(&m_rwlock);<br>
+#endif<br>
}<br>
<br>
void incr()<br>
{<br>
+#if _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
EnterCriticalSection(&m_cs);<br>
m_val++;<br>
WakeAllConditionVariable(&m_cv);<br>
LeaveCriticalSection(&m_cs);<br>
+#else<br>
+ AcquireSRWLockExclusive(&m_rwlock);<br>
+ m_val++;<br>
+ WakeAllConditionVariable(&m_cv);<br>
+ ReleaseSRWLockExclusive(&m_rwlock);<br>
+#endif<br>
}<br>
<br>
protected:<br>
<br>
</div></div>+#if _WIN32_WINNT < _WIN32_WINNT_VISTA<br>
CRITICAL_SECTION m_cs;<br>
+#else<br>
<span>+ SRWLOCK m_rwlock;<br>
+#endif<br>
CONDITION_VARIABLE m_cv;<br>
int m_val;<br>
};<br>
</span>@@ -369,27 +423,38 @@<br>
<div><div> class ThreadSafeInteger<br>
{<br>
public:<br>
-<br>
- ThreadSafeInteger()<br>
+ /* pthread_cond_wait only accepts a mutex parameter */<br>
+ void init(bool useReadWriteLock = false)<br>
{<br>
+ m_useReadWriteLock = useReadWriteLock;<br>
m_val = 0;<br>
- if (pthread_mutex_init(&m_mutex, NULL) ||<br>
- pthread_cond_init(&m_cond, NULL))<br>
+ if (!m_useReadWriteLock && (pthread_mutex_init(&m_mutex, NULL) ||<br>
+ pthread_cond_init(&m_cond, NULL)))<br>
{<br>
x265_log(NULL, X265_LOG_ERROR, "fatal: unable to initialize conditional variable\n");<br>
}<br>
+ else if (m_useReadWriteLock && pthread_rwlock_init(&m_rwlock, NULL))<br>
+ {<br>
+ x265_log(NULL, X265_LOG_ERROR, "fatal: unable to initialize read write lock\n");<br>
+ }<br>
}<br>
<br>
~ThreadSafeInteger()<br>
{<br>
- pthread_cond_destroy(&m_cond);<br>
- pthread_mutex_destroy(&m_mutex);<br>
+ if (!m_useReadWriteLock)<br>
+ {<br>
+ pthread_cond_destroy(&m_cond);<br>
+ pthread_mutex_destroy(&m_mutex);<br>
+ }<br>
+ else<br>
+ pthread_rwlock_destroy(&m_rwlock);<br>
}<br>
<br>
int waitForChange(int prev)<br>
{<br>
+ X265_CHECK(!m_useReadWriteLock, "ThreadSafeInteger with waitForChange should disable read write lock!\n");<br>
pthread_mutex_lock(&m_mutex);<br>
- if (m_val == prev)<br>
+ while (m_val == prev)<br>
pthread_cond_wait(&m_cond, &m_mutex);<br>
pthread_mutex_unlock(&m_mutex);<br>
return m_val;<br>
</div></div>@@ -397,31 +462,62 @@<br>
<div><div><br>
int get()<br>
{<br>
- pthread_mutex_lock(&m_mutex);<br>
- int ret = m_val;<br>
- pthread_mutex_unlock(&m_mutex);<br>
+ int ret;<br>
+ if (!m_useReadWriteLock)<br>
+ {<br>
+ pthread_mutex_lock(&m_mutex);<br>
+ ret = m_val;<br>
+ pthread_mutex_unlock(&m_mutex);<br>
+ }<br>
+ else<br>
+ {<br>
+ pthread_rwlock_rdlock(&m_rwlock);<br>
+ ret = m_val;<br>
+ pthread_rwlock_unlock(&m_rwlock);<br>
+ }<br>
return ret;<br>
}<br>
<br>
int getIncr(int n = 1)<br>
{<br>
- pthread_mutex_lock(&m_mutex);<br>
- int ret = m_val;<br>
- m_val += n;<br>
- pthread_mutex_unlock(&m_mutex);<br>
+ int ret;<br>
+ if (!m_useReadWriteLock)<br>
+ {<br>
+ pthread_mutex_lock(&m_mutex);<br>
+ ret = m_val;<br>
+ m_val += n;<br>
+ pthread_mutex_unlock(&m_mutex);<br>
+ }<br>
+ else<br>
+ {<br>
+ pthread_rwlock_wrlock(&m_rwlock);<br>
+ ret = m_val;<br>
+ m_val += n;<br>
+ pthread_rwlock_unlock(&m_rwlock);<br>
+ }<br>
return ret;<br>
}<br>
<br>
void set(int newval)<br>
{<br>
- pthread_mutex_lock(&m_mutex);<br>
- m_val = newval;<br>
- pthread_cond_broadcast(&m_cond);<br>
- pthread_mutex_unlock(&m_mutex);<br>
+ if (!m_useReadWriteLock)<br>
+ {<br>
+ pthread_mutex_lock(&m_mutex);<br>
+ m_val = newval;<br>
+ pthread_cond_broadcast(&m_cond);<br>
+ pthread_mutex_unlock(&m_mutex);<br>
+ }<br>
+ else<br>
+ {<br>
+ pthread_rwlock_wrlock(&m_rwlock);<br>
+ m_val = newval;<br>
+ pthread_rwlock_unlock(&m_rwlock);<br>
+ }<br>
}<br>
<br>
void poke(void)<br>
{<br>
+ X265_CHECK(!m_useReadWriteLock, "ThreadSafeInteger with poke should disable read write lock!\n");<br>
/* awaken all waiting threads, but make no change */<br>
pthread_mutex_lock(&m_mutex);<br>
pthread_cond_broadcast(&m_cond);<br>
</div></div>@@ -430,17 +526,28 @@<br>
<div><div><br>
void incr()<br>
{<br>
- pthread_mutex_lock(&m_mutex);<br>
- m_val++;<br>
- pthread_cond_broadcast(&m_cond);<br>
- pthread_mutex_unlock(&m_mutex);<br>
+ if (!m_useReadWriteLock)<br>
+ {<br>
+ pthread_mutex_lock(&m_mutex);<br>
+ m_val++;<br>
+ pthread_cond_broadcast(&m_cond);<br>
+ pthread_mutex_unlock(&m_mutex);<br>
+ }<br>
+ else<br>
+ {<br>
+ pthread_rwlock_wrlock(&m_rwlock);<br>
+ m_val++;<br>
+ pthread_rwlock_unlock(&m_rwlock);<br>
+ }<br>
}<br>
<br>
protected:<br>
<br>
- pthread_mutex_t m_mutex;<br>
- pthread_cond_t m_cond;<br>
- int m_val;<br>
+ pthread_mutex_t m_mutex;<br>
+ pthread_rwlock_t m_rwlock;<br>
+ pthread_cond_t m_cond;<br>
+ int m_val;<br>
+ bool m_useReadWriteLock;<br>
};<br>
<br>
#endif // ifdef _WIN32<br>
</div></div>diff -r 5b01678f6fb4 -r 601877ef465c source/common/threadpool.h<br>
<span>--- a/source/common/threadpool.h Sat Apr 02 19:08:49 2016 +0100<br>
</span>+++ b/source/common/threadpool.h Sun Apr 03 16:29:59 2016 +0800<br>
<span>@@ -129,7 +129,7 @@<br>
int m_jobTotal;<br>
int m_jobAcquired;<br>
<br>
- BondedTaskGroup() { m_bondedPeerCount = m_jobTotal = m_jobAcquired = 0; }<br>
+ BondedTaskGroup() { m_bondedPeerCount = m_jobTotal = m_jobAcquired = 0; m_exitedPeerCount.init(); }<br>
<br>
/* Do not allow the instance to be destroyed before all bonded peers have<br>
* exited processTasks() */<br>
</span>diff -r 5b01678f6fb4 -r 601877ef465c source/encoder/framefilter.h<br>
<span>--- a/source/encoder/framefilter.h Sat Apr 02 19:08:49 2016 +0100<br>
</span>+++ b/source/encoder/framefilter.h Sun Apr 03 16:29:59 2016 +0800<br>
<span>@@ -82,6 +82,9 @@<br>
, m_encData(NULL)<br>
, m_prevRow(NULL)<br>
{<br>
+ m_lastCol.init(true);<br>
+ m_allowedCol.init(true);<br>
+ m_lastDeblocked.init(true);<br>
}<br>
<br>
~ParallelFilter()<br>
</span>diff -r 5b01678f6fb4 -r 601877ef465c source/encoder/ratecontrol.cpp<br>
<span>--- a/source/encoder/ratecontrol.cpp Sat Apr 02 19:08:49 2016 +0100<br>
</span>+++ b/source/encoder/ratecontrol.cpp Sun Apr 03 16:29:59 2016 +0800<br>
<span>@@ -166,6 +166,7 @@<br>
<br>
RateControl::RateControl(x265_param& p)<br>
{<br>
+ m_startEndOrder.init();<br>
m_param = &p;<br>
int lowresCuWidth = ((m_param->sourceWidth / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;<br>
int lowresCuHeight = ((m_param->sourceHeight / 2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;<br>
</span>diff -r 5b01678f6fb4 -r 601877ef465c source/input/y4m.cpp<br>
<span>--- a/source/input/y4m.cpp Sat Apr 02 19:08:49 2016 +0100<br>
</span>+++ b/source/input/y4m.cpp Sun Apr 03 16:29:59 2016 +0800<br>
<span>@@ -43,6 +43,8 @@<br>
<br>
Y4MInput::Y4MInput(InputFileInfo& info)<br>
{<br>
+ readCount.init();<br>
+ writeCount.init();<br>
for (int i = 0; i < QUEUE_SIZE; i++)<br>
buf[i] = NULL;<br>
<br>
</span>diff -r 5b01678f6fb4 -r 601877ef465c source/input/yuv.cpp<br>
<span>--- a/source/input/yuv.cpp Sat Apr 02 19:08:49 2016 +0100<br>
</span>+++ b/source/input/yuv.cpp Sun Apr 03 16:29:59 2016 +0800<br>
<span>@@ -41,6 +41,8 @@<br>
<br>
YUVInput::YUVInput(InputFileInfo& info)<br>
{<br>
+ readCount.init();<br>
+ writeCount.init();<br>
for (int i = 0; i < QUEUE_SIZE; i++)<br>
buf[i] = NULL;<br>
<br>
</span>diff -r 5b01678f6fb4 -r 601877ef465c source/output/reconplay.cpp<br>
<span>--- a/source/output/reconplay.cpp Sat Apr 02 19:08:49 2016 +0100<br>
</span>+++ b/source/output/reconplay.cpp Sun Apr 03 16:29:59 2016 +0800<br>
<div><div>@@ -54,7 +54,8 @@<br>
if (signal(SIGPIPE, sigpipe_handler) == SIG_ERR)<br>
general_log(&param, "exec", X265_LOG_ERROR, "Unable to register SIGPIPE handler: %s\n", strerror(errno));<br>
#endif<br>
-<br>
+ readCount.init();<br>
+ writeCount.init();<br>
width = param.sourceWidth;<br>
height = param.sourceHeight;<br>
colorSpace = param.internalCsp;<br>
<br>
<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="javascript:_e(%7B%7D,'cvml','x265-devel@videolan.org');" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</div></div></blockquote></div><br><br clear="all"><div><br></div></div></div><span><font color="#888888">-- <br><div><div dir="ltr"><div><div>Deepthi Nandakumar<br></div>Engineering Manager, x265<br></div>Multicoreware, Inc<br></div></div>
</font></span></div>
<br>_______________________________________________<br>
x265-devel mailing list<br>
<a href="javascript:_e(%7B%7D,'cvml','x265-devel@videolan.org');" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br></blockquote></div><br></div>
</blockquote>