[x265] [PATCH] ThreadSafeInteger: change default lock into read write lock
Ximing Cheng
chengximing1989 at gmail.com
Mon Apr 18 04:40:25 CEST 2016
How about this patch?
On Saturday, April 9, 2016, Ximing Cheng <chengximing1989 at gmail.com> wrote:
> With the two patches, I just do a simple test on my Windows PC, and I test
> for several times and finally get the follow results:
>
> ./x265.exe --fps 30 --input-res 1280x720 /d/project/sharp/lol.yuv -o
> test1.265
> encoded 500 frames in 28.83s (17.34 fps), 1330.90 kb/s, Avg QP:34.94
>
> ./x265_withpatch.exe --fps 30 --input-res 1280x720
> /d/project/sharp/lol.yuv -o test2.265
> encoded 500 frames in 28.14s (17.77 fps), 1330.90 kb/s, Avg QP:34.94
>
> so just about 1% ~ 2% speed up on Windows in my test, but my test covers
> little YUV sequence.
>
> On Fri, Apr 8, 2016 at 6:07 PM, Deepthi Nandakumar <
> deepthi at multicorewareinc.com
> <javascript:_e(%7B%7D,'cvml','deepthi at multicorewareinc.com');>> wrote:
>
>> Thank you for the patches. The Slim RW locks should be a performance
>> improvement from the documentation. I'm not so sure about the POSIX RWlocks
>> though. Did you do any performance testing - does the change in behaviour
>> of the reconRow and framefilter locks improve performance?
>>
>> On Sun, Apr 3, 2016 at 2:00 PM, Ximing Cheng <chengximing1989 at foxmail.com
>> <javascript:_e(%7B%7D,'cvml','chengximing1989 at foxmail.com');>> wrote:
>>
>>> # HG changeset patch
>>> # User Ximing Cheng <ximingcheng at tencent.com
>>> <javascript:_e(%7B%7D,'cvml','ximingcheng at tencent.com');>>
>>> # Date 1459672199 -28800
>>> # Sun Apr 03 16:29:59 2016 +0800
>>> # Node ID 601877ef465c549efe24063afa0479a39e369010
>>> # Parent 5b01678f6fb4e89e23cd41295592a9aa5d51d4ba
>>> ThreadSafeInteger: change default lock into read write lock
>>>
>>> diff -r 5b01678f6fb4 -r 601877ef465c source/common/frame.cpp
>>> --- a/source/common/frame.cpp Sat Apr 02 19:08:49 2016 +0100
>>> +++ b/source/common/frame.cpp Sun Apr 03 16:29:59 2016 +0800
>>> @@ -30,6 +30,7 @@
>>>
>>> Frame::Frame()
>>> {
>>> + m_reconRowCount.init();
>>> m_bChromaExtended = false;
>>> m_lowresInit = false;
>>> m_reconRowCount.set(0);
>>> @@ -55,6 +56,8 @@
>>> X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
>>> initialized");
>>> m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1) /
>>> g_maxCUSize;
>>> m_reconColCount = new ThreadSafeInteger[m_numRows];
>>> + for (int i = 0; i < m_numRows; i++)
>>> + m_reconColCount[i].init(true);
>>>
>>> if (quantOffsets)
>>> {
>>> diff -r 5b01678f6fb4 -r 601877ef465c source/common/threading.h
>>> --- a/source/common/threading.h Sat Apr 02 19:08:49 2016 +0100
>>> +++ b/source/common/threading.h Sun Apr 03 16:29:59 2016 +0800
>>> @@ -174,73 +174,127 @@
>>> class ThreadSafeInteger
>>> {
>>> public:
>>> -
>>> - ThreadSafeInteger()
>>> + /* useReadWriteLock is useless, just be compatible with pthread
>>> version */
>>> + void init(bool useReadWriteLock = true)
>>> {
>>> + /* disable MSVC warnnings */
>>> + (void)useReadWriteLock;
>>> m_val = 0;
>>> + InitializeConditionVariable(&m_cv);
>>> + /* Slim Reader/Writer (SRW) Locks minimum supported in Windows
>>> Vista */
>>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> InitializeCriticalSection(&m_cs);
>>> - InitializeConditionVariable(&m_cv);
>>> +#else
>>> + InitializeSRWLock(&m_rwlock);
>>> +#endif
>>> }
>>>
>>> ~ThreadSafeInteger()
>>> {
>>> + /* SRW locks do not need to be explicitly destroyed */
>>> +#if defined(_WIN32_WINNT) && _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> DeleteCriticalSection(&m_cs);
>>> +#endif
>>> XP_CONDITION_VAR_FREE(&m_cv);
>>> }
>>>
>>> int waitForChange(int prev)
>>> {
>>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> EnterCriticalSection(&m_cs);
>>> - if (m_val == prev)
>>> + while (m_val == prev)
>>> SleepConditionVariableCS(&m_cv, &m_cs, INFINITE);
>>> LeaveCriticalSection(&m_cs);
>>> +#else
>>> + AcquireSRWLockShared(&m_rwlock);
>>> + while (m_val == prev)
>>> + SleepConditionVariableSRW(&m_cv, &m_rwlock, INFINITE,
>>> CONDITION_VARIABLE_LOCKMODE_SHARED);
>>> + ReleaseSRWLockShared(&m_rwlock);
>>> +#endif
>>> return m_val;
>>> }
>>>
>>> int get()
>>> {
>>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> EnterCriticalSection(&m_cs);
>>> int ret = m_val;
>>> LeaveCriticalSection(&m_cs);
>>> +#else
>>> + AcquireSRWLockShared(&m_rwlock);
>>> + int ret = m_val;
>>> + ReleaseSRWLockShared(&m_rwlock);
>>> +#endif
>>> return ret;
>>> }
>>>
>>> int getIncr(int n = 1)
>>> {
>>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> EnterCriticalSection(&m_cs);
>>> int ret = m_val;
>>> m_val += n;
>>> LeaveCriticalSection(&m_cs);
>>> +#else
>>> + AcquireSRWLockExclusive(&m_rwlock);
>>> + int ret = m_val;
>>> + m_val += n;
>>> + ReleaseSRWLockExclusive(&m_rwlock);
>>> +#endif
>>> return ret;
>>> }
>>>
>>> void set(int newval)
>>> {
>>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> EnterCriticalSection(&m_cs);
>>> m_val = newval;
>>> WakeAllConditionVariable(&m_cv);
>>> LeaveCriticalSection(&m_cs);
>>> +#else
>>> + AcquireSRWLockExclusive(&m_rwlock);
>>> + m_val = newval;
>>> + WakeAllConditionVariable(&m_cv);
>>> + ReleaseSRWLockExclusive(&m_rwlock);
>>> +#endif
>>> }
>>>
>>> void poke(void)
>>> {
>>> /* awaken all waiting threads, but make no change */
>>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> EnterCriticalSection(&m_cs);
>>> WakeAllConditionVariable(&m_cv);
>>> LeaveCriticalSection(&m_cs);
>>> +#else
>>> + AcquireSRWLockExclusive(&m_rwlock);
>>> + WakeAllConditionVariable(&m_cv);
>>> + ReleaseSRWLockExclusive(&m_rwlock);
>>> +#endif
>>> }
>>>
>>> void incr()
>>> {
>>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> EnterCriticalSection(&m_cs);
>>> m_val++;
>>> WakeAllConditionVariable(&m_cv);
>>> LeaveCriticalSection(&m_cs);
>>> +#else
>>> + AcquireSRWLockExclusive(&m_rwlock);
>>> + m_val++;
>>> + WakeAllConditionVariable(&m_cv);
>>> + ReleaseSRWLockExclusive(&m_rwlock);
>>> +#endif
>>> }
>>>
>>> protected:
>>>
>>> +#if _WIN32_WINNT < _WIN32_WINNT_VISTA
>>> CRITICAL_SECTION m_cs;
>>> +#else
>>> + SRWLOCK m_rwlock;
>>> +#endif
>>> CONDITION_VARIABLE m_cv;
>>> int m_val;
>>> };
>>> @@ -369,27 +423,38 @@
>>> class ThreadSafeInteger
>>> {
>>> public:
>>> -
>>> - ThreadSafeInteger()
>>> + /* pthread_cond_wait only accept mutex param */
>>> + void init(bool useReadWriteLock = false)
>>> {
>>> + m_useReadWriteLock = useReadWriteLock;
>>> m_val = 0;
>>> - if (pthread_mutex_init(&m_mutex, NULL) ||
>>> - pthread_cond_init(&m_cond, NULL))
>>> + if (!m_useReadWriteLock && (pthread_mutex_init(&m_mutex, NULL)
>>> ||
>>> + pthread_cond_init(&m_cond, NULL)))
>>> {
>>> x265_log(NULL, X265_LOG_ERROR, "fatal: unable to initialize
>>> conditional variable\n");
>>> }
>>> + else if (m_useReadWriteLock && pthread_rwlock_init(&m_rwlock,
>>> NULL))
>>> + {
>>> + x265_log(NULL, X265_LOG_ERROR, "fatal: unable to initialize
>>> read write lock\n");
>>> + }
>>> }
>>>
>>> ~ThreadSafeInteger()
>>> {
>>> - pthread_cond_destroy(&m_cond);
>>> - pthread_mutex_destroy(&m_mutex);
>>> + if (!m_useReadWriteLock)
>>> + {
>>> + pthread_cond_destroy(&m_cond);
>>> + pthread_mutex_destroy(&m_mutex);
>>> + }
>>> + else
>>> + pthread_rwlock_destroy(&m_rwlock);
>>> }
>>>
>>> int waitForChange(int prev)
>>> {
>>> + X265_CHECK(!m_useReadWriteLock, "ThreadSafeInteger with
>>> waitForChange should disable read write lock!\n");
>>> pthread_mutex_lock(&m_mutex);
>>> - if (m_val == prev)
>>> + while (m_val == prev)
>>> pthread_cond_wait(&m_cond, &m_mutex);
>>> pthread_mutex_unlock(&m_mutex);
>>> return m_val;
>>> @@ -397,31 +462,62 @@
>>>
>>> int get()
>>> {
>>> - pthread_mutex_lock(&m_mutex);
>>> - int ret = m_val;
>>> - pthread_mutex_unlock(&m_mutex);
>>> + int ret;
>>> + if (!m_useReadWriteLock)
>>> + {
>>> + pthread_mutex_lock(&m_mutex);
>>> + ret = m_val;
>>> + pthread_mutex_unlock(&m_mutex);
>>> + }
>>> + else
>>> + {
>>> + pthread_rwlock_rdlock(&m_rwlock);
>>> + ret = m_val;
>>> + pthread_rwlock_unlock(&m_rwlock);
>>> + }
>>> return ret;
>>> }
>>>
>>> int getIncr(int n = 1)
>>> {
>>> - pthread_mutex_lock(&m_mutex);
>>> - int ret = m_val;
>>> - m_val += n;
>>> - pthread_mutex_unlock(&m_mutex);
>>> + int ret;
>>> + if (!m_useReadWriteLock)
>>> + {
>>> + pthread_mutex_lock(&m_mutex);
>>> + ret = m_val;
>>> + m_val += n;
>>> + pthread_mutex_unlock(&m_mutex);
>>> + }
>>> + else
>>> + {
>>> + pthread_rwlock_wrlock(&m_rwlock);
>>> + ret = m_val;
>>> + m_val += n;
>>> + pthread_rwlock_unlock(&m_rwlock);
>>> + }
>>> return ret;
>>> }
>>>
>>> void set(int newval)
>>> {
>>> - pthread_mutex_lock(&m_mutex);
>>> - m_val = newval;
>>> - pthread_cond_broadcast(&m_cond);
>>> - pthread_mutex_unlock(&m_mutex);
>>> + if (!m_useReadWriteLock)
>>> + {
>>> + pthread_mutex_lock(&m_mutex);
>>> + m_val = newval;
>>> + pthread_cond_broadcast(&m_cond);
>>> + pthread_mutex_unlock(&m_mutex);
>>> + }
>>> + else
>>> + {
>>> + pthread_rwlock_wrlock(&m_rwlock);
>>> + m_val = newval;
>>> + pthread_rwlock_unlock(&m_rwlock);
>>> + }
>>> }
>>>
>>> void poke(void)
>>> {
>>> + X265_CHECK(!m_useReadWriteLock, "ThreadSafeInteger with poke
>>> should disable read write lock!\n");
>>> /* awaken all waiting threads, but make no change */
>>> pthread_mutex_lock(&m_mutex);
>>> pthread_cond_broadcast(&m_cond);
>>> @@ -430,17 +526,28 @@
>>>
>>> void incr()
>>> {
>>> - pthread_mutex_lock(&m_mutex);
>>> - m_val++;
>>> - pthread_cond_broadcast(&m_cond);
>>> - pthread_mutex_unlock(&m_mutex);
>>> + if (!m_useReadWriteLock)
>>> + {
>>> + pthread_mutex_lock(&m_mutex);
>>> + m_val++;
>>> + pthread_cond_broadcast(&m_cond);
>>> + pthread_mutex_unlock(&m_mutex);
>>> + }
>>> + else
>>> + {
>>> + pthread_rwlock_wrlock(&m_rwlock);
>>> + m_val++;
>>> + pthread_rwlock_unlock(&m_rwlock);
>>> + }
>>> }
>>>
>>> protected:
>>>
>>> - pthread_mutex_t m_mutex;
>>> - pthread_cond_t m_cond;
>>> - int m_val;
>>> + pthread_mutex_t m_mutex;
>>> + pthread_rwlock_t m_rwlock;
>>> + pthread_cond_t m_cond;
>>> + int m_val;
>>> + bool m_useReadWriteLock;
>>> };
>>>
>>> #endif // ifdef _WIN32
>>> diff -r 5b01678f6fb4 -r 601877ef465c source/common/threadpool.h
>>> --- a/source/common/threadpool.h Sat Apr 02 19:08:49 2016 +0100
>>> +++ b/source/common/threadpool.h Sun Apr 03 16:29:59 2016 +0800
>>> @@ -129,7 +129,7 @@
>>> int m_jobTotal;
>>> int m_jobAcquired;
>>>
>>> - BondedTaskGroup() { m_bondedPeerCount = m_jobTotal = m_jobAcquired
>>> = 0; }
>>> + BondedTaskGroup() { m_bondedPeerCount = m_jobTotal = m_jobAcquired
>>> = 0; m_exitedPeerCount.init(); }
>>>
>>> /* Do not allow the instance to be destroyed before all bonded
>>> peers have
>>> * exited processTasks() */
>>> diff -r 5b01678f6fb4 -r 601877ef465c source/encoder/framefilter.h
>>> --- a/source/encoder/framefilter.h Sat Apr 02 19:08:49 2016 +0100
>>> +++ b/source/encoder/framefilter.h Sun Apr 03 16:29:59 2016 +0800
>>> @@ -82,6 +82,9 @@
>>> , m_encData(NULL)
>>> , m_prevRow(NULL)
>>> {
>>> + m_lastCol.init(true);
>>> + m_allowedCol.init(true);
>>> + m_lastDeblocked.init(true);
>>> }
>>>
>>> ~ParallelFilter()
>>> diff -r 5b01678f6fb4 -r 601877ef465c source/encoder/ratecontrol.cpp
>>> --- a/source/encoder/ratecontrol.cpp Sat Apr 02 19:08:49 2016 +0100
>>> +++ b/source/encoder/ratecontrol.cpp Sun Apr 03 16:29:59 2016 +0800
>>> @@ -166,6 +166,7 @@
>>>
>>> RateControl::RateControl(x265_param& p)
>>> {
>>> + m_startEndOrder.init();
>>> m_param = &p;
>>> int lowresCuWidth = ((m_param->sourceWidth / 2) +
>>> X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
>>> int lowresCuHeight = ((m_param->sourceHeight / 2) +
>>> X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
>>> diff -r 5b01678f6fb4 -r 601877ef465c source/input/y4m.cpp
>>> --- a/source/input/y4m.cpp Sat Apr 02 19:08:49 2016 +0100
>>> +++ b/source/input/y4m.cpp Sun Apr 03 16:29:59 2016 +0800
>>> @@ -43,6 +43,8 @@
>>>
>>> Y4MInput::Y4MInput(InputFileInfo& info)
>>> {
>>> + readCount.init();
>>> + writeCount.init();
>>> for (int i = 0; i < QUEUE_SIZE; i++)
>>> buf[i] = NULL;
>>>
>>> diff -r 5b01678f6fb4 -r 601877ef465c source/input/yuv.cpp
>>> --- a/source/input/yuv.cpp Sat Apr 02 19:08:49 2016 +0100
>>> +++ b/source/input/yuv.cpp Sun Apr 03 16:29:59 2016 +0800
>>> @@ -41,6 +41,8 @@
>>>
>>> YUVInput::YUVInput(InputFileInfo& info)
>>> {
>>> + readCount.init();
>>> + writeCount.init();
>>> for (int i = 0; i < QUEUE_SIZE; i++)
>>> buf[i] = NULL;
>>>
>>> diff -r 5b01678f6fb4 -r 601877ef465c source/output/reconplay.cpp
>>> --- a/source/output/reconplay.cpp Sat Apr 02 19:08:49 2016 +0100
>>> +++ b/source/output/reconplay.cpp Sun Apr 03 16:29:59 2016 +0800
>>> @@ -54,7 +54,8 @@
>>> if (signal(SIGPIPE, sigpipe_handler) == SIG_ERR)
>>> general_log(¶m, "exec", X265_LOG_ERROR, "Unable to register
>>> SIGPIPE handler: %s\n", strerror(errno));
>>> #endif
>>> -
>>> + readCount.init();
>>> + writeCount.init();
>>> width = param.sourceWidth;
>>> height = param.sourceHeight;
>>> colorSpace = param.internalCsp;
>>>
>>>
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> <javascript:_e(%7B%7D,'cvml','x265-devel at videolan.org');>
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>>
>> --
>> Deepthi Nandakumar
>> Engineering Manager, x265
>> Multicoreware, Inc
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> <javascript:_e(%7B%7D,'cvml','x265-devel at videolan.org');>
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160418/b079136b/attachment-0001.html>
More information about the x265-devel
mailing list