[x265-commits] [x265] threading: use atomic increment when building semaphore object
Steve Borho
steve at borho.org
Wed Feb 26 18:34:00 CET 2014
details: http://hg.videolan.org/x265/rev/948626475a46
branches:
changeset: 6303:948626475a46
user: Steve Borho <steve at borho.org>
date: Tue Feb 25 23:53:43 2014 -0600
description:
threading: use atomic increment when building semaphore object
Required moving atomic defines to the top of threading.h, removing the need
for potentially redundant includes of unistd.h
Subject: [x265] encoder: tweak slice stats to more closely match x264 outputs
details: http://hg.videolan.org/x265/rev/46207f6f5c8c
branches:
changeset: 6304:46207f6f5c8c
user: Steve Borho <steve at borho.org>
date: Wed Feb 26 00:43:46 2014 -0600
description:
encoder: tweak slice stats to more closely match x264 outputs
Subject: [x265] fix: uninitialized variable
details: http://hg.videolan.org/x265/rev/483e699a9527
branches:
changeset: 6305:483e699a9527
user: Satoshi Nakagawa <nakagawa424 at oki.com>
date: Wed Feb 26 21:06:22 2014 +0900
description:
fix: uninitialized variable
Subject: [x265] all_angs_pred_32x32, asm code improvement
details: http://hg.videolan.org/x265/rev/0b9c77b41599
branches:
changeset: 6306:0b9c77b41599
user: Praveen Tiwari
date: Wed Feb 26 17:58:24 2014 +0530
description:
all_angs_pred_32x32, asm code improvement
Subject: [x265] rc:bug fix-store average Qp as decided by AQ only if aq is enabled
details: http://hg.videolan.org/x265/rev/9b0c9b76d902
branches:
changeset: 6307:9b0c9b76d902
user: Santhoshini Sekar <santhoshini at multicorewareinc.com>
date: Wed Feb 26 18:01:05 2014 +0530
description:
rc:bug fix-store average Qp as decided by AQ only if aq is enabled
diffstat:
source/common/threading.cpp | 2 +-
source/common/threading.h | 192 ++++++-----
source/common/x86/intrapred8.asm | 622 ++++++++++++++------------------------
source/encoder/encoder.cpp | 8 +-
source/encoder/frameencoder.cpp | 3 +-
source/encoder/slicetype.cpp | 2 +
6 files changed, 341 insertions(+), 488 deletions(-)
diffs (truncated from 1654 to 300 lines):
diff -r b47fc23c75df -r 9b0c9b76d902 source/common/threading.cpp
--- a/source/common/threading.cpp Wed Feb 26 03:26:00 2014 +0530
+++ b/source/common/threading.cpp Wed Feb 26 18:01:05 2014 +0530
@@ -65,7 +65,7 @@ Thread::~Thread()
#else /* POSIX / pthreads */
-int Event::s_incr /* = 0 */;
+volatile int Event::s_incr /* = 0 */;
static void *ThreadShim(void *opaque)
{
diff -r b47fc23c75df -r 9b0c9b76d902 source/common/threading.h
--- a/source/common/threading.h Wed Feb 26 03:26:00 2014 +0530
+++ b/source/common/threading.h Wed Feb 26 18:01:05 2014 +0530
@@ -35,9 +35,102 @@
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
+#endif
+
+#include <stdint.h>
+
+#if MACOS
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#endif
+
+#ifdef __GNUC__ /* GCCs builtin atomics */
+
#include <unistd.h>
+#include <limits.h>
+
+#define CTZ64(id, x) id = (unsigned long)__builtin_ctzll(x)
+#define ATOMIC_OR(ptr, mask) __sync_or_and_fetch(ptr, mask)
+#define ATOMIC_CAS(ptr, oldval, newval) __sync_val_compare_and_swap(ptr, oldval, newval)
+#define ATOMIC_CAS32(ptr, oldval, newval) __sync_val_compare_and_swap(ptr, oldval, newval)
+#define ATOMIC_INC(ptr) __sync_add_and_fetch((volatile int32_t*)ptr, 1)
+#define ATOMIC_DEC(ptr) __sync_add_and_fetch((volatile int32_t*)ptr, -1)
+#define GIVE_UP_TIME() usleep(0)
+
+#elif defined(_MSC_VER) /* Windows atomic intrinsics */
+
+#include <intrin.h>
+
+#if !_WIN64
+inline int _BitScanReverse64(DWORD *id, uint64_t x64) // fake 64bit CLZ
+{
+ uint32_t high32 = (uint32_t)(x64 >> 32);
+ uint32_t low32 = (uint32_t)x64;
+
+ if (high32)
+ {
+ _BitScanReverse(id, high32);
+ *id += 32;
+ return 1;
+ }
+ else if (low32)
+ return _BitScanReverse(id, low32);
+ else
+ return *id = 0;
+}
+
+inline int _BitScanForward64(DWORD *id, uint64_t x64) // fake 64bit CLZ
+{
+ uint32_t high32 = (uint32_t)(x64 >> 32);
+ uint32_t low32 = (uint32_t)x64;
+
+ if (high32)
+ {
+ _BitScanForward(id, high32);
+ *id += 32;
+ return 1;
+ }
+ else if (low32)
+ return _BitScanForward(id, low32);
+ else
+ return *id = 0;
+}
+
+#endif // if !_WIN64
+
+#if _WIN32_WINNT <= _WIN32_WINNT_WINXP
+/* Windows XP did not define this intrinsic */
+FORCEINLINE LONGLONG x265_interlocked_OR64(__inout LONGLONG volatile *Destination,
+ __in LONGLONG Value)
+{
+ LONGLONG Old;
+
+ do
+ {
+ Old = *Destination;
+ }
+ while (_InterlockedCompareExchange64(Destination, Old | Value, Old) != Old);
+
+ return Old;
+}
+
+#define ATOMIC_OR(ptr, mask) x265_interlocked_OR64((volatile LONG64*)ptr, mask)
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+#pragma intrinsic(_InterlockedCompareExchange64)
#endif
-#include <stdint.h>
+#else // if _WIN32_WINNT <= _WIN32_WINNT_WINXP
+#define ATOMIC_OR(ptr, mask) InterlockedOr64((volatile LONG64*)ptr, mask)
+#endif // if _WIN32_WINNT <= _WIN32_WINNT_WINXP
+
+#define CTZ64(id, x) _BitScanForward64(&id, x)
+#define ATOMIC_CAS(ptr, oldval, newval) (uint64_t)_InterlockedCompareExchange64((volatile LONG64*)ptr, newval, oldval)
+#define ATOMIC_CAS32(ptr, oldval, newval) (uint64_t)_InterlockedCompareExchange((volatile LONG*)ptr, newval, oldval)
+#define ATOMIC_INC(ptr) InterlockedIncrement((volatile LONG*)ptr)
+#define ATOMIC_DEC(ptr) InterlockedDecrement((volatile LONG*)ptr)
+#define GIVE_UP_TIME() Sleep(0)
+
+#endif // ifdef __GNUC__
+
namespace x265 {
// x265 private namespace
@@ -146,7 +239,8 @@ public:
int pid = (int)getpid();
do
{
- snprintf(name, sizeof(name), "/x265_%d_%d", pid, s_incr++);
+ int num = ATOMIC_INC(&s_incr);
+ snprintf(name, sizeof(name), "/x265_%d_%d", pid, num);
this->semaphore = sem_open(name, O_CREAT | O_EXCL, 0777, 0);
}
while (this->semaphore == SEM_FAILED);
@@ -175,7 +269,7 @@ public:
protected:
- static int s_incr;
+ static volatile int s_incr;
char name[64];
/* the POSIX version uses a counting semaphore */
@@ -229,96 +323,4 @@ public:
};
} // end namespace x265
-#if MACOS
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#endif
-
-#ifdef __GNUC__ /* GCCs builtin atomics */
-
-#include <unistd.h>
-#include <limits.h>
-
-#define CTZ64(id, x) id = (unsigned long)__builtin_ctzll(x)
-#define ATOMIC_OR(ptr, mask) __sync_or_and_fetch(ptr, mask)
-#define ATOMIC_CAS(ptr, oldval, newval) __sync_val_compare_and_swap(ptr, oldval, newval)
-#define ATOMIC_CAS32(ptr, oldval, newval) __sync_val_compare_and_swap(ptr, oldval, newval)
-#define ATOMIC_INC(ptr) __sync_add_and_fetch((volatile int32_t*)ptr, 1)
-#define ATOMIC_DEC(ptr) __sync_add_and_fetch((volatile int32_t*)ptr, -1)
-#define GIVE_UP_TIME() usleep(0)
-
-#elif defined(_MSC_VER) /* Windows atomic intrinsics */
-
-#include <intrin.h>
-
-#if !_WIN64
-inline int _BitScanReverse64(DWORD *id, uint64_t x64) // fake 64bit CLZ
-{
- uint32_t high32 = (uint32_t)(x64 >> 32);
- uint32_t low32 = (uint32_t)x64;
-
- if (high32)
- {
- _BitScanReverse(id, high32);
- *id += 32;
- return 1;
- }
- else if (low32)
- return _BitScanReverse(id, low32);
- else
- return *id = 0;
-}
-
-inline int _BitScanForward64(DWORD *id, uint64_t x64) // fake 64bit CLZ
-{
- uint32_t high32 = (uint32_t)(x64 >> 32);
- uint32_t low32 = (uint32_t)x64;
-
- if (high32)
- {
- _BitScanForward(id, high32);
- *id += 32;
- return 1;
- }
- else if (low32)
- return _BitScanForward(id, low32);
- else
- return *id = 0;
-}
-
-#endif // if !_WIN64
-
-#if _WIN32_WINNT <= _WIN32_WINNT_WINXP
-/* Windows XP did not define this intrinsic */
-FORCEINLINE LONGLONG x265_interlocked_OR64(__inout LONGLONG volatile *Destination,
- __in LONGLONG Value)
-{
- LONGLONG Old;
-
- do
- {
- Old = *Destination;
- }
- while (_InterlockedCompareExchange64(Destination, Old | Value, Old) != Old);
-
- return Old;
-}
-
-#define ATOMIC_OR(ptr, mask) x265_interlocked_OR64((volatile LONG64*)ptr, mask)
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-#pragma intrinsic(_InterlockedCompareExchange64)
-#endif
-#else // if _WIN32_WINNT <= _WIN32_WINNT_WINXP
-#define ATOMIC_OR(ptr, mask) InterlockedOr64((volatile LONG64*)ptr, mask)
-#endif // if _WIN32_WINNT <= _WIN32_WINNT_WINXP
-
-#define CTZ64(id, x) _BitScanForward64(&id, x)
-#define ATOMIC_CAS(ptr, oldval, newval) (uint64_t)_InterlockedCompareExchange64((volatile LONG64*)ptr, newval, oldval)
-#define ATOMIC_CAS32(ptr, oldval, newval) (uint64_t)_InterlockedCompareExchange((volatile LONG*)ptr, newval, oldval)
-#define ATOMIC_INC(ptr) InterlockedIncrement((volatile LONG*)ptr)
-#define ATOMIC_DEC(ptr) InterlockedDecrement((volatile LONG*)ptr)
-#define GIVE_UP_TIME() Sleep(0)
-
-#endif // ifdef __GNUC__
-
#endif // ifndef X265_THREADING_H
diff -r b47fc23c75df -r 9b0c9b76d902 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm Wed Feb 26 03:26:00 2014 +0530
+++ b/source/common/x86/intrapred8.asm Wed Feb 26 18:01:05 2014 +0530
@@ -21028,20 +21028,17 @@ pinsrb m6, [r3 + 7], 0
pmaddubsw m3, m6, [r5 + 24 * 16]
pmulhrsw m3, m7
pslldq m2, 2
-pinsrb m2, [r4 + 6], 1
-pinsrb m2, [r4 + 5], 0
+pinsrw m2, [r4 + 5], 0
pmaddubsw m5, m2, [r5 + 24 * 16]
pmulhrsw m5, m7
packuswb m3, m5
movu [r0 + 782 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 14], 1
-pinsrb m1, [r4 + 13], 0
+pinsrw m1, [r4 + 13], 0
pmaddubsw m3, m1, [r5 + 24 * 16]
pmulhrsw m3, m7
pslldq m4, 2
-pinsrb m4, [r4 + 22], 1
-pinsrb m4, [r4 + 21], 0
+pinsrw m4, [r4 + 21], 0
pmaddubsw m5, m4, [r5 + 24 * 16]
pmulhrsw m5, m7
packuswb m3, m5
@@ -21242,15 +21239,13 @@ pinsrb m7, [r3 + 10], 0
pmaddubsw m3, m7, [r5 + 30 * 16]
pmulhrsw m3, [pw_1024]
pslldq m2, 2
-pinsrb m2, [r4 + 5], 1
-pinsrb m2, [r4 + 4], 0
+pinsrw m2, [r4 + 4], 0
pmaddubsw m5, m2, [r5 + 30 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
movu [r0 + 786 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 13], 1
-pinsrb m1, [r4 + 12], 0
+pinsrw m1, [r4 + 12], 0
pmaddubsw m3, m1, [r5 + 30 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
@@ -21459,20 +21454,17 @@ pinsrb m6, [r3 + 18], 0
pmaddubsw m3, m6, [r5 + 30 * 16]
pmulhrsw m3, [pw_1024]
pslldq m2, 2
-pinsrb m2, [r4 + 4], 1
-pinsrb m2, [r4 + 3], 0
+pinsrw m2, [r4 + 3], 0
pmaddubsw m5, m2, [r5 + 30 * 16]
pmulhrsw m5, [pw_1024]
packuswb m3, m5
movu [r0 + 738 * 16], m3
pslldq m1, 2
-pinsrb m1, [r4 + 12], 1
-pinsrb m1, [r4 + 11], 0
+pinsrw m1, [r4 + 11], 0
pmaddubsw m3, m1, [r5 + 30 * 16]
pmulhrsw m3, [pw_1024]
pslldq m4, 2
-pinsrb m4, [r4 + 20], 1
-pinsrb m4, [r4 + 19], 0
More information about the x265-commits mailing list