[x265-commits] [x265] param: add some more validation checks

Sagar Kotecha sagar at multicorewareinc.com
Thu Feb 27 07:31:50 CET 2014


details:   http://hg.videolan.org/x265/rev/fa23612e2850
branches:  
changeset: 6308:fa23612e2850
user:      Sagar Kotecha <sagar at multicorewareinc.com>
date:      Wed Feb 26 18:43:04 2014 +0530
description:
param: add some more validation checks
Subject: [x265] reintroduce thread pool unit test

details:   http://hg.videolan.org/x265/rev/d36764800215
branches:  
changeset: 6309:d36764800215
user:      Steve Borho <steve at borho.org>
date:      Fri Feb 07 14:00:02 2014 -0600
description:
reintroduce thread pool unit test

The test reproduces a deadlock on POSIX roughly once per 1M runs.
Subject: [x265] asm: 16bpp assembly code for intra_pred_ang16 - all modes

details:   http://hg.videolan.org/x265/rev/41bc98a92b49
branches:  
changeset: 6310:41bc98a92b49
user:      Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
date:      Wed Feb 26 11:01:40 2014 +0530
description:
asm: 16bpp assembly code for intra_pred_ang16 - all modes
Subject: [x265] asm: 10bpp code for chroma interpolation filters

details:   http://hg.videolan.org/x265/rev/d317026aa0ad
branches:  
changeset: 6311:d317026aa0ad
user:      Murugan Vairavel <murugan at multicorewareinc.com>
date:      Wed Feb 26 14:18:13 2014 +0530
description:
asm: 10bpp code for chroma interpolation filters
Subject: [x265] threading: use a pthread condition variable instead of a semaphore

details:   http://hg.videolan.org/x265/rev/8ac1e112f3ea
branches:  
changeset: 6312:8ac1e112f3ea
user:      Steve Borho <steve at borho.org>
date:      Wed Feb 26 20:31:01 2014 -0600
description:
threading: use a pthread condition variable instead of a semaphore

This allows more control over the exact behavior, removes the global nature
of the semaphore on some systems, and will allow the addition of a timed wait
to the Event class.  It appears to resolve the deadlocks reproduced in the
pool test on Mac OS X; hopefully on Linux as well.
Subject: [x265] threading: add a timedWait() method to Event class

details:   http://hg.videolan.org/x265/rev/c9a0802b64ac
branches:  
changeset: 6313:c9a0802b64ac
user:      Steve Borho <steve at borho.org>
date:      Wed Feb 26 22:16:28 2014 -0600
description:
threading: add a timedWait() method to Event class

diffstat:

 source/common/param.cpp              |     8 +-
 source/common/threading.cpp          |     2 -
 source/common/threading.h            |    76 +-
 source/common/x86/asm-primitives.cpp |   102 +
 source/common/x86/intrapred16.asm    |  3689 ++++++++++++++++++++++++++++++++++
 source/common/x86/ipfilter16.asm     |  1469 +++++++++++++-
 source/common/x86/ipfilter8.h        |   175 +-
 source/test/CMakeLists.txt           |     3 +
 source/test/ipfilterharness.cpp      |     4 +-
 source/test/testpool.cpp             |   241 ++
 10 files changed, 5694 insertions(+), 75 deletions(-)

diffs (truncated from 5972 to 300 lines):

diff -r 9b0c9b76d902 -r c9a0802b64ac source/common/param.cpp
--- a/source/common/param.cpp	Wed Feb 26 18:01:05 2014 +0530
+++ b/source/common/param.cpp	Wed Feb 26 22:16:28 2014 -0600
@@ -791,12 +791,12 @@ int x265_check_params(x265_param *param)
     CHECK((1u << tuQTMaxLog2Size) > param->maxCUSize,
           "QuadtreeTULog2MaxSize must be log2(maxCUSize) or smaller.");
 
-    CHECK(param->tuQTMaxInterDepth < 1,
-          "QuadtreeTUMaxDepthInter must be greater than or equal to 1");
+    CHECK(param->tuQTMaxInterDepth < 1 || param->tuQTMaxInterDepth > 4,
+          "QuadtreeTUMaxDepthInter must be greater than 0 and less than 5");
     CHECK(param->maxCUSize < (1u << (tuQTMinLog2Size + param->tuQTMaxInterDepth - 1)),
           "QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
-    CHECK(param->tuQTMaxIntraDepth < 1,
-          "QuadtreeTUMaxDepthIntra must be greater than or equal to 1");
+    CHECK(param->tuQTMaxIntraDepth < 1 || param->tuQTMaxIntraDepth > 4,
+          "QuadtreeTUMaxDepthIntra must be greater 0 and less than 5");
     CHECK(param->maxCUSize < (1u << (tuQTMinLog2Size + param->tuQTMaxIntraDepth - 1)),
           "QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
 
diff -r 9b0c9b76d902 -r c9a0802b64ac source/common/threading.cpp
--- a/source/common/threading.cpp	Wed Feb 26 18:01:05 2014 +0530
+++ b/source/common/threading.cpp	Wed Feb 26 22:16:28 2014 -0600
@@ -65,8 +65,6 @@ Thread::~Thread()
 
 #else /* POSIX / pthreads */
 
-volatile int Event::s_incr /* = 0 */;
-
 static void *ThreadShim(void *opaque)
 {
     // defer processing to the virtual function implemented in the derived class
diff -r 9b0c9b76d902 -r c9a0802b64ac source/common/threading.h
--- a/source/common/threading.h	Wed Feb 26 18:01:05 2014 +0530
+++ b/source/common/threading.h	Wed Feb 26 22:16:28 2014 -0600
@@ -26,6 +26,9 @@
 #ifndef X265_THREADING_H
 #define X265_THREADING_H
 
+#include "common.h"
+#include "x265.h"
+
 #ifdef _WIN32
 #include <windows.h>
 #else
@@ -187,6 +190,12 @@ public:
         WaitForSingleObject(this->handle, INFINITE);
     }
 
+    bool timedWait(uint32_t milliseconds)
+    {
+        /* Wait at most `milliseconds` for the event; returns true if the wait timed out (same convention as the POSIX timedWait(), which returns bTimedOut) */
+        return WaitForSingleObject(this->handle, milliseconds) == WAIT_TIMEOUT;
+    }
+
     void trigger()
     {
         SetEvent(this->handle);
@@ -236,44 +245,75 @@ public:
 
     Event()
     {
-        int pid = (int)getpid();
-        do
+        m_counter = 0;
+        if (pthread_mutex_init(&m_mutex, NULL) ||
+            pthread_cond_init(&m_cond, NULL))
         {
-            int num = ATOMIC_INC(&s_incr);
-            snprintf(name, sizeof(name), "/x265_%d_%d", pid, num);
-            this->semaphore = sem_open(name, O_CREAT | O_EXCL, 0777, 0);
+            x265_log(NULL, X265_LOG_ERROR, "fatal: unable to initialize conditional variable\n");
         }
-        while (this->semaphore == SEM_FAILED);
     }
 
     ~Event()
     {
-        sem_close(this->semaphore);
-        sem_unlink(name);
+        pthread_cond_destroy(&m_cond);
+        pthread_mutex_destroy(&m_mutex);
     }
 
     void wait()
     {
-        // keep waiting even if interrupted
-        while (sem_wait(this->semaphore) < 0)
+        pthread_mutex_lock(&m_mutex);
+        /* blocking wait on conditional variable, mutex is atomically released
+         * while blocked. When condition is signaled, mutex is re-acquired */
+        while (m_counter == 0)
+            pthread_cond_wait(&m_cond, &m_mutex);
+        m_counter--;
+        pthread_mutex_unlock(&m_mutex);
+    }
+
+    bool timedWait(uint32_t waitms)
+    {
+        bool bTimedOut = false;
+        pthread_mutex_lock(&m_mutex);
+        if (m_counter == 0)
         {
-            if (errno != EINTR)
-                break;
+            struct timeval tv;
+            struct timespec ts;
+            gettimeofday(&tv, NULL);
+            /* convert current time from (sec, usec) to (sec, nsec) */
+            ts.tv_sec = tv.tv_sec;
+            ts.tv_nsec = tv.tv_usec * 1000;
+
+            ts.tv_nsec += 1000 * 1000 * (waitms % 1000);    /* add ms to tv_nsec */
+            ts.tv_sec += ts.tv_nsec / (1000 * 1000 * 1000); /* overflow tv_nsec */
+            ts.tv_nsec %= (1000 * 1000 * 1000);             /* clamp tv_nsec */
+            ts.tv_sec += waitms / 1000;                     /* add seconds */
+
+            /* blocking wait on conditional variable, mutex is atomically released
+             * while blocked. When condition is signaled, mutex is re-acquired.
+             * ts is absolute time to stop waiting */
+            bTimedOut = pthread_cond_timedwait(&m_cond, &m_mutex, &ts) == ETIMEDOUT;
         }
+        if (m_counter > 0)
+            m_counter--;
+        pthread_mutex_unlock(&m_mutex);
+        return bTimedOut;
     }
 
     void trigger()
     {
-        sem_post(this->semaphore);
+        pthread_mutex_lock(&m_mutex);
+        if (m_counter < UINT32_MAX)
+            m_counter++;
+        /* Signal a single blocking thread */
+        pthread_cond_signal(&m_cond);
+        pthread_mutex_unlock(&m_mutex);
     }
 
 protected:
 
-    static volatile int s_incr;
-    char name[64];
-
-    /* the POSIX version uses a counting semaphore */
-    sem_t *semaphore;
+    pthread_mutex_t m_mutex;
+    pthread_cond_t  m_cond;
+    uint32_t        m_counter;
 };
 
 #endif // ifdef _WIN32
diff -r 9b0c9b76d902 -r c9a0802b64ac source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Feb 26 18:01:05 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Wed Feb 26 22:16:28 2014 -0600
@@ -718,6 +718,68 @@ extern "C" {
 #define SETUP_INTRA_ANG32(mode, fno, cpu) \
     p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
 
+#define SETUP_CHROMA_VERT_FUNC_DEF(W, H, cpu) \
+    p.chroma[X265_CSP_I420].filter_vss[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ss_ ## W ## x ## H ## cpu; \
+    p.chroma[X265_CSP_I420].filter_vsp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_sp_ ## W ## x ## H ## cpu;
+
+#define CHROMA_VERT_FILTERS(cpu) \
+    SETUP_CHROMA_VERT_FUNC_DEF(4, 4, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(4, 2, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(8, 8, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(8, 4, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(4, 8, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(8, 6, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(8, 2, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(16, 16, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(16, 8, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(8, 16, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(16, 12, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(12, 16, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(16, 4, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(4, 16, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(32, 32, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(32, 16, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(16, 32, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(32, 24, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(24, 32, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(32, 8, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(8, 32, cpu);
+
+#define CHROMA_VERT_FILTERS_SSE4(cpu) \
+    SETUP_CHROMA_VERT_FUNC_DEF(2, 4, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(2, 8, cpu); \
+    SETUP_CHROMA_VERT_FUNC_DEF(6, 8, cpu);
+
+#define SETUP_CHROMA_HORIZ_FUNC_DEF(W, H, cpu) \
+    p.chroma[X265_CSP_I420].filter_hpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_pp_ ## W ## x ## H ## cpu; \
+    p.chroma[X265_CSP_I420].filter_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x ## H ## cpu;
+
+#define CHROMA_HORIZ_FILTERS(cpu) \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(4, 4, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(4, 2, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(2, 4, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(8, 8, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(8, 4, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(4, 8, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(8, 6, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(6, 8, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(8, 2, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(2, 8, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(16, 16, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(16, 8, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(8, 16, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(16, 12, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(12, 16, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(16, 4, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(4, 16, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(32, 32, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(32, 16, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(16, 32, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(32, 24, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(24, 32, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(32, 8, cpu); \
+    SETUP_CHROMA_HORIZ_FUNC_DEF(8, 32, cpu)
+
 namespace x265 {
 // private x265 namespace
 void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
@@ -802,6 +864,9 @@ void Setup_Assembly_Primitives(EncoderPr
         CHROMA_BLOCKCOPY(_sse2);
         LUMA_BLOCKCOPY(_sse2);
 
+        CHROMA_VERT_FILTERS(_sse2);
+        p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_sse2;
+
         p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
         p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
         p.blockfill_s[BLOCK_16x16] = x265_blockfill_s_16x16_sse2;
@@ -832,6 +897,9 @@ void Setup_Assembly_Primitives(EncoderPr
         SETUP_INTRA_ANG8(2, 2, ssse3);
         SETUP_INTRA_ANG8(34, 2, ssse3);
 
+        SETUP_INTRA_ANG16(2, 2, ssse3);
+        SETUP_INTRA_ANG16(34, 2, ssse3);
+
         SETUP_INTRA_ANG32(2, 2, ssse3);
         SETUP_INTRA_ANG32(34, 2, ssse3);
 
@@ -842,6 +910,8 @@ void Setup_Assembly_Primitives(EncoderPr
         LUMA_ADDAVG(_sse4);
         CHROMA_ADDAVG(_sse4);
         LUMA_FILTERS(_sse4);
+        CHROMA_HORIZ_FILTERS(_sse4);
+        CHROMA_VERT_FILTERS_SSE4(_sse4);
 
         p.dct[DCT_8x8] = x265_dct8_sse4;
         p.quant = x265_quant_sse4;
@@ -921,6 +991,38 @@ void Setup_Assembly_Primitives(EncoderPr
         SETUP_INTRA_ANG8(32, 32, sse4);
         SETUP_INTRA_ANG8(33, 33, sse4);
 
+        SETUP_INTRA_ANG16(3,  3,  sse4);
+        SETUP_INTRA_ANG16(4,  4,  sse4);
+        SETUP_INTRA_ANG16(5,  5,  sse4);
+        SETUP_INTRA_ANG16(6,  6,  sse4);
+        SETUP_INTRA_ANG16(7,  7,  sse4);
+        SETUP_INTRA_ANG16(8,  8,  sse4);
+        SETUP_INTRA_ANG16(9,  9,  sse4);
+        SETUP_INTRA_ANG16(10, 10, sse4);
+        SETUP_INTRA_ANG16(11, 11, sse4);
+        SETUP_INTRA_ANG16(12, 12, sse4);
+        SETUP_INTRA_ANG16(13, 13, sse4);
+        SETUP_INTRA_ANG16(14, 14, sse4);
+        SETUP_INTRA_ANG16(15, 15, sse4);
+        SETUP_INTRA_ANG16(16, 16, sse4);
+        SETUP_INTRA_ANG16(17, 17, sse4);
+        SETUP_INTRA_ANG16(18, 18, sse4);
+        SETUP_INTRA_ANG16(19, 19, sse4);
+        SETUP_INTRA_ANG16(20, 20, sse4);
+        SETUP_INTRA_ANG16(21, 21, sse4);
+        SETUP_INTRA_ANG16(22, 22, sse4);
+        SETUP_INTRA_ANG16(23, 23, sse4);
+        SETUP_INTRA_ANG16(24, 24, sse4);
+        SETUP_INTRA_ANG16(25, 25, sse4);
+        SETUP_INTRA_ANG16(26, 26, sse4);
+        SETUP_INTRA_ANG16(27, 27, sse4);
+        SETUP_INTRA_ANG16(28, 28, sse4);
+        SETUP_INTRA_ANG16(29, 29, sse4);
+        SETUP_INTRA_ANG16(30, 30, sse4);
+        SETUP_INTRA_ANG16(31, 31, sse4);
+        SETUP_INTRA_ANG16(32, 32, sse4);
+        SETUP_INTRA_ANG16(33, 33, sse4);
+
         SETUP_INTRA_ANG32(3,  3,  sse4);
         SETUP_INTRA_ANG32(4,  4,  sse4);
         SETUP_INTRA_ANG32(5,  5,  sse4);
diff -r 9b0c9b76d902 -r c9a0802b64ac source/common/x86/intrapred16.asm
--- a/source/common/x86/intrapred16.asm	Wed Feb 26 18:01:05 2014 +0530
+++ b/source/common/x86/intrapred16.asm	Wed Feb 26 22:16:28 2014 -0600
@@ -53,6 +53,9 @@ const pw_ang8_16,   db 0, 0, 0, 0, 0, 0,
 const pw_ang8_17,   db 0, 0, 14, 15, 12, 13, 10, 11, 8, 9, 4, 5, 2, 3, 0, 1
 const pw_swap16,    db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
 
+const pw_ang16_13,   db 14, 15, 8, 9, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0


More information about the x265-commits mailing list