[vlc-devel] [PATCH] coreaudio: replace TPCircularBuffer by os_unfair_lock and a block chain

Thomas Guillem thomas at gllm.fr
Tue Mar 13 19:00:54 CET 2018


Remove the usage of TPCircularBuffer and multiple atomic variables that start
to make this code way too complicated. Replace it by os_unfair_lock and a block
chain.

os_unfair_lock is a safe spinlock that waits in the kernel in case of thread
contention.

Fallback to pthread_mutex_t if os_unfair_lock is not availaible (before macOS
10.12 / iOS 10.0).

The unfairness of this new lock is not an issue here since both locking threads
(the render callback and the VLC DecoderThread calling aout_DecPlay) will be
automatically paced (and will let the other thread take the lock). Indeed, the
render thread need a sample every 22 or 88ms, and the DecoderThread will wait
for the decoder, wait in the decoder lock, or wait from the aout if the FIFO is
full.
---
 modules/audio_output/coreaudio_common.c | 287 ++++++++++++++++++++------------
 modules/audio_output/coreaudio_common.h |  27 ++-
 2 files changed, 200 insertions(+), 114 deletions(-)

diff --git a/modules/audio_output/coreaudio_common.c b/modules/audio_output/coreaudio_common.c
index 292146f28c..1a5653cf23 100644
--- a/modules/audio_output/coreaudio_common.c
+++ b/modules/audio_output/coreaudio_common.c
@@ -25,6 +25,8 @@
 #import "coreaudio_common.h"
 #import <CoreAudio/CoreAudioTypes.h>
 
+#import <dlfcn.h>
+
 #if !TARGET_OS_IPHONE
 #import <CoreServices/CoreServices.h>
 #import <vlc_dialog.h>
@@ -42,17 +44,80 @@ FramesToUs(struct aout_sys_common *p_sys, uint64_t i_nb_frames)
     return i_nb_frames * CLOCK_FREQ / p_sys->i_rate;
 }
 
+static void
+ca_ClearOutBuffers(audio_output_t *p_aout)
+{
+    struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
+
+    block_ChainRelease(p_sys->p_out_chain);
+    p_sys->p_out_chain = NULL;
+    p_sys->pp_out_last = &p_sys->p_out_chain;
+
+    p_sys->i_out_size = 0;
+}
+
+static struct
+{
+    void (*lock)(os_unfair_lock *lock);
+    void (*unlock)(os_unfair_lock *lock);
+} unfair_lock;
+
+static void
+unfair_lock_dlinit(void)
+{
+    unfair_lock.lock = dlsym(RTLD_DEFAULT, "os_unfair_lock_lock");
+    if (!unfair_lock.lock)
+        return;
+    unfair_lock.unlock = dlsym(RTLD_DEFAULT, "os_unfair_lock_unlock");
+    if (!unfair_lock.unlock)
+        unfair_lock.lock = NULL;
+}
+
+static void
+lock_init(struct aout_sys_common *p_sys)
+{
+    static pthread_once_t once = PTHREAD_ONCE_INIT;
+    pthread_once(&once, unfair_lock_dlinit);
+
+    if (unfair_lock.lock)
+        p_sys->lock.unfair = OS_UNFAIR_LOCK_INIT;
+    else
+        vlc_mutex_init(&p_sys->lock.mutex);
+}
+
+static void
+lock_destroy(struct aout_sys_common *p_sys)
+{
+    if (!unfair_lock.lock)
+        vlc_mutex_destroy(&p_sys->lock.mutex);
+}
+
+static void
+lock_lock(struct aout_sys_common *p_sys)
+{
+    if (unfair_lock.lock)
+        unfair_lock.lock(&p_sys->lock.unfair);
+    else
+        vlc_mutex_lock(&p_sys->lock.mutex);
+}
+
+static void
+lock_unlock(struct aout_sys_common *p_sys)
+{
+    if (unfair_lock.lock)
+        unfair_lock.unlock(&p_sys->lock.unfair);
+    else
+        vlc_mutex_unlock(&p_sys->lock.mutex);
+}
+
 void
 ca_Open(audio_output_t *p_aout)
 {
     struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
 
-    atomic_init(&p_sys->i_underrun_size, 0);
-    atomic_init(&p_sys->b_paused, false);
-    atomic_init(&p_sys->b_do_flush, false);
-    atomic_init(&p_sys->b_highlatency, true);
     vlc_sem_init(&p_sys->flush_sem, 0);
-    vlc_mutex_init(&p_sys->lock);
+    lock_init(p_sys);
+    p_sys->p_out_chain = NULL;
 
     p_aout->play = ca_Play;
     p_aout->pause = ca_Pause;
@@ -66,7 +131,7 @@ ca_Close(audio_output_t *p_aout)
     struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
 
     vlc_sem_destroy(&p_sys->flush_sem);
-    vlc_mutex_destroy(&p_sys->lock);
+    lock_destroy(p_sys);
 }
 
 /* Called from render callbacks. No lock, wait, and IO here */
@@ -76,52 +141,62 @@ ca_Render(audio_output_t *p_aout, uint32_t i_nb_samples, uint8_t *p_output,
 {
     struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
 
-    const bool b_highlatency = CLOCK_FREQ * (uint64_t)i_nb_samples / p_sys->i_rate
-                             > AOUT_MAX_PTS_DELAY;
+    lock_lock(p_sys);
 
-    if (b_highlatency)
-        atomic_store(&p_sys->b_highlatency, true);
+    p_sys->b_highlatency = CLOCK_FREQ * (uint64_t)i_nb_samples / p_sys->i_rate
+                         > AOUT_MAX_PTS_DELAY;
 
-    bool expected = true;
-    if (atomic_compare_exchange_weak(&p_sys->b_do_flush, &expected, false))
+    if (p_sys->b_do_flush)
     {
-        TPCircularBufferClear(&p_sys->circular_buffer);
+        ca_ClearOutBuffers(p_aout);
         /* Signal that the renderer is flushed */
+        p_sys->b_do_flush = false;
         vlc_sem_post(&p_sys->flush_sem);
     }
 
-    if (atomic_load_explicit(&p_sys->b_paused, memory_order_relaxed))
+    if (p_sys->b_paused)
     {
         memset(p_output, 0, i_requested);
+        lock_unlock(p_sys);
         return;
     }
 
-    /* Pull audio from buffer */
-    int32_t i_available;
-    void *p_data = TPCircularBufferTail(&p_sys->circular_buffer,
-                                        &i_available);
-    if (i_available < 0)
-        i_available = 0;
-
-    size_t i_tocopy = __MIN(i_requested, (size_t) i_available);
-
-    if (i_tocopy > 0)
+    size_t i_copied = 0;
+    block_t *p_block = p_sys->p_out_chain;
+    while (p_block != NULL && i_requested != 0)
     {
-        memcpy(p_output, p_data, i_tocopy);
-        TPCircularBufferConsume(&p_sys->circular_buffer, i_tocopy);
+        size_t i_tocopy = __MIN(i_requested, p_block->i_buffer);
+        memcpy(&p_output[i_copied], p_block->p_buffer, i_tocopy);
+        i_requested -= i_tocopy;
+        i_copied += i_tocopy;
+        if (i_tocopy == p_block->i_buffer)
+        {
+            block_t *p_release = p_block;
+            p_block = p_block->p_next;
+            block_Release(p_release);
+        }
+        else
+        {
+            assert(i_requested == 0);
+
+            p_block->p_buffer += i_tocopy;
+            p_block->i_buffer -= i_tocopy;
+        }
     }
+    p_sys->p_out_chain = p_block;
+    if (!p_sys->p_out_chain)
+        p_sys->pp_out_last = &p_sys->p_out_chain;
+    p_sys->i_out_size -= i_copied;
 
     /* Pad with 0 */
-    if (i_requested > i_tocopy)
+    if (i_requested > 0)
     {
-        atomic_fetch_add(&p_sys->i_underrun_size, i_requested - i_tocopy);
-        memset(&p_output[i_tocopy], 0, i_requested - i_tocopy);
+        assert(p_sys->i_out_size == 0);
+        p_sys->i_underrun_size += i_requested;
+        memset(&p_output[i_copied], 0, i_requested);
     }
 
-    /* Set high delay to false (re-enabling ca_Timeget) after consuming the
-     * circular buffer */
-    if (!b_highlatency)
-        atomic_store(&p_sys->b_highlatency, false);
+    lock_unlock(p_sys);
 }
 
 int
@@ -129,16 +204,17 @@ ca_TimeGet(audio_output_t *p_aout, mtime_t *delay)
 {
     struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
 
-    /* Too high delay: TimeGet will be too imprecise */
-    if (atomic_load(&p_sys->b_highlatency))
+    lock_lock(p_sys);
+    if (p_sys->b_highlatency)
+    {
+        lock_unlock(p_sys);
         return -1;
+    }
 
-    int32_t i_bytes;
-    TPCircularBufferTail(&p_sys->circular_buffer, &i_bytes);
-
-    int64_t i_frames = BytesToFrames(p_sys, i_bytes);
+    int64_t i_frames = BytesToFrames(p_sys, p_sys->i_out_size);
     *delay = FramesToUs(p_sys, i_frames) + p_sys->i_dev_latency_us;
 
+    lock_unlock(p_sys);
     return 0;
 }
 
@@ -149,43 +225,41 @@ ca_Flush(audio_output_t *p_aout, bool wait)
 
     if (wait)
     {
-        int32_t i_bytes;
+        lock_lock(p_sys);
 
-        while (TPCircularBufferTail(&p_sys->circular_buffer, &i_bytes) != NULL)
+        while (p_sys->i_out_size > 0)
         {
-            if (atomic_load(&p_sys->b_paused))
+            if (p_sys->b_paused)
             {
-                TPCircularBufferClear(&p_sys->circular_buffer);
-                return;
+                ca_ClearOutBuffers(p_aout);
+                break;
             }
-
             /* Calculate the duration of the circular buffer, in order to wait
              * for the render thread to play it all */
             const mtime_t i_frame_us =
-                FramesToUs(p_sys, BytesToFrames(p_sys, i_bytes)) + 10000;
+                FramesToUs(p_sys, BytesToFrames(p_sys, p_sys->i_out_size)) + 10000;
 
+            lock_unlock(p_sys);
             msleep(i_frame_us / 2);
+            lock_lock(p_sys);
         }
+
+        lock_unlock(p_sys);
     }
     else
     {
-        /* Request the renderer to flush, and wait for an ACK.
-         * b_do_flush and b_paused need to be locked together in order to not
-         * get stuck here when b_paused is being set after reading. This can
-         * happen when setAliveState() is called from any thread through an
-         * interrupt notification */
-
-        vlc_mutex_lock(&p_sys->lock);
-        assert(!atomic_load(&p_sys->b_do_flush));
-         if (atomic_load(&p_sys->b_paused))
+        lock_lock(p_sys);
+
+        assert(!p_sys->b_do_flush);
+        if (p_sys->b_paused)
         {
-            vlc_mutex_unlock(&p_sys->lock);
-            TPCircularBufferClear(&p_sys->circular_buffer);
+            ca_ClearOutBuffers(p_aout);
+            lock_unlock(p_sys);
             return;
         }
 
-        atomic_store_explicit(&p_sys->b_do_flush, true, memory_order_release);
-        vlc_mutex_unlock(&p_sys->lock);
+        p_sys->b_do_flush = true;
+        lock_unlock(p_sys);
         vlc_sem_wait(&p_sys->flush_sem);
     }
 }
@@ -196,7 +270,9 @@ ca_Pause(audio_output_t * p_aout, bool pause, mtime_t date)
     struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
     VLC_UNUSED(date);
 
-    atomic_store_explicit(&p_sys->b_paused, pause, memory_order_relaxed);
+    lock_lock(p_sys);
+    p_sys->b_paused = pause;
+    lock_unlock(p_sys);
 }
 
 void
@@ -210,43 +286,41 @@ ca_Play(audio_output_t * p_aout, block_t * p_block)
                            p_sys->chans_to_reorder, p_sys->chan_table,
                            VLC_CODEC_FL32);
 
-    /* move data to buffer */
-    while (!TPCircularBufferProduceBytes(&p_sys->circular_buffer,
-                                         p_block->p_buffer, p_block->i_buffer))
+    lock_lock(p_sys);
+
+    if (likely(p_block->i_buffer <= p_sys->i_out_max_size))
     {
-        if (atomic_load_explicit(&p_sys->b_paused, memory_order_relaxed))
+        while (p_sys->i_out_max_size - p_sys->i_out_size < p_block->i_buffer)
         {
-            msg_Warn(p_aout, "dropping block because the circular buffer is "
-                     "full and paused");
-            break;
-        }
+            /* Worst case scenario */
 
-        /* Try to play what we can */
-        int32_t i_avalaible_bytes;
-        TPCircularBufferHead(&p_sys->circular_buffer, &i_avalaible_bytes);
-        assert(i_avalaible_bytes >= 0);
-        if (unlikely((size_t) i_avalaible_bytes >= p_block->i_buffer))
-            continue;
-
-        bool ret =
-            TPCircularBufferProduceBytes(&p_sys->circular_buffer,
-                                         p_block->p_buffer, i_avalaible_bytes);
-        VLC_UNUSED(ret);
-        assert(ret == true);
-        p_block->p_buffer += i_avalaible_bytes;
-        p_block->i_buffer -= i_avalaible_bytes;
-
-        /* Wait for the render buffer to play the remaining data */
-        const mtime_t i_frame_us =
-            FramesToUs(p_sys, BytesToFrames(p_sys, p_block->i_buffer));
-        msleep(i_frame_us / 2);
+            if (p_sys->b_paused)
+            {
+                lock_lock(p_sys);
+                return;
+            }
+            const size_t i_waiting_bytes = p_block->i_buffer
+                - p_sys->i_out_max_size + p_sys->i_out_size;
+            /* Wait for the render buffer to play the remaining data */
+            const mtime_t i_frame_us =
+                FramesToUs(p_sys, BytesToFrames(p_sys, i_waiting_bytes));
+
+            lock_unlock(p_sys);
+            msleep(i_frame_us / 2);
+            lock_lock(p_sys);
+        }
     }
 
-    unsigned i_underrun_size = atomic_exchange(&p_sys->i_underrun_size, 0);
-    if (i_underrun_size > 0)
-        msg_Warn(p_aout, "underrun of %u bytes", i_underrun_size);
+    block_ChainLastAppend(&p_sys->pp_out_last, p_block);
+    p_sys->i_out_size += p_block->i_buffer;
+
+    size_t i_underrun_size = p_sys->i_underrun_size;
+    p_sys->i_underrun_size = 0;
+
+    lock_unlock(p_sys);
 
-    block_Release(p_block);
+    if (i_underrun_size > 0)
+        msg_Warn(p_aout, "underrun of %zu bytes", i_underrun_size);
 }
 
 int
@@ -255,9 +329,10 @@ ca_Initialize(audio_output_t *p_aout, const audio_sample_format_t *fmt,
 {
     struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
 
-    atomic_store(&p_sys->i_underrun_size, 0);
-    atomic_store(&p_sys->b_paused, false);
-    atomic_store(&p_sys->b_highlatency, true);
+    p_sys->i_underrun_size = 0;
+    p_sys->b_paused = false;
+    p_sys->b_highlatency = true;
+
     p_sys->i_rate = fmt->i_rate;
     p_sys->i_bytes_per_frame = fmt->i_bytes_per_frame;
     p_sys->i_frame_length = fmt->i_frame_length;
@@ -290,8 +365,9 @@ ca_Initialize(audio_output_t *p_aout, const audio_sample_format_t *fmt,
         i_audiobuffer_size = i_audiobuffer_size * AOUT_MAX_ADVANCE_TIME
                            / CLOCK_FREQ;
     }
-    if (!TPCircularBufferInit(&p_sys->circular_buffer, i_audiobuffer_size))
-        return VLC_EGENERIC;
+
+    p_sys->i_out_max_size = i_audiobuffer_size;
+    ca_ClearOutBuffers(p_aout);
 
     return VLC_SUCCESS;
 }
@@ -300,8 +376,8 @@ void
 ca_Uninitialize(audio_output_t *p_aout)
 {
     struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
-    /* clean-up circular buffer */
-    TPCircularBufferCleanup(&p_sys->circular_buffer);
+    ca_ClearOutBuffers(p_aout);
+    p_sys->i_out_max_size = 0;
 }
 
 void
@@ -309,17 +385,18 @@ ca_SetAliveState(audio_output_t *p_aout, bool alive)
 {
     struct aout_sys_common *p_sys = (struct aout_sys_common *) p_aout->sys;
 
-    vlc_mutex_lock(&p_sys->lock);
-    atomic_store(&p_sys->b_paused, !alive);
+    lock_lock(p_sys);
 
-    bool expected = true;
-    if (!alive && atomic_compare_exchange_strong(&p_sys->b_do_flush, &expected, false))
+    p_sys->b_paused = !alive;
+
+    if (!alive && p_sys->b_do_flush)
     {
-        TPCircularBufferClear(&p_sys->circular_buffer);
-        /* Signal that the renderer is flushed */
+        ca_ClearOutBuffers(p_aout);
+        p_sys->b_do_flush = false;
         vlc_sem_post(&p_sys->flush_sem);
     }
-    vlc_mutex_unlock(&p_sys->lock);
+
+    lock_unlock(p_sys);
 }
 
 AudioUnit
diff --git a/modules/audio_output/coreaudio_common.h b/modules/audio_output/coreaudio_common.h
index 21cdcb6518..048f9ad7d3 100644
--- a/modules/audio_output/coreaudio_common.h
+++ b/modules/audio_output/coreaudio_common.h
@@ -27,13 +27,12 @@
 #endif
 
 #import <vlc_common.h>
-#import <stdatomic.h>
 #import <vlc_aout.h>
 #import <vlc_threads.h>
 
 #import <AudioUnit/AudioUnit.h>
 #import <AudioToolbox/AudioToolbox.h>
-#import "TPCircularBuffer.h"
+#import <os/lock.h>
 
 #define STREAM_FORMAT_MSG(pre, sfm) \
     pre "[%f][%4.4s][%u][%u][%u][%u][%u][%u]", \
@@ -50,14 +49,24 @@ struct aout_sys_common
     /* The following is owned by common.c (initialized from ca_Init, cleaned
      * from ca_Clean) */
 
-    /* circular buffer to swap the audio data */
-    TPCircularBuffer    circular_buffer;
-    atomic_uint         i_underrun_size;
-    atomic_bool         b_paused;
-    atomic_bool         b_do_flush;
-    atomic_bool         b_highlatency;
+    size_t              i_underrun_size;
+    bool                b_paused;
+    bool                b_do_flush;
+    bool                b_highlatency;
+
+    size_t              i_out_max_size;
+    size_t              i_out_size;
+    block_t             *p_out_chain;
+    block_t             **pp_out_last;
+
     vlc_sem_t           flush_sem;
-    vlc_mutex_t         lock;
+
+    union lock
+    {
+        os_unfair_lock  unfair;
+        pthread_mutex_t mutex;
+    } lock;
+
     int                 i_rate;
     unsigned int        i_bytes_per_frame;
     unsigned int        i_frame_length;
-- 
2.11.0



More information about the vlc-devel mailing list