[vlc-devel] [RFC 3/3] vlc_arrays: append items in amortized O(1)

Romain Vimont rom1v at videolabs.io
Thu Jul 19 17:57:18 CEST 2018


Use exponential growth to append items in _amortized constant time_,
instead of realloc() on every append/remove.

The strategy is similar to the one used by ARRAY_* macros (and C++
std::vector, Java ArrayList, Rust std::vec, etc.).

Also expose vlc_array_reserve() to avoid unnecessary reallocations, and
vlc_array_shrink_to_fit() to give a better control on the array size.
---
 include/vlc_arrays.h | 107 +++++++++++++++++++++------
 include/vlc_common.h |   9 ++-
 src/test/arrays.c    | 168 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 259 insertions(+), 25 deletions(-)

diff --git a/include/vlc_arrays.h b/include/vlc_arrays.h
index ce2936f1452..ce22c2e1a92 100644
--- a/include/vlc_arrays.h
+++ b/include/vlc_arrays.h
@@ -25,6 +25,9 @@
 #ifndef VLC_ARRAYS_H_
 #define VLC_ARRAYS_H_
 
+#include <vlc_common.h>
+#include <assert.h>
+
 /**
  * \file
  * This file defines functions, structures and macros for handling arrays in vlc
@@ -257,12 +260,16 @@ static inline void *realloc_or_free( void *p, size_t sz )
  ************************************************************************/
 typedef struct vlc_array_t
 {
+    size_t i_capacity;
     size_t i_count;
     void ** pp_elems;
 } vlc_array_t;
 
+#define VLC_ARRAY_MAX_LENGTH (SIZE_MAX / sizeof(void *))
+
 static inline void vlc_array_init( vlc_array_t * p_array )
 {
+    p_array->i_capacity = 0;
     p_array->i_count = 0;
     p_array->pp_elems = NULL;
 }
@@ -308,21 +315,75 @@ static inline ssize_t vlc_array_find( const vlc_array_t *ar,
     return -1;
 }
 
+static inline int vlc_array_resize_internal(vlc_array_t *array, size_t target)
+{
+    void **pp = (void **) vlc_realloc(array->pp_elems, target, sizeof(void *));
+    if (unlikely(!pp))
+        return -1;
+
+    array->pp_elems = pp;
+    array->i_capacity = target;
+    return 0;
+}
+
+static inline int vlc_array_reserve(vlc_array_t *array, size_t min_capacity)
+{
+    if (min_capacity <= array->i_capacity)
+        return 0; /* nothing to do */
+
+    if (min_capacity > VLC_ARRAY_MAX_LENGTH)
+        return -1;
+
+    if (min_capacity < 10)
+        min_capacity = 10; /* do not allocate tiny arrays */
+
+    /* multiply by 1.5 first (this may not overflow due to the way
+     * VLC_ARRAY_MAX_LENGTH is defined, unless sizeof(void *) == 1, which will
+     * never happen) */
+    size_t new_capacity = array->i_capacity + (array->i_capacity >> 1);
+    /* if it's still not sufficient */
+    if (new_capacity < min_capacity)
+        new_capacity = min_capacity;
+    else if (new_capacity > VLC_ARRAY_MAX_LENGTH)
+        /* the capacity must never exceed VLC_ARRAY_MAX_LENGTH */
+        new_capacity = VLC_ARRAY_MAX_LENGTH;
+
+    return vlc_array_resize_internal(array, new_capacity);
+}
+
+static inline int vlc_array_shrink_to_internal(vlc_array_t *array,
+                                               size_t target)
+{
+    if (array->i_capacity == target)
+        return 0;
+
+    if (target == 0)
+    {
+        vlc_array_clear(array);
+        return 0;
+    }
+
+    return vlc_array_resize_internal(array, target);
+}
+
+static inline int vlc_array_shrink_to_fit(vlc_array_t *array)
+{
+    return vlc_array_shrink_to_internal(array, array->i_count);
+}
+
 /* Write */
 static inline int vlc_array_insert( vlc_array_t *ar, void *elem, int idx )
 {
-    void **pp = (void **)realloc( ar->pp_elems,
-                                  sizeof( void * ) * (ar->i_count + 1) );
-    if( unlikely(pp == NULL) )
+    if (unlikely(vlc_array_reserve(ar, ar->i_count + 1)))
         return -1;
 
+    void **pp = ar->pp_elems;
     size_t tail = ar->i_count - idx;
     if( tail > 0 )
         memmove( pp + idx + 1, pp + idx, sizeof( void * ) * tail );
 
-    pp[idx] = elem;
+    ar->pp_elems[idx] = elem;
     ar->i_count++;
-    ar->pp_elems = pp;
     return 0;
 }
 
@@ -334,13 +395,10 @@ static inline void vlc_array_insert_or_abort( vlc_array_t *ar, void *elem, int i
 
 static inline int vlc_array_append( vlc_array_t *ar, void *elem )
 {
-    void **pp = (void **)realloc( ar->pp_elems,
-                                  sizeof( void * ) * (ar->i_count + 1) );
-    if( unlikely(pp == NULL) )
+    if (unlikely(vlc_array_reserve(ar, ar->i_count + 1)))
         return -1;
 
-    pp[ar->i_count++] = elem;
-    ar->pp_elems = pp;
+    ar->pp_elems[ar->i_count++] = elem;
     return 0;
 }
 
@@ -350,6 +408,22 @@ static inline void vlc_array_append_or_abort( vlc_array_t *ar, void *elem )
         abort();
 }
 
+/* shrink when necessary after remove() */
+static inline int vlc_array_autoshrink_internal(vlc_array_t *array)
+{
+    if (array->i_capacity <= 10)
+        return 0; /* do not shrink to tiny length */
+
+    if (array->i_count + (array->i_count >> 1) + 5 > array->i_capacity)
+        return 0; /* no need to shrink */
+
+    size_t target = array->i_count <= 6 ? 10 : array->i_count + 5;
+    if (target >= array->i_capacity)
+        return 0; /* do not grow */
+
+    return vlc_array_shrink_to_internal(array, target);
+}
+
 static inline void vlc_array_remove( vlc_array_t *ar, size_t idx )
 {
     void **pp = ar->pp_elems;
@@ -359,18 +433,7 @@ static inline void vlc_array_remove( vlc_array_t *ar, size_t idx )
         memmove( pp + idx, pp + idx + 1, sizeof( void * ) * tail );
 
     ar->i_count--;
-
-    if( ar->i_count > 0 )
-    {
-        pp = (void **)realloc( pp, sizeof( void * ) * ar->i_count );
-        if( likely(pp != NULL) )
-            ar->pp_elems = pp;
-    }
-    else
-    {
-        free( pp );
-        ar->pp_elems = NULL;
-    }
+    vlc_array_autoshrink_internal(ar);
 }
 
 
diff --git a/include/vlc_common.h b/include/vlc_common.h
index c88503c1a55..5d379865819 100644
--- a/include/vlc_common.h
+++ b/include/vlc_common.h
@@ -937,8 +937,6 @@ static inline bool mul_overflow(unsigned long long a, unsigned long long b,
 
 VLC_API char const * vlc_error( int ) VLC_USED;
 
-#include <vlc_arrays.h>
-
 /* MSB (big endian)/LSB (little endian) conversions - network order is always
  * MSB, and should be used for both network communications and files. */
 
@@ -1118,6 +1116,12 @@ static inline void *vlc_alloc(size_t count, size_t size)
     return mul_overflow(count, size, &size) ? NULL : malloc(size);
 }
 
+VLC_USED VLC_MALLOC
+static inline void *vlc_realloc(void *ptr, size_t count, size_t size)
+{
+    return mul_overflow(count, size, &size) ? NULL : realloc(ptr, size);
+}
+
 /*****************************************************************************
  * I18n stuff
  *****************************************************************************/
@@ -1180,6 +1184,7 @@ VLC_API const char * VLC_Compiler( void ) VLC_USED;
 /*****************************************************************************
  * Additional vlc stuff
  *****************************************************************************/
+#include "vlc_arrays.h"
 #include "vlc_messages.h"
 #include "vlc_objects.h"
 #include "vlc_variables.h"
diff --git a/src/test/arrays.c b/src/test/arrays.c
index 82277bbbc12..b7f5b829bd4 100644
--- a/src/test/arrays.c
+++ b/src/test/arrays.c
@@ -1,5 +1,5 @@
 /*****************************************************************************
- * arrays.h : Test for ARRAY_* macros
+ * arrays.h : Test for VLC arrays
  *****************************************************************************
  * Copyright (C) 2018 VLC authors and VideoLAN
  *
@@ -158,10 +158,176 @@ static void test_array_bsearch(void)
     ARRAY_RESET(array);
 }
 
+static void test_vlc_array_insert_remove(void)
+{
+    vlc_array_t array;
+    vlc_array_init(&array);
+
+    char data[32];
+
+    vlc_array_append(&array, &data[0]);
+    assert(vlc_array_count(&array) == 1);
+    assert(vlc_array_get(&array, 0) == &data[0]);
+
+    vlc_array_remove(&array, 0);
+    assert(vlc_array_count(&array) == 0);
+
+    vlc_array_append(&array, &data[1]);
+    vlc_array_append(&array, &data[2]);
+    vlc_array_append(&array, &data[3]);
+    vlc_array_remove(&array, 1);
+    assert(vlc_array_count(&array) == 2);
+    assert(vlc_array_get(&array, 0) == &data[1]);
+    assert(vlc_array_get(&array, 1) == &data[3]);
+
+    vlc_array_insert(&array, &data[4], 1);
+    assert(vlc_array_count(&array) == 3);
+    assert(vlc_array_get(&array, 0) == &data[1]);
+    assert(vlc_array_get(&array, 1) == &data[4]);
+    assert(vlc_array_get(&array, 2) == &data[3]);
+
+    vlc_array_clear(&array);
+}
+
+static void test_vlc_array_find(void)
+{
+    vlc_array_t array;
+    vlc_array_init(&array);
+
+    char data[10];
+
+    for (int i = 0; i < 10; ++i)
+        vlc_array_append(&array, &data[i]);
+
+    ssize_t idx;
+
+    idx = vlc_array_find(&array, &data[0]);
+    assert(idx == 0);
+
+    idx = vlc_array_find(&array, &data[1]);
+    assert(idx == 1);
+
+    idx = vlc_array_find(&array, &data[4]);
+    assert(idx == 4);
+
+    idx = vlc_array_find(&array, &data[9]);
+    assert(idx == 9);
+
+    idx = vlc_array_find(&array, "some other pointer");
+    assert(idx == -1);
+
+    vlc_array_clear(&array);
+}
+
+static void test_vlc_array_grow()
+{
+    vlc_array_t array;
+    vlc_array_init(&array);
+
+    char data;
+
+    for (int i = 0; i < 50; ++i)
+        vlc_array_append(&array, &data); /* append */
+
+    assert(vlc_array_count(&array) == 50);
+
+    for (int i = 0; i < 25; ++i)
+        vlc_array_insert(&array, &data, 20); /* insert from the middle */
+
+    assert(vlc_array_count(&array) == 75);
+
+    for (int i = 0; i < 25; ++i)
+        vlc_array_insert(&array, &data, 0); /* prepend */
+
+    assert(vlc_array_count(&array) == 100);
+
+    for (int i = 0; i < 50; ++i)
+        vlc_array_remove(&array, 20); /* remove from the middle */
+
+    assert(vlc_array_count(&array) == 50);
+
+    for (int i = 0; i < 25; ++i)
+        vlc_array_remove(&array, 0); /* remove from the head */
+
+    assert(vlc_array_count(&array) == 25);
+
+    for (int i = 24; i >= 0; --i)
+        vlc_array_remove(&array, i); /* remove from the tail */
+
+    assert(vlc_array_count(&array) == 0);
+
+    vlc_array_clear(&array);
+}
+
+static void test_vlc_array_exp_growth()
+{
+    vlc_array_t array;
+    vlc_array_init(&array);
+
+    char data;
+    size_t old_capacity = array.i_capacity;
+    int realloc_count = 0;
+    for (int i = 0; i < 10000; ++i)
+    {
+        vlc_array_append(&array, &data);
+        if (array.i_capacity != old_capacity)
+        {
+            realloc_count++;
+            old_capacity = array.i_capacity;
+        }
+    }
+
+    /* Test specifically for an expected growth factor of 1.5. In practice, the
+     * result is even lower (19) due to the first alloc of size 10 */
+    assert(realloc_count <= 23); /* ln(10000) / ln(1.5) ~= 23 */
+
+    realloc_count = 0;
+    for (int i = 9999; i >= 0; --i)
+    {
+        vlc_array_remove(&array, i);
+        if (array.i_capacity != old_capacity)
+        {
+            realloc_count++;
+            old_capacity = array.i_capacity;
+        }
+    }
+
+    /* Same expectation for removals */
+    assert(realloc_count <= 23);
+
+    vlc_array_clear(&array);
+}
+
+static void test_vlc_array_reserve()
+{
+    vlc_array_t array;
+    vlc_array_init(&array);
+
+    vlc_array_reserve(&array, 800);
+    assert(array.i_capacity >= 800);
+
+    size_t initial_capacity = array.i_capacity;
+
+    char data;
+    for (int i = 0; i < 800; ++i)
+    {
+        vlc_array_append(&array, &data);
+        assert(array.i_capacity == initial_capacity); /* no realloc */
+    }
+
+    vlc_array_clear(&array);
+}
+
 int main(void)
 {
     test_array_insert_remove();
     test_array_foreach();
     test_array_find();
     test_array_bsearch();
+
+    test_vlc_array_insert_remove();
+    test_vlc_array_find();
+    test_vlc_array_grow();
+    test_vlc_array_reserve();
+    test_vlc_array_exp_growth();
 }
-- 
2.18.0



More information about the vlc-devel mailing list