[vlc-devel] [RFC 3/3] vlc_arrays: append items in amortized O(1)

Romain Vimont rom1v at videolabs.io
Thu Jul 19 18:36:11 CEST 2018


On Thu, Jul 19, 2018 at 06:22:01PM +0200, Hugo Beauzée-Luyssen wrote:
> On Thu, Jul 19, 2018, at 6:15 PM, Romain Vimont wrote:
> > Thank you for the review.
> > 
> > On Thu, Jul 19, 2018 at 06:05:44PM +0200, Hugo Beauzée-Luyssen wrote:
> > > 
> > > 
> > > On Thu, Jul 19, 2018, at 5:57 PM, Romain Vimont wrote:
> > > > Use exponential growth to append items in _amortized constant time_,
> > > > instead of realloc() on every append/remove.
> > > > 
> > > > The strategy is similar to the one used by ARRAY_* macros (and C++
> > > > std::vector, Java ArrayList, Rust std::vec, etc.).
> > > > 
> > > > Also expose vlc_array_reserve() to avoid unnecessary reallocations, and
> > > > vlc_array_shrink_to_fit() to give a better control on the array size.
> > > > ---
> > > >  include/vlc_arrays.h | 107 +++++++++++++++++++++------
> > > >  include/vlc_common.h |   9 ++-
> > > >  src/test/arrays.c    | 168 ++++++++++++++++++++++++++++++++++++++++++-
> > > >  3 files changed, 259 insertions(+), 25 deletions(-)
> > > > 
> > > > diff --git a/include/vlc_arrays.h b/include/vlc_arrays.h
> > > > index ce2936f1452..ce22c2e1a92 100644
> > > > --- a/include/vlc_arrays.h
> > > > +++ b/include/vlc_arrays.h
> > > > @@ -25,6 +25,9 @@
> > > >  #ifndef VLC_ARRAYS_H_
> > > >  #define VLC_ARRAYS_H_
> > > >  
> > > > +#include <vlc_common.h>
> > > 
> > > Is this required?
> > 
> > I added vlc_realloc() in vlc_common.h in this patch. And I use
> > vlc_assert(), also defined in vlc_common.h.
> > 
> 
> Isn't vlc_common.h a requirement for every source file more or less?

IMO, as a general rule, if an implementation depends on a specific
header, the header should be included _in that file_, it should not rely
on an "external" inclusion of the header.

In this specific case, there is a cyclic dependency (that probably needs
to be broken):
 - vlc_arrays.h includes vlc_common.h (for vlc_realloc() and
   vlc_assert())
 - vlc_common.h includes vlc_arrays.h (so that vlc_arrays.h stuff is
   exposed when including vlc_common.h I guess)

> > > > +#include <assert.h>
> > > 
> > > This should be vlc_assert.h
> > 
> > I call vlc_assert(), but this still requires <assert.h> (like in other
> > vlc headers).
> > 
> 
> Woops sorry I got confused and was thinking we added a vlc_assert.h, my bad :(
> > > > +
> > > >  /**
> > > >   * \file
> > > >   * This file defines functions, structures and macros for handling 
> > > > arrays in vlc
> > > > @@ -257,12 +260,16 @@ static inline void *realloc_or_free( void *p, 
> > > > size_t sz )
> > > >   
> > > > ************************************************************************/
> > > >  typedef struct vlc_array_t
> > > >  {
> > > > +    size_t i_capacity;
> > > >      size_t i_count;
> > > >      void ** pp_elems;
> > > >  } vlc_array_t;
> > > >  
> > > > +#define VLC_ARRAY_MAX_LENGTH (SIZE_MAX / sizeof(void *))
> > > > +
> > > >  static inline void vlc_array_init( vlc_array_t * p_array )
> > > >  {
> > > > +    p_array->i_capacity = 0;
> > > >      p_array->i_count = 0;
> > > >      p_array->pp_elems = NULL;
> > > >  }
> > > > @@ -308,21 +315,75 @@ static inline ssize_t vlc_array_find( const 
> > > > vlc_array_t *ar,
> > > >      return -1;
> > > >  }
> > > >  
> > > > +static inline int vlc_array_resize_internal(vlc_array_t *array, size_t target)
> > > > +{
> > > > +    void **pp = (void **) vlc_realloc(array->pp_elems, target, sizeof(void *));
> > > > +    if (unlikely(!pp))
> > > > +        return -1;
> > > > +
> > > > +    array->pp_elems = pp;
> > > > +    array->i_capacity = target;
> > > > +    return 0;
> > > > +}
> > > > +
> > > > +static inline int vlc_array_reserve(vlc_array_t *array, size_t min_capacity)
> > > > +{
> > > > +    if (min_capacity <= array->i_capacity)
> > > > +        return 0; /* nothing to do */
> > > > +
> > > > +    if (min_capacity > VLC_ARRAY_MAX_LENGTH)
> > > > +        return -1;
> > > > +
> > > > +    if (min_capacity < 10)
> > > > +        min_capacity = 10; /* do not allocate tiny arrays */
> > > > +
> > > > +    /* multiply by 1.5 first (this may not overflow due to the way
> > > > +     * VLC_ARRAY_MAX_LENGTH is defined, unless sizeof(void *) == 1, which will
> > > > +     * never happen) */
> > > > +    size_t new_capacity = array->i_capacity + (array->i_capacity >> 1);
> > > > +    /* if it's still not sufficient */
> > > > +    if (new_capacity < min_capacity)
> > > > +        new_capacity = min_capacity;
> > > > +    else if (new_capacity > VLC_ARRAY_MAX_LENGTH)
> > > > +        /* the capacity must never exceed VLC_ARRAY_MAX_LENGTH */
> > > > +        new_capacity = VLC_ARRAY_MAX_LENGTH;
> > > > +
> > > > +    return vlc_array_resize_internal(array, new_capacity);
> > > > +}
> > > > +
> > > > +static inline int vlc_array_shrink_to_internal(vlc_array_t *array,
> > > > +                                               size_t target)
> > > > +{
> > > > +    if (array->i_capacity == target)
> > > > +        return 0;
> > > > +
> > > > +    if (target == 0)
> > > > +    {
> > > > +        vlc_array_clear(array);
> > > > +        return 0;
> > > > +    }
> > > > +
> > > > +    return vlc_array_resize_internal(array, target);
> > > > +}
> > > > +
> > > > +static inline int vlc_array_shrink_to_fit(vlc_array_t *array)
> > > > +{
> > > > +    return vlc_array_shrink_to_internal(array, array->i_count);
> > > > +}
> > > > +
> > > >  /* Write */
> > > >  static inline int vlc_array_insert( vlc_array_t *ar, void *elem, int idx )
> > > >  {
> > > > -    void **pp = (void **)realloc( ar->pp_elems,
> > > > -                                  sizeof( void * ) * (ar->i_count + 1) );
> > > > -    if( unlikely(pp == NULL) )
> > > > +    if (unlikely(vlc_array_reserve(ar, ar->i_count + 1)))
> > > >          return -1;
> > > >  
> > > > +    void **pp = ar->pp_elems;
> > > >      size_t tail = ar->i_count - idx;
> > > >      if( tail > 0 )
> > > >          memmove( pp + idx + 1, pp + idx, sizeof( void * ) * tail );
> > > >  
> > > > -    pp[idx] = elem;
> > > > +    ar->pp_elems[idx] = elem;
> > > >      ar->i_count++;
> > > > -    ar->pp_elems = pp;
> > > >      return 0;
> > > >  }
> > > >  
> > > > @@ -334,13 +395,10 @@ static inline void 
> > > > vlc_array_insert_or_abort( vlc_array_t *ar, void *elem, int i
> > > >  
> > > >  static inline int vlc_array_append( vlc_array_t *ar, void *elem )
> > > >  {
> > > > -    void **pp = (void **)realloc( ar->pp_elems,
> > > > -                                  sizeof( void * ) * (ar->i_count + 1) );
> > > > -    if( unlikely(pp == NULL) )
> > > > +    if (unlikely(vlc_array_reserve(ar, ar->i_count + 1)))
> > > >          return -1;
> > > >  
> > > > -    pp[ar->i_count++] = elem;
> > > > -    ar->pp_elems = pp;
> > > > +    ar->pp_elems[ar->i_count++] = elem;
> > > >      return 0;
> > > >  }
> > > >  
> > > > @@ -350,6 +408,22 @@ static inline void 
> > > > vlc_array_append_or_abort( vlc_array_t *ar, void *elem )
> > > >          abort();
> > > >  }
> > > >  
> > > > +/* shrink when necessary after remove() */
> > > > +static inline int vlc_array_autoshrink_internal(vlc_array_t *array)
> > > > +{
> > > > +    if (array->i_capacity <= 10)
> > > 
> > > Maybe it would be better to have some clear constant/macros to avoid magic numbers flying around
> > 
> > Possible :)
> > 
> > > > +        return 0; /* do not shrink to tiny length */
> > > > +
> > > > +    if (array->i_count + (array->i_count >> 1) + 5 > array->i_capacity)
> > > > +        return 0; /* no need to shrink */
> > > > +
> > > > +    size_t target = array->i_count <= 6 ? 10 : array->i_count + 5;
> > > > +    if (target >= array->i_capacity)
> > > > +        return 0; /* do not grow */
> > > > +
> > > > +    return vlc_array_shrink_to_internal(array, target);
> > > > +}
> > > > +
> > > >  static inline void vlc_array_remove( vlc_array_t *ar, size_t idx )
> > > >  {
> > > >      void **pp = ar->pp_elems;
> > > > @@ -359,18 +433,7 @@ static inline void vlc_array_remove( vlc_array_t 
> > > > *ar, size_t idx )
> > > >          memmove( pp + idx, pp + idx + 1, sizeof( void * ) * tail );
> > > >  
> > > >      ar->i_count--;
> > > > -
> > > > -    if( ar->i_count > 0 )
> > > > -    {
> > > > -        pp = (void **)realloc( pp, sizeof( void * ) * ar->i_count );
> > > > -        if( likely(pp != NULL) )
> > > > -            ar->pp_elems = pp;
> > > > -    }
> > > > -    else
> > > > -    {
> > > > -        free( pp );
> > > > -        ar->pp_elems = NULL;
> > > > -    }
> > > > +    vlc_array_autoshrink_internal(ar);
> > > >  }
> > > >  
> > > >  
> > > > diff --git a/include/vlc_common.h b/include/vlc_common.h
> > > > index c88503c1a55..5d379865819 100644
> > > > --- a/include/vlc_common.h
> > > > +++ b/include/vlc_common.h
> > > > @@ -937,8 +937,6 @@ static inline bool mul_overflow(unsigned long long 
> > > > a, unsigned long long b,
> > > >  
> > > >  VLC_API char const * vlc_error( int ) VLC_USED;
> > > >  
> > > > -#include <vlc_arrays.h>
> > > > -
> > > >  /* MSB (big endian)/LSB (little endian) conversions - network order is always
> > > >   * MSB, and should be used for both network communications and files. */
> > > >  
> > > > @@ -1118,6 +1116,12 @@ static inline void *vlc_alloc(size_t count, size_t size)
> > > >      return mul_overflow(count, size, &size) ? NULL : malloc(size);
> > > >  }
> > > >  
> > > > +VLC_USED VLC_MALLOC
> > > > +static inline void *vlc_realloc(void *ptr, size_t count, size_t size)
> > > > +{
> > > > +    return mul_overflow(count, size, &size) ? NULL : realloc(ptr, size);
> > > > +}
> > > > +
> > > >  /*****************************************************************************
> > > >   * I18n stuff
> > > >   *****************************************************************************/
> > > > @@ -1180,6 +1184,7 @@ VLC_API const char * VLC_Compiler( void ) VLC_USED;
> > > >  /*****************************************************************************
> > > >   * Additional vlc stuff
> > > >   *****************************************************************************/
> > > > +#include "vlc_arrays.h"
> > > >  #include "vlc_messages.h"
> > > >  #include "vlc_objects.h"
> > > >  #include "vlc_variables.h"
> > > > diff --git a/src/test/arrays.c b/src/test/arrays.c
> > > > index 82277bbbc12..b7f5b829bd4 100644
> > > > --- a/src/test/arrays.c
> > > > +++ b/src/test/arrays.c
> > > > @@ -1,5 +1,5 @@
> > > >  /*****************************************************************************
> > > > - * arrays.h : Test for ARRAY_* macros
> > > > + * arrays.h : Test for VLC arrays
> > > >   *****************************************************************************
> > > >   * Copyright (C) 2018 VLC authors and VideoLAN
> > > >   *
> > > > @@ -158,10 +158,176 @@ static void test_array_bsearch(void)
> > > >      ARRAY_RESET(array);
> > > >  }
> > > >  
> > > > +static void test_vlc_array_insert_remove(void)
> > > > +{
> > > > +    vlc_array_t array;
> > > > +    vlc_array_init(&array);
> > > > +
> > > > +    char data[32];
> > > > +
> > > > +    vlc_array_append(&array, &data[0]);
> > > > +    assert(vlc_array_count(&array) == 1);
> > > > +    assert(vlc_array_get(&array, 0) == &data[0]);
> > > > +
> > > > +    vlc_array_remove(&array, 0);
> > > > +    assert(vlc_array_count(&array) == 0);
> > > > +
> > > > +    vlc_array_append(&array, &data[1]);
> > > > +    vlc_array_append(&array, &data[2]);
> > > > +    vlc_array_append(&array, &data[3]);
> > > > +    vlc_array_remove(&array, 1);
> > > > +    assert(vlc_array_count(&array) == 2);
> > > > +    assert(vlc_array_get(&array, 0) == &data[1]);
> > > > +    assert(vlc_array_get(&array, 1) == &data[3]);
> > > > +
> > > > +    vlc_array_insert(&array, &data[4], 1);
> > > > +    assert(vlc_array_count(&array) == 3);
> > > > +    assert(vlc_array_get(&array, 0) == &data[1]);
> > > > +    assert(vlc_array_get(&array, 1) == &data[4]);
> > > > +    assert(vlc_array_get(&array, 2) == &data[3]);
> > > > +
> > > > +    vlc_array_clear(&array);
> > > > +}
> > > > +
> > > > +static void test_vlc_array_find(void)
> > > > +{
> > > > +    vlc_array_t array;
> > > > +    vlc_array_init(&array);
> > > > +
> > > > +    char data[10];
> > > > +
> > > > +    for (int i = 0; i < 10; ++i)
> > > > +        vlc_array_append(&array, &data[i]);
> > > > +
> > > > +    ssize_t idx;
> > > > +
> > > > +    idx = vlc_array_find(&array, &data[0]);
> > > > +    assert(idx == 0);
> > > > +
> > > > +    idx = vlc_array_find(&array, &data[1]);
> > > > +    assert(idx == 1);
> > > > +
> > > > +    idx = vlc_array_find(&array, &data[4]);
> > > > +    assert(idx == 4);
> > > > +
> > > > +    idx = vlc_array_find(&array, &data[9]);
> > > > +    assert(idx == 9);
> > > > +
> > > > +    idx = vlc_array_find(&array, "some other pointer");
> > > > +    assert(idx == -1);
> > > > +
> > > > +    vlc_array_clear(&array);
> > > > +}
> > > > +
> > > > +static void test_vlc_array_grow()
> > > > +{
> > > > +    vlc_array_t array;
> > > > +    vlc_array_init(&array);
> > > > +
> > > > +    char data;
> > > > +
> > > > +    for (int i = 0; i < 50; ++i)
> > > > +        vlc_array_append(&array, &data); /* append */
> > > > +
> > > > +    assert(vlc_array_count(&array) == 50);
> > > > +
> > > > +    for (int i = 0; i < 25; ++i)
> > > > +        vlc_array_insert(&array, &data, 20); /* insert from the middle */
> > > > +
> > > > +    assert(vlc_array_count(&array) == 75);
> > > > +
> > > > +    for (int i = 0; i < 25; ++i)
> > > > +        vlc_array_insert(&array, &data, 0); /* prepend */
> > > > +
> > > > +    assert(vlc_array_count(&array) == 100);
> > > > +
> > > > +    for (int i = 0; i < 50; ++i)
> > > > +        vlc_array_remove(&array, 20); /* remove from the middle */
> > > > +
> > > > +    assert(vlc_array_count(&array) == 50);
> > > > +
> > > > +    for (int i = 0; i < 25; ++i)
> > > > +        vlc_array_remove(&array, 0); /* remove from the head */
> > > > +
> > > > +    assert(vlc_array_count(&array) == 25);
> > > > +
> > > > +    for (int i = 24; i >= 0; --i)
> > > > +        vlc_array_remove(&array, i); /* remove from the tail */
> > > > +
> > > > +    assert(vlc_array_count(&array) == 0);
> > > > +
> > > > +    vlc_array_clear(&array);
> > > > +}
> > > > +
> > > > +static void test_vlc_array_exp_growth()
> > > > +{
> > > > +    vlc_array_t array;
> > > > +    vlc_array_init(&array);
> > > > +
> > > > +    char data;
> > > > +    size_t old_capacity = array.i_capacity;
> > > > +    int realloc_count = 0;
> > > > +    for (int i = 0; i < 10000; ++i)
> > > > +    {
> > > > +        vlc_array_append(&array, &data);
> > > > +        if (array.i_capacity != old_capacity)
> > > > +        {
> > > > +            realloc_count++;
> > > > +            old_capacity = array.i_capacity;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    /* Test specifically for an expected growth factor of 1.5. In practice, the
> > > > +     * result is even lower (19) due to the first alloc of size 10 */
> > > > +    assert(realloc_count <= 23); /* ln(10000) / ln(1.5) ~= 23 */
> > > > +
> > > > +    realloc_count = 0;
> > > > +    for (int i = 9999; i >= 0; --i)
> > > > +    {
> > > > +        vlc_array_remove(&array, i);
> > > > +        if (array.i_capacity != old_capacity)
> > > > +        {
> > > > +            realloc_count++;
> > > > +            old_capacity = array.i_capacity;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    /* Same expectation for removals */
> > > > +    assert(realloc_count <= 23);
> > > > +
> > > > +    vlc_array_clear(&array);
> > > > +}
> > > > +
> > > > +static void test_vlc_array_reserve()
> > > > +{
> > > > +    vlc_array_t array;
> > > > +    vlc_array_init(&array);
> > > > +
> > > > +    vlc_array_reserve(&array, 800);
> > > > +    assert(array.i_capacity >= 800);
> > > > +
> > > > +    size_t initial_capacity = array.i_capacity;
> > > > +
> > > > +    char data;
> > > > +    for (int i = 0; i < 800; ++i)
> > > > +    {
> > > > +        vlc_array_append(&array, &data);
> > > > +        assert(array.i_capacity == initial_capacity); /* no realloc */
> > > > +    }
> > > > +
> > > > +    vlc_array_clear(&array);
> > > > +}
> > > > +
> > > >  int main(void)
> > > >  {
> > > >      test_array_insert_remove();
> > > >      test_array_foreach();
> > > >      test_array_find();
> > > >      test_array_bsearch();
> > > > +
> > > > +    test_vlc_array_insert_remove();
> > > > +    test_vlc_array_find();
> > > > +    test_vlc_array_grow();
> > > > +    test_vlc_array_reserve();
> > > > +    test_vlc_array_exp_growth();
> > > >  }
> > > > -- 
> > > > 2.18.0
> > > > 
> > > > _______________________________________________
> > > > vlc-devel mailing list
> > > > To unsubscribe or modify your subscription options:
> > > > https://mailman.videolan.org/listinfo/vlc-devel
> > > 
> > > 
> > > -- 
> > >   Hugo Beauzée-Luyssen
> > >   hugo at beauzee.fr
> > _______________________________________________
> > vlc-devel mailing list
> > To unsubscribe or modify your subscription options:
> > https://mailman.videolan.org/listinfo/vlc-devel
> 
> 
> -- 
>   Hugo Beauzée-Luyssen
>   hugo at beauzee.fr
> _______________________________________________
> vlc-devel mailing list
> To unsubscribe or modify your subscription options:
> https://mailman.videolan.org/listinfo/vlc-devel


More information about the vlc-devel mailing list