[vlc-devel] [RFC 3/3] vlc_arrays: append items in amortized O(1)
Filip Roséen
filip at atch.se
Fri Jul 20 10:46:42 CEST 2018
Hi Romain,
I recommend adding documentation for the newly introduced functions,
as we will never have a good documentation of the codebase if we keep
adding things without writing some for them.
There are a ton of entities without documentation, but let's walk
towards documenting everything, and not away from it.
On 2018-07-19 17:57, Romain Vimont wrote:
> Use exponential growth to append items in _amortized constant time_,
> instead of realloc() on every append/remove.
>
> The strategy is similar to the one used by ARRAY_* macros (and C++
> std::vector, Java ArrayList, Rust std::vec, etc.).
>
> Also expose vlc_array_reserve() to avoid unnecessary reallocations, and
> vlc_array_shrink_to_fit() to give a better control on the array size.
> ---
> include/vlc_arrays.h | 107 +++++++++++++++++++++------
> include/vlc_common.h | 9 ++-
> src/test/arrays.c | 168 ++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 259 insertions(+), 25 deletions(-)
>
> diff --git a/include/vlc_arrays.h b/include/vlc_arrays.h
> index ce2936f1452..ce22c2e1a92 100644
> --- a/include/vlc_arrays.h
> +++ b/include/vlc_arrays.h
> @@ -25,6 +25,9 @@
> #ifndef VLC_ARRAYS_H_
> #define VLC_ARRAYS_H_
>
> +#include <vlc_common.h>
> +#include <assert.h>
> +
> /**
> * \file
> * This file defines functions, structures and macros for handling arrays in vlc
> @@ -257,12 +260,16 @@ static inline void *realloc_or_free( void *p, size_t sz )
> ************************************************************************/
> typedef struct vlc_array_t
> {
> + size_t i_capacity;
> size_t i_count;
> void ** pp_elems;
> } vlc_array_t;
If one is to be pedantic you should order the data-members in
likelihood of being read, to prevent cache-misses. It's an extremely
minor thing so probably not worth changing though.
>
> +#define VLC_ARRAY_MAX_LENGTH (SIZE_MAX / sizeof(void *))
> +
> static inline void vlc_array_init( vlc_array_t * p_array )
> {
> + p_array->i_capacity = 0;
> p_array->i_count = 0;
> p_array->pp_elems = NULL;
> }
> @@ -308,21 +315,75 @@ static inline ssize_t vlc_array_find( const vlc_array_t *ar,
> return -1;
Use `VLC_EGENERIC`, `VLC_ENOMEM`, and `VLC_SUCCESS` (this applies to
every `return` introduced in this commit. I know the file currently
uses `0` and `-1` do denote *success* vs *error*, so you could take
some extra time and fix those too (in a separate commit, probably
before you start adding new things).
> }
>
> +static inline int vlc_array_resize_internal(vlc_array_t *array, size_t target)
> +{
> + void **pp = (void **) vlc_realloc(array->pp_elems, target, sizeof(void *));
> + if (unlikely(!pp))
> + return -1;
> +
> + array->pp_elems = pp;
> + array->i_capacity = target;
> + return 0;
> +}
> +
> +static inline int vlc_array_reserve(vlc_array_t *array, size_t min_capacity)
> +{
> + if (min_capacity <= array->i_capacity)
> + return 0; /* nothing to do */
> +
> + if (min_capacity > VLC_ARRAY_MAX_LENGTH)
> + return -1;
> +
> + if (min_capacity < 10)
> + min_capacity = 10; /* do not allocate tiny arrays */
> +
> + /* multiply by 1.5 first (this may not overflow due to the way
> + * VLC_ARRAY_MAX_LENGTH is defined, unless sizeof(void *) == 1, which will
> + * never happen) */
> + size_t new_capacity = array->i_capacity + (array->i_capacity >> 1);
> + /* if it's still not sufficient */
> + if (new_capacity < min_capacity)
> + new_capacity = min_capacity;
> + else if (new_capacity > VLC_ARRAY_MAX_LENGTH)
> + /* the capacity must never exceed VLC_ARRAY_MAX_LENGTH */
> + new_capacity = VLC_ARRAY_MAX_LENGTH;
> +
> + return vlc_array_resize_internal(array, new_capacity);
> +}
> +
> +static inline int vlc_array_shrink_to_internal(vlc_array_t *array,
> + size_t target)
> +{
> + if (array->i_capacity == target)
> + return 0;
> +
> + if (target == 0)
> + {
> + vlc_array_clear(array);
> + return 0;
> + }
> +
> + return vlc_array_resize_internal(array, target);
> +}
> +
> +static inline int vlc_array_shrink_to_fit(vlc_array_t *array)
> +{
> + return vlc_array_shrink_to_internal(array, array->i_count);
> +}
> +
> /* Write */
> static inline int vlc_array_insert( vlc_array_t *ar, void *elem, int idx )
> {
> - void **pp = (void **)realloc( ar->pp_elems,
> - sizeof( void * ) * (ar->i_count + 1) );
> - if( unlikely(pp == NULL) )
> + if (unlikely(vlc_array_reserve(ar, ar->i_count + 1)))
> return -1;
>
> + void **pp = ar->pp_elems;
> size_t tail = ar->i_count - idx;
> if( tail > 0 )
> memmove( pp + idx + 1, pp + idx, sizeof( void * ) * tail );
>
> - pp[idx] = elem;
> + ar->pp_elems[idx] = elem;
> ar->i_count++;
> - ar->pp_elems = pp;
> return 0;
> }
>
> @@ -334,13 +395,10 @@ static inline void vlc_array_insert_or_abort( vlc_array_t *ar, void *elem, int i
>
> static inline int vlc_array_append( vlc_array_t *ar, void *elem )
> {
> - void **pp = (void **)realloc( ar->pp_elems,
> - sizeof( void * ) * (ar->i_count + 1) );
> - if( unlikely(pp == NULL) )
> + if (unlikely(vlc_array_reserve(ar, ar->i_count + 1)))
> return -1;
>
> - pp[ar->i_count++] = elem;
> - ar->pp_elems = pp;
> + ar->pp_elems[ar->i_count++] = elem;
> return 0;
> }
>
> @@ -350,6 +408,22 @@ static inline void vlc_array_append_or_abort( vlc_array_t *ar, void *elem )
> abort();
> }
>
> +/* shrink when necessary after remove() */
> +static inline int vlc_array_autoshrink_internal(vlc_array_t *array)
> +{
> + if (array->i_capacity <= 10)
> + return 0; /* do not shrink to tiny length */
> +
> + if (array->i_count + (array->i_count >> 1) + 5 > array->i_capacity)
> + return 0; /* no need to shrink */
Even though I am in *CET*, it is really late in terms of my biological
clock, so I might be reading this wrong: but is the above not backwards?
I assume you want to shrink *if* `array->i_capacity` is more than 1.5
times the current size, not if it is less than that. Also, even though
operator precedence order is something everyone *should* know, an
extra set of parenthesis would not hurt readability.
> +
> + size_t target = array->i_count <= 6 ? 10 : array->i_count + 5;
> + if (target >= array->i_capacity)
> + return 0; /* do not grow */
> +
> + return vlc_array_shrink_to_internal(array, target);
> +}
> +
> static inline void vlc_array_remove( vlc_array_t *ar, size_t idx )
> {
> void **pp = ar->pp_elems;
> @@ -359,18 +433,7 @@ static inline void vlc_array_remove( vlc_array_t *ar, size_t idx )
> memmove( pp + idx, pp + idx + 1, sizeof( void * ) * tail );
>
> ar->i_count--;
> -
> - if( ar->i_count > 0 )
> - {
> - pp = (void **)realloc( pp, sizeof( void * ) * ar->i_count );
> - if( likely(pp != NULL) )
> - ar->pp_elems = pp;
> - }
> - else
> - {
> - free( pp );
> - ar->pp_elems = NULL;
> - }
> + vlc_array_autoshrink_internal(ar);
> }
>
>
> diff --git a/include/vlc_common.h b/include/vlc_common.h
> index c88503c1a55..5d379865819 100644
> --- a/include/vlc_common.h
> +++ b/include/vlc_common.h
> @@ -937,8 +937,6 @@ static inline bool mul_overflow(unsigned long long a, unsigned long long b,
>
> VLC_API char const * vlc_error( int ) VLC_USED;
>
> -#include <vlc_arrays.h>
> -
> /* MSB (big endian)/LSB (little endian) conversions - network order is always
> * MSB, and should be used for both network communications and files. */
>
> @@ -1118,6 +1116,12 @@ static inline void *vlc_alloc(size_t count, size_t size)
> return mul_overflow(count, size, &size) ? NULL : malloc(size);
> }
>
> +VLC_USED VLC_MALLOC
> +static inline void *vlc_realloc(void *ptr, size_t count, size_t size)
> +{
> + return mul_overflow(count, size, &size) ? NULL : realloc(ptr, size);
> +}
> +
> /*****************************************************************************
> * I18n stuff
> *****************************************************************************/
> @@ -1180,6 +1184,7 @@ VLC_API const char * VLC_Compiler( void ) VLC_USED;
> /*****************************************************************************
> * Additional vlc stuff
> *****************************************************************************/
> +#include "vlc_arrays.h"
> #include "vlc_messages.h"
> #include "vlc_objects.h"
> #include "vlc_variables.h"
> diff --git a/src/test/arrays.c b/src/test/arrays.c
> index 82277bbbc12..b7f5b829bd4 100644
> --- a/src/test/arrays.c
> +++ b/src/test/arrays.c
> @@ -1,5 +1,5 @@
> /*****************************************************************************
> - * arrays.h : Test for ARRAY_* macros
> + * arrays.h : Test for VLC arrays
> *****************************************************************************
> * Copyright (C) 2018 VLC authors and VideoLAN
> *
> @@ -158,10 +158,176 @@ static void test_array_bsearch(void)
> ARRAY_RESET(array);
> }
Your tests should also assert on the return-code for functions which
may fail.
>
> +static void test_vlc_array_insert_remove(void)
> +{
> + vlc_array_t array;
> + vlc_array_init(&array);
> +
> + char data[32];
> +
> + vlc_array_append(&array, &data[0]);
> + assert(vlc_array_count(&array) == 1);
> + assert(vlc_array_get(&array, 0) == &data[0]);
> +
> + vlc_array_remove(&array, 0);
> + assert(vlc_array_count(&array) == 0);
> +
> + vlc_array_append(&array, &data[1]);
> + vlc_array_append(&array, &data[2]);
> + vlc_array_append(&array, &data[3]);
> + vlc_array_remove(&array, 1);
> + assert(vlc_array_count(&array) == 2);
> + assert(vlc_array_get(&array, 0) == &data[1]);
> + assert(vlc_array_get(&array, 1) == &data[3]);
> +
> + vlc_array_insert(&array, &data[4], 1);
> + assert(vlc_array_count(&array) == 3);
> + assert(vlc_array_get(&array, 0) == &data[1]);
> + assert(vlc_array_get(&array, 1) == &data[4]);
> + assert(vlc_array_get(&array, 2) == &data[3]);
> +
> + vlc_array_clear(&array);
> +}
> +
> +static void test_vlc_array_find(void)
> +{
> + vlc_array_t array;
> + vlc_array_init(&array);
> +
> + char data[10];
> +
> + for (int i = 0; i < 10; ++i)
> + vlc_array_append(&array, &data[i]);
> +
> + ssize_t idx;
> +
> + idx = vlc_array_find(&array, &data[0]);
> + assert(idx == 0);
> +
> + idx = vlc_array_find(&array, &data[1]);
> + assert(idx == 1);
> +
> + idx = vlc_array_find(&array, &data[4]);
> + assert(idx == 4);
> +
> + idx = vlc_array_find(&array, &data[9]);
> + assert(idx == 9);
> +
> + idx = vlc_array_find(&array, "some other pointer");
> + assert(idx == -1);
> +
> + vlc_array_clear(&array);
> +}
> +
> +static void test_vlc_array_grow()
> +{
> + vlc_array_t array;
> + vlc_array_init(&array);
> +
> + char data;
> +
> + for (int i = 0; i < 50; ++i)
> + vlc_array_append(&array, &data); /* append */
> +
> + assert(vlc_array_count(&array) == 50);
> +
> + for (int i = 0; i < 25; ++i)
> + vlc_array_insert(&array, &data, 20); /* insert from the middle */
> +
> + assert(vlc_array_count(&array) == 75);
> +
> + for (int i = 0; i < 25; ++i)
> + vlc_array_insert(&array, &data, 0); /* prepend */
> +
> + assert(vlc_array_count(&array) == 100);
> +
> + for (int i = 0; i < 50; ++i)
> + vlc_array_remove(&array, 20); /* remove from the middle */
> +
> + assert(vlc_array_count(&array) == 50);
> +
> + for (int i = 0; i < 25; ++i)
> + vlc_array_remove(&array, 0); /* remove from the head */
> +
> + assert(vlc_array_count(&array) == 25);
> +
> + for (int i = 24; i >= 0; --i)
> + vlc_array_remove(&array, i); /* remove from the tail */
> +
> + assert(vlc_array_count(&array) == 0);
> +
> + vlc_array_clear(&array);
> +}
> +
> +static void test_vlc_array_exp_growth()
> +{
> + vlc_array_t array;
> + vlc_array_init(&array);
> +
> + char data;
> + size_t old_capacity = array.i_capacity;
> + int realloc_count = 0;
> + for (int i = 0; i < 10000; ++i)
> + {
> + vlc_array_append(&array, &data);
> + if (array.i_capacity != old_capacity)
> + {
> + realloc_count++;
> + old_capacity = array.i_capacity;
> + }
> + }
> +
> + /* Test specifically for an expected growth factor of 1.5. In practice, the
> + * result is even lower (19) due to the first alloc of size 10 */
> + assert(realloc_count <= 23); /* ln(10000) / ln(1.5) ~= 23 */
> +
> + realloc_count = 0;
> + for (int i = 9999; i >= 0; --i)
> + {
> + vlc_array_remove(&array, i);
> + if (array.i_capacity != old_capacity)
> + {
> + realloc_count++;
> + old_capacity = array.i_capacity;
> + }
> + }
> +
> + /* Same expectation for removals */
> + assert(realloc_count <= 23);
> +
> + vlc_array_clear(&array);
> +}
> +
> +static void test_vlc_array_reserve()
> +{
> + vlc_array_t array;
> + vlc_array_init(&array);
> +
> + vlc_array_reserve(&array, 800);
> + assert(array.i_capacity >= 800);
> +
> + size_t initial_capacity = array.i_capacity;
> +
> + char data;
> + for (int i = 0; i < 800; ++i)
> + {
> + vlc_array_append(&array, &data);
> + assert(array.i_capacity == initial_capacity); /* no realloc */
> + }
> +
> + vlc_array_clear(&array);
> +}
> +
> int main(void)
> {
> test_array_insert_remove();
> test_array_foreach();
> test_array_find();
> test_array_bsearch();
There must be a patch missing in this set, perhaps it is intentional,
but as we currently have no tests for arrays in `src/test/array.c` the
above is very odd.
I recommend trying to send patches that apply to the current codebase,
even if the patches are marked as *RFC* (makes it easier for those who
review by applying). Sure, I am not one of those people but.. there
are certainly a few.
> +
> + test_vlc_array_insert_remove();
> + test_vlc_array_find();
> + test_vlc_array_grow();
> + test_vlc_array_reserve();
> + test_vlc_array_exp_growth();
> }
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/vlc-devel/attachments/20180720/e4f13b5c/attachment.html>
More information about the vlc-devel
mailing list