[vlc-devel] [PATCH 1/3] vlc_list: helpers for doubly linked lists

Mon Jun 11 19:14:41 CEST 2018

Le maanantaina 11. kesäkuuta 2018, 18.14.47 EEST Romain Vimont a écrit :
> On Mon, Jun 11, 2018 at 01:27:12PM +0300, Rémi Denis-Courmont wrote:
> > I am storing the head so that the head expression is expansion-safe. In
> > practice, the compiler should be able to optimize it away. Likewise, it
> > should be able to optimize the constant offset away.
> Since the offset is compile-time constant, I would not store it at all.
> 
> > Point is that an iterator is unavoidable in the general case. With that in
> > mind, I'd rather leverage the iterator to make the code as safe as
> > possible.
> > 
> > I already replaced goal with head to make the iteration safe against
> > removal.
> OK, so you want the safe foreach version only.

As expected, GCC is perfectly able to optimize the head and the offset:

----8<--------8<--------8<--------8<--------8<--------8<--------8<----
% cat test.c
#include "include/vlc_list.h"

struct datum {
        int i;
        struct vlc_list node;
};

extern void process(int d);

int card(struct vlc_list *head)
{
        struct vlc_list_it it;
        struct datum *d;
        int count = 0;

        vlc_list_foreach(d, it, head, struct datum, node)
                count++;

        return count;
}

int sum(struct vlc_list *head)
{
        struct vlc_list_it it;
        struct datum *d;
        int sum = 0;

        vlc_list_foreach(d, it, head, struct datum, node)
                sum += d->i;

        return sum;
}

void iterate(struct vlc_list *head)
{
        struct vlc_list_it it;
        struct datum *d;

        vlc_list_foreach(d, it, head, struct datum, node)
                process(d->i);
}
% aarch64-linux-gnu-gcc -ffunction-sections -O3 -c test.c -o test.o
% aarch64-linux-gnu-objdump -d test.o

test.o:     format de fichier elf64-littleaarch64

Déassemblage de la section .text.card :

0000000000000000 <card>:
   0:   f9400402        ldr     x2, [x0, #8]
   4:   52800000        mov     w0, #0x0                        // #0
   8:   f9400441        ldr     x1, [x2, #8]
   c:   eb01005f        cmp     x2, x1
  10:   540000a0        b.eq    24 <card+0x24>  // b.none
  14:   f9400421        ldr     x1, [x1, #8]
  18:   11000400        add     w0, w0, #0x1
  1c:   eb01005f        cmp     x2, x1
  20:   54ffffa1        b.ne    14 <card+0x14>  // b.any
  24:   d65f03c0        ret

Déassemblage de la section .text.sum :

0000000000000000 <sum>:
   0:   f9400404        ldr     x4, [x0, #8]
   4:   52800000        mov     w0, #0x0                        // #0
   8:   d1002082        sub     x2, x4, #0x8
   c:   f9400481        ldr     x1, [x4, #8]
  10:   eb01009f        cmp     x4, x1
  14:   540000e0        b.eq    30 <sum+0x30>  // b.none
  18:   b9400043        ldr     w3, [x2]
  1c:   d1002022        sub     x2, x1, #0x8
  20:   f9400421        ldr     x1, [x1, #8]
  24:   0b030000        add     w0, w0, w3
  28:   eb01009f        cmp     x4, x1
  2c:   54ffff61        b.ne    18 <sum+0x18>  // b.any
  30:   d65f03c0        ret

Déassemblage de la section .text.iterate :

0000000000000000 <iterate>:
   0:   a9be7bfd        stp     x29, x30, [sp, #-32]!
   4:   910003fd        mov     x29, sp
   8:   f9400401        ldr     x1, [x0, #8]
   c:   f9000bf3        str     x19, [sp, #16]
  10:   f9400433        ldr     x19, [x1, #8]
  14:   eb13003f        cmp     x1, x19
  18:   54000180        b.eq    48 <iterate+0x48>  // b.none
  1c:   f9000fb4        str     x20, [x29, #24]
  20:   aa0003f4        mov     x20, x0
  24:   d1002020        sub     x0, x1, #0x8
  28:   b9400000        ldr     w0, [x0]
  2c:   94000000        bl      0 <process>
  30:   d1002260        sub     x0, x19, #0x8
  34:   f9400681        ldr     x1, [x20, #8]
  38:   f9400673        ldr     x19, [x19, #8]
  3c:   eb01027f        cmp     x19, x1
  40:   54ffff41        b.ne    28 <iterate+0x28>  // b.any
  44:   f9400fb4        ldr     x20, [x29, #24]
  48:   f9400bf3        ldr     x19, [sp, #16]
  4c:   a8c27bfd        ldp     x29, x30, [sp], #32
  50:   d65f03c0        ret

---->8-------->8-------->8-------->8-------->8-------->8-------->8----

Or if you are not fluent in AArch64 (wut mate?), the first two correspond to:

int card(struct list_node *x0)
{
    int w0;
    struct vlc_list *x1, *x2;

    x2 = x0->next;
    w0 = 0;
    x1 = x2->next;
    if (x2 == x1)
        goto card_24;
card_14:
    x1 = x1->next;
    w0++;
    if (x2 != x1)
        goto card_14;
card_24:
    return w0;
}

int sum(struct list_node *x0)
{
    int w0, w3;
    struct vlc_list *x1, *x4;
    struct data *x2;

    x4 = x0->next;
    w0 = 0;
    x2 = container_of(x4, struct data, node);
    x1 = x4->next;
    if (x4 == x1)
        goto _sum30;
_sum18:
    w3 = x2->i;
    x2 = container_of(x1, struct data, node);
    x1 = x1->next;
    w0 += w3;
    if (x4 != x1)
        goto _sum18;
_sum30:
    return w0;
}

And the last one is left as an exercise.

The offset is optimized away. The head (actually head->next) is retained in a 
register, not copied into the iterator. In the first case, even the cursor is 
optimized away. The iterator is never written to memory.

With that noted, I don't really see the point in adding an unsafe iterator 
variant, except for developers to shoot themselves in the foot. Likewise, I 
don't see the point in removing the head copy from the iterator, except to 
make the head expression no longer expansion-safe.

With -O2, the iterator functions are put out-of-line(!), and the iterator is 
fully written to/read from memory, of course.

-- 
Реми Дёни-Курмон
http://www.remlab.net/