[vlc-devel] [PATCH 3/4] mmal: handle VCSM buffers

Steve Lhomme robux4 at ycbcr.xyz
Fri Jan 17 17:00:57 CET 2020


From: John Cox <jc at kynesim.co.uk>

Via the VideoCore Shared Memory CMA API. Available on the RaspberryPi 3/4.

Enabled by booting with vc4-fkms-v3d in dtoverlay (on by default for RPi4).

Signed-off-by: Steve Lhomme <robux4 at ycbcr.xyz>
---
 modules/hw/mmal/Makefile.am    |   9 +-
 modules/hw/mmal/mmal_cma.c     | 628 +++++++++++++++++++++++++++++++++
 modules/hw/mmal/mmal_cma.h     |  69 ++++
 modules/hw/mmal/mmal_picture.c | 214 ++++++++++-
 modules/hw/mmal/mmal_picture.h |  10 +
 modules/hw/mmal/vout.c         |   2 +
 6 files changed, 928 insertions(+), 4 deletions(-)
 create mode 100644 modules/hw/mmal/mmal_cma.c
 create mode 100644 modules/hw/mmal/mmal_cma.h

diff --git a/modules/hw/mmal/Makefile.am b/modules/hw/mmal/Makefile.am
index f6bb51b3ff0..646289a5d9a 100644
--- a/modules/hw/mmal/Makefile.am
+++ b/modules/hw/mmal/Makefile.am
@@ -4,7 +4,8 @@ mmaldir = $(pluginsdir)/mmal
 AM_CFLAGS += $(CFLAGS_mmal)
 AM_LDFLAGS += -rpath '$(mmaldir)' $(LDFLAGS_mmal)
 
-libmmal_vout_plugin_la_SOURCES = vout.c subpic.c subpic.h mmal_picture.c mmal_picture.h
+libmmal_vout_plugin_la_SOURCES = vout.c subpic.c subpic.h mmal_picture.c mmal_picture.h \
+  mmal_cma.c mmal_cma.h
 libmmal_vout_plugin_la_CFLAGS = $(AM_CFLAGS)
 if ENABLE_QT
 libmmal_vout_plugin_la_CFLAGS += -DHAVE_QT
@@ -13,13 +14,15 @@ libmmal_vout_plugin_la_LDFLAGS = $(AM_LDFLAGS) -lm
 libmmal_vout_plugin_la_LIBADD = $(LIBS_mmal)
 mmal_LTLIBRARIES = libmmal_vout_plugin.la
 
-libmmal_codec_plugin_la_SOURCES = codec.c mmal_picture.c mmal_picture.h
+libmmal_codec_plugin_la_SOURCES = codec.c mmal_picture.c mmal_picture.h \
+  mmal_cma.c mmal_cma.h
 libmmal_codec_plugin_la_CFLAGS = $(AM_CFLAGS)
 libmmal_codec_plugin_la_LDFLAGS = $(AM_LDFLAGS)
 libmmal_codec_plugin_la_LIBADD = $(LIBS_mmal)
 mmal_LTLIBRARIES += libmmal_codec_plugin.la
 
-libmmal_deinterlace_plugin_la_SOURCES = deinterlace.c mmal_picture.c mmal_picture.h
+libmmal_deinterlace_plugin_la_SOURCES = deinterlace.c mmal_picture.c mmal_picture.h \
+  mmal_cma.c mmal_cma.h
 libmmal_deinterlace_plugin_la_CFLAGS = $(AM_CFLAGS)
 libmmal_deinterlace_plugin_la_LDFLAGS = $(AM_LDFLAGS)
 libmmal_deinterlace_plugin_la_LIBADD = $(LIBS_mmal)
diff --git a/modules/hw/mmal/mmal_cma.c b/modules/hw/mmal/mmal_cma.c
new file mode 100644
index 00000000000..9b9eb48bbd6
--- /dev/null
+++ b/modules/hw/mmal/mmal_cma.c
@@ -0,0 +1,628 @@
+/*****************************************************************************
+ * mmal_cmal.c:
+ *****************************************************************************
+ * Copyright © 2018-2020 John Cox
+ *
+ * Authors: John Cox <jc at kynesim.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdatomic.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <interface/vcsm/user-vcsm.h>
+
+#include <vlc_common.h>
+#include <vlc_picture.h>
+
+#include "mmal_cma.h"
+#include "mmal_picture.h"
+
+#include <assert.h>
+
+//-----------------------------------------------------------------------------
+//
+// Generic pool functions
+// Knows nothing about pool entries
+
+// Pool structure
+// Ref count is held by pool owner and pool els that have been got
+// Els in the pool do not count towards its ref count
+struct cma_pool_fixed_s
+{
+    atomic_int ref_count;
+
+    vlc_mutex_t lock;
+    unsigned int n_in;
+    unsigned int n_out;
+    unsigned int pool_size;
+    int flight_size;
+    size_t el_size;
+    cma_buf_t ** pool;
+
+    bool cancel;
+    int in_flight;
+    vlc_cond_t flight_cond;
+
+    cma_buf_pool_t * alloc_v;
+
+    char * name;
+};
+
+static unsigned int inc_mod(const unsigned int n, const unsigned int m)
+{
+    return n + 1 >= m ? 0 : n + 1;
+}
+
+static cma_buf_t * cma_pool_alloc_cb(cma_buf_pool_t * const, size_t);
+static void cma_pool_delete(cma_buf_t * const);
+static void cma_buf_pool_on_delete_cb(cma_buf_pool_t * const);
+
+static void free_pool(cma_buf_t ** const pool, const unsigned int pool_size)
+{
+    if (pool == NULL)
+        return;
+
+    for (unsigned int n = 0; n != pool_size; ++n)
+        if (pool[n] != NULL)
+            cma_pool_delete(pool[n]);
+    free(pool);
+}
+
+// Just kill this - no checks
+static void cma_pool_fixed_delete(cma_pool_fixed_t * const p)
+{
+    cma_buf_pool_t * const v = p->alloc_v;
+
+    free_pool(p->pool, p->pool_size);
+
+    free(p->name);
+
+    vlc_cond_destroy(&p->flight_cond);
+    vlc_mutex_destroy(&p->lock);
+    free(p);
+
+    // Inform our container that we are dead (if it cares)
+    cma_buf_pool_on_delete_cb(v);
+}
+
+static void cma_pool_fixed_unref(cma_pool_fixed_t * const p)
+{
+    if (atomic_fetch_sub(&p->ref_count, 1) <= 1)
+        cma_pool_fixed_delete(p);
+}
+
+static void cma_pool_fixed_ref(cma_pool_fixed_t * const p)
+{
+    atomic_fetch_add(&p->ref_count, 1);
+}
+
+static void cma_pool_fixed_inc_in_flight(cma_pool_fixed_t * const p)
+{
+    vlc_mutex_lock(&p->lock);
+    ++p->in_flight;
+    vlc_mutex_unlock(&p->lock);
+}
+
+static void cma_pool_fixed_dec_in_flight(cma_pool_fixed_t * const p)
+{
+    vlc_mutex_lock(&p->lock);
+    if (--p->in_flight == 0)
+        vlc_cond_signal(&p->flight_cond);
+    vlc_mutex_unlock(&p->lock);
+}
+
+static cma_buf_t * cma_pool_fixed_get(cma_pool_fixed_t * const p, const size_t req_el_size, const bool inc_flight, const bool no_pool)
+{
+    cma_buf_t * v = NULL;
+
+    vlc_mutex_lock(&p->lock);
+
+    for (;;)
+    {
+        if (req_el_size != p->el_size)
+        {
+            cma_buf_t ** const deadpool = p->pool;
+            const unsigned int dead_n = p->pool_size;
+
+            p->pool = NULL;
+            p->n_in = 0;
+            p->n_out = 0;
+            p->el_size = req_el_size;
+
+            if (deadpool != NULL)
+            {
+                vlc_mutex_unlock(&p->lock);
+                // Do the free old op outside the mutex in case the free is slow
+                free_pool(deadpool, dead_n);
+                vlc_mutex_lock(&p->lock);
+                continue;
+            }
+        }
+
+        // Late abort if flush or cancel so we can still kill the pool
+        if (req_el_size == 0 || p->cancel)
+        {
+            vlc_mutex_unlock(&p->lock);
+            return NULL;
+        }
+
+        if (p->pool != NULL && !no_pool)
+        {
+            v = p->pool[p->n_in];
+            if (v != NULL)
+            {
+                p->pool[p->n_in] = NULL;
+                p->n_in = inc_mod(p->n_in, p->pool_size);
+                break;
+            }
+        }
+
+        if (p->in_flight <= 0)
+            break;
+
+        vlc_cond_wait(&p->flight_cond, &p->lock);
+    }
+
+    if (inc_flight)
+        ++p->in_flight;
+
+    vlc_mutex_unlock(&p->lock);
+
+    if (v == NULL && req_el_size != 0)
+        v = cma_pool_alloc_cb(p->alloc_v, req_el_size);
+
+    // Tag ref
+    if (v != NULL)
+        cma_pool_fixed_ref(p);
+    // Remove flight if we set it and error
+    else if (inc_flight)
+        cma_pool_fixed_dec_in_flight(p);
+
+    return v;
+}
+
+static void cma_pool_fixed_put(cma_pool_fixed_t * const p, cma_buf_t * v, const size_t el_size, const bool was_in_flight)
+{
+    vlc_mutex_lock(&p->lock);
+
+    if (el_size == p->el_size && (p->pool == NULL || p->pool[p->n_out] == NULL))
+    {
+        if (p->pool == NULL)
+            p->pool = calloc(p->pool_size, sizeof(void*));
+
+        p->pool[p->n_out] = v;
+        p->n_out = inc_mod(p->n_out, p->pool_size);
+        v = NULL;
+    }
+
+    if (was_in_flight)
+        --p->in_flight;
+
+    vlc_mutex_unlock(&p->lock);
+
+    vlc_cond_signal(&p->flight_cond);
+
+    if (v != NULL)
+        cma_pool_delete(v);
+
+    cma_pool_fixed_unref(p);
+}
+
+static int cma_pool_fixed_resize(cma_pool_fixed_t * const p,
+                           const unsigned int new_pool_size, const int new_flight_size)
+{
+    cma_buf_t ** dead_pool = NULL;
+    unsigned int dead_n = 0;
+
+    // This makes this non-reentrant but saves us a lot of time in the normal
+    // "nothing happens" case
+    if (p->pool_size == new_pool_size && p->flight_size == new_flight_size)
+        return 0;
+
+    vlc_mutex_lock(&p->lock);
+
+    if (p->pool != NULL && new_pool_size != p->pool_size)
+    {
+        cma_buf_t ** const new_pool = calloc(new_pool_size, sizeof(void*));
+        unsigned int d, s;
+        dead_pool = p->pool;
+        dead_n = p->pool_size;
+
+        if (new_pool == NULL)
+        {
+            vlc_mutex_unlock(&p->lock);
+            return -1;
+        }
+
+        for (d = 0, s = p->n_in; d != new_pool_size && (new_pool[d] = dead_pool[s]) != NULL; ++d, s = inc_mod(s, dead_n))
+            dead_pool[s] = NULL;
+
+        p->n_out = 0;
+        p->n_in = (d != new_pool_size) ? d : 0;
+        p->pool = new_pool;
+    }
+
+    p->pool_size = new_pool_size;
+    if (new_flight_size > p->flight_size)
+        vlc_cond_broadcast(&p->flight_cond);  // Lock still active so nothing happens till we release it
+    p->in_flight += p->flight_size - new_flight_size;
+    p->flight_size = new_flight_size;
+
+    vlc_mutex_unlock(&p->lock);
+
+    free_pool(dead_pool, dead_n);
+    return 0;
+}
+
+static int cma_pool_fixed_fill(cma_pool_fixed_t * const p, const size_t el_size)
+{
+    for (;;)
+    {
+        vlc_mutex_lock(&p->lock);
+        bool done = el_size == p->el_size && p->pool != NULL && p->pool[p->n_out] != NULL;
+        vlc_mutex_unlock(&p->lock);
+        if (done)
+            break;
+        cma_buf_t * buf = cma_pool_fixed_get(p, el_size, false, true);
+        if (buf == NULL)
+            return -ENOMEM;
+        cma_pool_fixed_put(p, buf, el_size, false);
+    }
+    return 0;
+}
+
+static void cma_pool_fixed_cancel(cma_pool_fixed_t * const p)
+{
+    vlc_mutex_lock(&p->lock);
+    p->cancel = true;
+    vlc_cond_broadcast(&p->flight_cond);
+    vlc_mutex_unlock(&p->lock);
+}
+
+static void cma_pool_fixed_uncancel(cma_pool_fixed_t * const p)
+{
+    vlc_mutex_lock(&p->lock);
+    p->cancel = false;
+    vlc_mutex_unlock(&p->lock);
+}
+
+
+// Purge pool & unref
+static void cma_pool_fixed_kill(cma_pool_fixed_t * const p)
+{
+    if (p == NULL)
+        return;
+
+    // This flush is not strictly needed but it reclaims what memory we can reclaim asap
+    cma_pool_fixed_get(p, 0, false, false);
+    cma_pool_fixed_unref(p);
+}
+
+// Create a new pool
+static cma_pool_fixed_t*
+cma_pool_fixed_new(const unsigned int pool_size,
+                   const int flight_size,
+                   cma_buf_pool_t * const alloc_v,
+                   const char * const name)
+{
+    cma_pool_fixed_t* const p = calloc(1, sizeof(cma_pool_fixed_t));
+    if (p == NULL)
+        return NULL;
+
+    atomic_store(&p->ref_count, 1);
+    vlc_mutex_init(&p->lock);
+    vlc_cond_init(&p->flight_cond);
+
+    p->pool_size = pool_size;
+    p->flight_size = flight_size;
+    p->in_flight = -flight_size;
+
+    p->alloc_v = alloc_v;
+    p->name = name == NULL ? NULL : strdup(name);
+
+    return p;
+}
+
+// ---------------------------------------------------------------------------
+//
+// CMA buffer functions - uses cma_pool_fixed for pooling
+
+struct cma_buf_pool_s {
+    cma_pool_fixed_t * pool;
+    vcsm_init_type_t init_type;
+
+    bool all_in_flight;
+};
+
+typedef struct cma_buf_s {
+    atomic_int ref_count;
+    cma_buf_pool_t * cbp;
+    bool in_flight;
+    size_t size;
+    unsigned int vcsm_h;   // VCSM handle from initial alloc
+    unsigned int vc_h;     // VC handle for ZC mmal buffers
+    unsigned int vc_addr;  // VC addr - unused by us but wanted by FFmpeg
+    int fd;                // dmabuf handle for GL
+    void * mmap;           // ARM mapped address
+    picture_context_t *ctx2;
+} cma_buf_t;
+
+static void cma_pool_delete(cma_buf_t * const cb)
+{
+    assert(atomic_load(&cb->ref_count) == 0);
+
+    if (cb->ctx2 != NULL)
+        cb->ctx2->destroy(cb->ctx2);
+
+    if (cb->mmap != MAP_FAILED)
+    {
+        if (cb->cbp->init_type == VCSM_INIT_CMA)
+            munmap(cb->mmap, cb->size);
+        else
+            vcsm_unlock_hdl(cb->vcsm_h);
+    }
+    if (cb->fd != -1)
+        close(cb->fd);
+    if (cb->vcsm_h != 0)
+        vcsm_free(cb->vcsm_h);
+    free(cb);
+}
+
+static cma_buf_t * cma_pool_alloc_cb(cma_buf_pool_t * const v, size_t size)
+{
+    cma_buf_pool_t * const cbp = v;
+
+    cma_buf_t * const cb = malloc(sizeof(cma_buf_t));
+    if (cb == NULL)
+        return NULL;
+
+    *cb = (cma_buf_t){
+        .ref_count = ATOMIC_VAR_INIT(0),
+        .cbp = cbp,
+        .in_flight = 0,
+        .size = size,
+        .vcsm_h = 0,
+        .vc_h = 0,
+        .fd = -1,
+        .mmap = MAP_FAILED,
+        .ctx2 = NULL
+    };
+
+    // 0x80 is magic value to force full ARM-side mapping - otherwise
+    // cache requests can cause kernel crashes
+    if ((cb->vcsm_h = vcsm_malloc_cache(size, VCSM_CACHE_TYPE_HOST | 0x80, "VLC frame")) == 0)
+    {
+        goto fail;
+    }
+
+    if ((cb->vc_h = vcsm_vc_hdl_from_hdl(cb->vcsm_h)) == 0)
+    {
+        goto fail;
+    }
+
+    if (cbp->init_type == VCSM_INIT_CMA)
+    {
+        if ((cb->fd = vcsm_export_dmabuf(cb->vcsm_h)) == -1)
+        {
+            goto fail;
+        }
+
+        if ((cb->mmap = mmap(NULL, cb->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, cb->fd, 0)) == MAP_FAILED)
+            goto fail;
+    }
+    else
+    {
+        void * arm_addr;
+        if ((arm_addr = vcsm_lock(cb->vcsm_h)) == NULL)
+        {
+            goto fail;
+        }
+        cb->mmap = arm_addr;
+    }
+
+    cb->vc_addr = vcsm_vc_addr_from_hdl(cb->vcsm_h);
+
+    return cb;
+
+fail:
+    cma_pool_delete(cb);
+    return NULL;
+}
+
+// Pool has died - safe now to exit vcsm
+static void cma_buf_pool_on_delete_cb(cma_buf_pool_t * const cbp)
+{
+    cma_vcsm_exit(cbp->init_type);
+    free(cbp);
+}
+
+void cma_buf_pool_cancel(cma_buf_pool_t * const cbp)
+{
+    if (cbp == NULL || cbp->pool == NULL)
+        return;
+
+    cma_pool_fixed_cancel(cbp->pool);
+}
+
+void cma_buf_pool_uncancel(cma_buf_pool_t * const cbp)
+{
+    if (cbp == NULL || cbp->pool == NULL)
+        return;
+
+    cma_pool_fixed_uncancel(cbp->pool);
+}
+
+// User finished with pool
+void cma_buf_pool_delete(cma_buf_pool_t * const cbp)
+{
+    if (cbp == NULL)
+        return;
+
+    if (cbp->pool != NULL)
+    {
+        // We will call cma_buf_pool_on_delete_cb when the pool finally dies
+        // (might be now) which will free up our env.
+        cma_pool_fixed_kill(cbp->pool);
+    }
+    else
+    {
+        // Had no pool for some reason (error) but must still finish cleanup
+        cma_buf_pool_on_delete_cb(cbp);
+    }
+}
+
+int cma_buf_pool_fill(cma_buf_pool_t * const cbp, const size_t el_size)
+{
+    return cma_pool_fixed_fill(cbp->pool, el_size);
+}
+
+int cma_buf_pool_resize(cma_buf_pool_t * const cbp,
+                        const unsigned int new_pool_size, const int new_flight_size)
+{
+    return cma_pool_fixed_resize(cbp->pool, new_pool_size, new_flight_size);
+}
+
+cma_buf_pool_t * cma_buf_pool_new(const unsigned int pool_size, const unsigned int flight_size, const bool all_in_flight, const char * const name)
+{
+    vcsm_init_type_t const init_type = cma_vcsm_init();
+    if (init_type == VCSM_INIT_NONE)
+        return NULL;
+
+    cma_buf_pool_t * const cbp = calloc(1, sizeof(cma_buf_pool_t));
+    if (cbp == NULL)
+        return NULL;
+
+    cbp->init_type = init_type;
+    cbp->all_in_flight = all_in_flight;
+
+    if ((cbp->pool = cma_pool_fixed_new(pool_size, flight_size, cbp, name)) == NULL)
+        goto fail;
+    return cbp;
+
+fail:
+    cma_buf_pool_delete(cbp);
+    return NULL;
+}
+
+
+void cma_buf_in_flight(cma_buf_t * const cb)
+{
+    if (!cb->cbp->all_in_flight)
+    {
+        assert(!cb->in_flight);
+        cb->in_flight = true;
+        cma_pool_fixed_inc_in_flight(cb->cbp->pool);
+    }
+}
+
+void cma_buf_end_flight(cma_buf_t * const cb)
+{
+    if (cb != NULL && !cb->cbp->all_in_flight && cb->in_flight)
+    {
+        cb->in_flight = false;
+        cma_pool_fixed_dec_in_flight(cb->cbp->pool);
+    }
+}
+
+
+// Return vcsm handle
+unsigned int cma_buf_vcsm_handle(const cma_buf_t * const cb)
+{
+    return cb->vcsm_h;
+}
+
+size_t cma_buf_size(const cma_buf_t * const cb)
+{
+    return cb->size;
+}
+
+int cma_buf_add_context2(cma_buf_t *const cb, picture_context_t * const ctx2)
+{
+    if (cb->ctx2 != NULL)
+        return VLC_EGENERIC;
+
+    cb->ctx2 = ctx2;
+    return VLC_SUCCESS;
+}
+
+unsigned int cma_buf_vc_handle(const cma_buf_t *const cb)
+{
+    return cb->vc_h;
+}
+
+int cma_buf_fd(const cma_buf_t *const cb)
+{
+    return cb->fd;
+}
+
+void * cma_buf_addr(const cma_buf_t *const cb)
+{
+    return cb->mmap;
+}
+
+unsigned int cma_buf_vc_addr(const cma_buf_t *const cb)
+{
+    return cb->vc_addr;
+}
+
+
+picture_context_t * cma_buf_context2(const cma_buf_t *const cb)
+{
+    return cb->ctx2;
+}
+
+
+void cma_buf_unref(cma_buf_t * const cb)
+{
+    if (cb == NULL)
+        return;
+    if (atomic_fetch_sub(&cb->ref_count, 1) <= 1)
+    {
+        const bool was_in_flight = cb->in_flight;
+        cb->in_flight = false;
+        cma_pool_fixed_put(cb->cbp->pool, cb, cb->size, was_in_flight);
+    }
+}
+
+cma_buf_t * cma_buf_ref(cma_buf_t * const cb)
+{
+    if (cb == NULL)
+        return NULL;
+    atomic_fetch_add(&cb->ref_count, 1);
+    return cb;
+}
+
+cma_buf_t * cma_buf_pool_alloc_buf(cma_buf_pool_t * const cbp, const size_t size)
+{
+    cma_buf_t *const cb = cma_pool_fixed_get(cbp->pool, size, cbp->all_in_flight, false);
+
+    if (cb == NULL)
+        return NULL;
+
+    cb->in_flight = cbp->all_in_flight;
+    // When 1st allocated or retrieved from the pool the block will have a
+    // ref count of 0 so ref here
+    return cma_buf_ref(cb);
+}
+
diff --git a/modules/hw/mmal/mmal_cma.h b/modules/hw/mmal/mmal_cma.h
new file mode 100644
index 00000000000..9001070a207
--- /dev/null
+++ b/modules/hw/mmal/mmal_cma.h
@@ -0,0 +1,69 @@
+/*****************************************************************************
+ * mmal_cmal.h:
+ *****************************************************************************
+ * Copyright © 2018-2020 John Cox
+ *
+ * Authors: John Cox <jc at kynesim.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef VLC_MMAL_MMAL_CMA_H_
+#define VLC_MMAL_MMAL_CMA_H_
+
+
+struct cma_pool_fixed_s;
+typedef struct cma_pool_fixed_s cma_pool_fixed_t;
+
+struct cma_buf_s;
+typedef struct cma_buf_s cma_buf_t;
+
+void cma_buf_in_flight(cma_buf_t * const cb);
+void cma_buf_end_flight(cma_buf_t * const cb);
+unsigned int cma_buf_vcsm_handle(const cma_buf_t * const cb);
+size_t cma_buf_size(const cma_buf_t * const cb);
+int cma_buf_add_context2(cma_buf_t *const cb, picture_context_t * const ctx2);
+unsigned int cma_buf_vc_handle(const cma_buf_t *const cb);
+int cma_buf_fd(const cma_buf_t *const cb);
+void * cma_buf_addr(const cma_buf_t *const cb);
+unsigned int cma_buf_vc_addr(const cma_buf_t *const cb);
+picture_context_t * cma_buf_context2(const cma_buf_t *const cb);
+
+void cma_buf_unref(cma_buf_t * const cb);
+cma_buf_t * cma_buf_ref(cma_buf_t * const cb);
+
+struct cma_buf_pool_s;
+typedef struct cma_buf_pool_s cma_buf_pool_t;
+
+cma_buf_t * cma_buf_pool_alloc_buf(cma_buf_pool_t * const p, const size_t size);
+void cma_buf_pool_cancel(cma_buf_pool_t * const cbp);
+void cma_buf_pool_uncancel(cma_buf_pool_t * const cbp);
+void cma_buf_pool_delete(cma_buf_pool_t * const p);
+int cma_buf_pool_fill(cma_buf_pool_t * const cbp, const size_t el_size);
+int cma_buf_pool_resize(cma_buf_pool_t * const cbp,
+                          const unsigned int new_pool_size, const int new_flight_size);
+cma_buf_pool_t * cma_buf_pool_new(const unsigned int pool_size, const unsigned int flight_size,
+                                  const bool all_in_flight, const char * const name);
+
+static inline void cma_buf_pool_deletez(cma_buf_pool_t ** const pp)
+{
+    cma_buf_pool_t * const p = *pp;
+    if (p != NULL) {
+        *pp = NULL;
+        cma_buf_pool_delete(p);
+    }
+}
+
+#endif // VLC_MMAL_MMAL_CMA_H_
diff --git a/modules/hw/mmal/mmal_picture.c b/modules/hw/mmal/mmal_picture.c
index 5dd5169a9d7..3db21a32b92 100644
--- a/modules/hw/mmal/mmal_picture.c
+++ b/modules/hw/mmal/mmal_picture.c
@@ -31,12 +31,34 @@
 #include <interface/vmcs_host/vcgencmd.h>
 #include <interface/vcsm/user-vcsm.h>
 
+#include "mmal_cma.h"
 #include "mmal_picture.h"
 
 const vlc_fourcc_t hw_mmal_vzc_subpicture_chromas[] = { VLC_CODEC_RGBA, VLC_CODEC_BGRA, VLC_CODEC_ARGB, 0 };
 
 #define UINT64_SIZE(s) (((s) + sizeof(uint64_t) - 1)/sizeof(uint64_t))
 
+// WB + Inv
+static void flush_range(void * const start, const size_t len)
+{
+    uint64_t buf[UINT64_SIZE(sizeof(struct vcsm_user_clean_invalid2_s) + sizeof(struct vcsm_user_clean_invalid2_block_s))];
+    struct vcsm_user_clean_invalid2_s * const b = (struct vcsm_user_clean_invalid2_s *)buf;
+
+    *b = (struct vcsm_user_clean_invalid2_s){
+        .op_count = 1
+    };
+
+    b->s[0] = (struct vcsm_user_clean_invalid2_block_s){
+        .invalidate_mode = 3,   // wb + invalidate
+        .block_count = 1,
+        .start_address = start, // Rely on clean inv to fix up align & size boundries
+        .block_size = len,
+        .inter_block_stride = 0
+    };
+
+    vcsm_clean_invalid2(b);
+}
+
 MMAL_FOURCC_T vlc_to_mmal_color_space(const video_color_space_t vlc_cs)
 {
     switch (vlc_cs)
@@ -293,6 +315,9 @@ static void hw_mmal_pic_ctx_destroy(picture_context_t * pic_ctx_cmn)
             mmal_buffer_header_release(ctx->bufs[i]);
     }
 
+    cma_buf_end_flight(ctx->cb);
+    cma_buf_unref(ctx->cb);
+
     free(ctx);
 }
 
@@ -308,6 +333,8 @@ static picture_context_t * hw_mmal_pic_ctx_copy(picture_context_t * pic_ctx_cmn)
     // Copy
     dst_ctx->cmn = src_ctx->cmn;
 
+    dst_ctx->cb = cma_buf_ref(src_ctx->cb);
+
     dst_ctx->buf_count = src_ctx->buf_count;
     for (i = 0; i != src_ctx->buf_count; ++i) {
         dst_ctx->bufs[i] = src_ctx->bufs[i];
@@ -483,6 +510,10 @@ int hw_mmal_copy_pic_to_buf(void * const buf_data,
             return VLC_EBADVAR;
     }
 
+    if (cma_vcsm_type() == VCSM_INIT_LEGACY) {  // ** CMA is currently always uncached
+        flush_range(dest, length);
+    }
+
     if (pLength != NULL)
         *pLength = (uint32_t)length;
 
@@ -490,6 +521,15 @@ int hw_mmal_copy_pic_to_buf(void * const buf_data,
 }
 
 
+static MMAL_BOOL_T rep_buf_free_cb(MMAL_BUFFER_HEADER_T *header, void *userdata)
+{
+    cma_buf_t * const cb = userdata;
+    VLC_UNUSED(header);
+
+    cma_buf_unref(cb);
+    return MMAL_FALSE;
+}
+
 static void pic_to_buf_copy_props(MMAL_BUFFER_HEADER_T * const buf, const picture_t * const pic)
 {
     if (!pic->b_progressive)
@@ -516,6 +556,55 @@ static void pic_to_buf_copy_props(MMAL_BUFFER_HEADER_T * const buf, const pictur
     buf->dts = buf->pts;
 }
 
+static int cma_buf_buf_attach(MMAL_BUFFER_HEADER_T * const buf, cma_buf_t * const cb)
+{
+    // Just a CMA buffer - fill in new buffer
+    const uintptr_t vc_h = cma_buf_vc_handle(cb);
+    if (vc_h == 0)
+        return VLC_EGENERIC;
+
+    mmal_buffer_header_reset(buf);
+    buf->data       = (uint8_t *)vc_h;
+    buf->alloc_size = cma_buf_size(cb);
+    buf->length     = buf->alloc_size;
+    // Ensure cb remains valid for the duration of this buffer
+    mmal_buffer_header_pre_release_cb_set(buf, rep_buf_free_cb, cma_buf_ref(cb));
+    return VLC_SUCCESS;
+}
+
+MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_copied(const picture_t *const pic,
+                                              MMAL_POOL_T * const rep_pool,
+                                              MMAL_PORT_T * const port,
+                                              cma_buf_pool_t * const cbp)
+{
+    MMAL_BUFFER_HEADER_T *const buf = mmal_queue_wait(rep_pool->queue);
+    if (buf == NULL)
+        goto fail0;
+
+    cma_buf_t * const cb = cma_buf_pool_alloc_buf(cbp, port->buffer_size);
+    if (cb == NULL)
+        goto fail1;
+
+    if (cma_buf_buf_attach(buf, cb) != VLC_SUCCESS)
+        goto fail2;
+
+    pic_to_buf_copy_props(buf, pic);
+
+    if (hw_mmal_copy_pic_to_buf(cma_buf_addr(cb), &buf->length, port->format, pic) != VLC_SUCCESS)
+        goto fail2;
+    buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
+
+    cma_buf_unref(cb);
+    return buf;
+
+fail2:
+    cma_buf_unref(cb);
+fail1:
+    mmal_buffer_header_release(buf);
+fail0:
+    return NULL;
+}
+
 MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_replicated(const picture_t *const pic, MMAL_POOL_T * const rep_pool)
 {
     pic_ctx_mmal_t *const ctx = (pic_ctx_mmal_t *)pic->context;
@@ -530,6 +619,12 @@ MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_replicated(const picture_t *const pic, MM
         if (mmal_buffer_header_replicate(rep_buf, ctx->bufs[0]) != MMAL_SUCCESS)
             goto fail;
     }
+    else if (ctx->cb != NULL)
+    {
+        // Just a CMA buffer - fill in new buffer
+        if (cma_buf_buf_attach(rep_buf, ctx->cb) != 0)
+            goto fail;
+    }
     else
         goto fail;
 
@@ -555,6 +650,7 @@ typedef struct pool_ent_s
 
     size_t size;
 
+    int vcsm_hdl;
     int vc_hdl;
     void * buf;
 
@@ -652,6 +748,11 @@ static void ent_free(pool_ent_t * const ent)
         if (ent->pic != NULL)
             picture_Release(ent->pic);
 
+        // Free contents
+        vcsm_unlock_hdl(ent->vcsm_hdl);
+
+        vcsm_free(ent->vcsm_hdl);
+
         free(ent);
     }
 }
@@ -691,6 +792,34 @@ static pool_ent_t * ent_list_extract_pic_ent(ent_list_hdr_t * const elh, picture
 
 #define POOL_ENT_ALLOC_BLOCK  0x10000
 
+static pool_ent_t * pool_ent_alloc_new(size_t req_size)
+{
+    pool_ent_t * ent = calloc(1, sizeof(*ent));
+    const size_t alloc_size = (req_size + POOL_ENT_ALLOC_BLOCK - 1) & ~(POOL_ENT_ALLOC_BLOCK - 1);
+
+    if (ent == NULL)
+        return NULL;
+
+    ent->next = ent->prev = NULL;
+
+    // Alloc from vcsm
+    if ((ent->vcsm_hdl = vcsm_malloc_cache(alloc_size, VCSM_CACHE_TYPE_HOST, (char *)"vlc-subpic")) == -1)
+        goto fail1;
+    if ((ent->vc_hdl = vcsm_vc_hdl_from_hdl(ent->vcsm_hdl)) == 0)
+        goto fail2;
+    if ((ent->buf = vcsm_lock(ent->vcsm_hdl)) == NULL)
+        goto fail2;
+
+    ent->size = alloc_size;
+    return ent;
+
+fail2:
+    vcsm_free(ent->vcsm_hdl);
+fail1:
+    free(ent);
+    return NULL;
+}
+
 static pool_ent_t * pool_ent_ref(pool_ent_t * const ent)
 {
 //    int n = atomic_fetch_add(&ent->ref_count, 1) + 1;
@@ -739,6 +868,39 @@ static void pool_recycle_list(vzc_pool_ctl_t * const pc, ent_list_hdr_t * const
     }
 }
 
+static pool_ent_t * pool_best_fit(vzc_pool_ctl_t * const pc, size_t req_size)
+{
+    pool_ent_t * best = NULL;
+
+    vlc_mutex_lock(&pc->lock);
+
+    {
+        pool_ent_t * ent = pc->ent_pool.ents;
+
+        // Simple scan
+        while (ent != NULL) {
+            if (ent->size >= req_size && ent->size <= req_size * 2 + POOL_ENT_ALLOC_BLOCK &&
+                    (best == NULL || best->size > ent->size))
+                best = ent;
+            ent = ent->next;
+        }
+
+        // extract best from chain if we've found it
+        ent_extract(&pc->ent_pool, best);
+    }
+
+    vlc_mutex_unlock(&pc->lock);
+
+    if (best == NULL)
+        best = pool_ent_alloc_new(req_size);
+
+    if ((best->seq = ++pc->seq) == 0)
+        best->seq = ++pc->seq;  // Never allow to be zero
+
+    atomic_store(&best->ref_count, 1);
+    return best;
+}
+
 
 bool hw_mmal_vzc_buf_set_format(MMAL_BUFFER_HEADER_T * const buf, MMAL_ES_FORMAT_T * const es_fmt)
 {
@@ -845,6 +1007,7 @@ MMAL_BUFFER_HEADER_T * hw_mmal_vzc_buf_from_pic(vzc_pool_ctl_t * const pc,
         const size_t dst_size = dst_stride * dst_lines;
 
         pool_ent_t * ent = ent_list_extract_pic_ent(&pc->ents_prev, pic);
+        bool needs_copy = false;
 
         // If we didn't find ent in last then look in cur in case is_first
         // isn't working
@@ -853,7 +1016,15 @@ MMAL_BUFFER_HEADER_T * hw_mmal_vzc_buf_from_pic(vzc_pool_ctl_t * const pc,
 
         if (ent == NULL)
         {
-            goto fail2;
+            // Need a new ent
+            needs_copy = true;
+
+            if ((ent = pool_best_fit(pc, dst_size)) == NULL)
+                goto fail2;
+            if ((ent->enc_type = vlc_to_mmal_video_fourcc(&pic->format)) == 0)
+                goto fail2;
+
+            ent->pic = picture_Hold(pic);
         }
         buf->user_data = sb;
 
@@ -902,6 +1073,26 @@ MMAL_BUFFER_HEADER_T * hw_mmal_vzc_buf_from_pic(vzc_pool_ctl_t * const pc,
             .width  = fmt->i_visible_width,
             .height = fmt->i_visible_height
         };
+
+        if (needs_copy)
+        {
+            ent->width = dst_stride / bpp;
+            ent->height = dst_lines;
+
+            // 2D copy
+            {
+                uint8_t *d = ent->buf;
+                const uint8_t *s = pic->p[0].p_pixels + xl * bpp + fmt->i_y_offset * pic->p[0].i_pitch;
+
+                // TODO replace by plane_CopyPixels()
+                mem_copy_2d(d, dst_stride, s, pic->p[0].i_pitch, fmt->i_visible_height, dst_stride);
+
+                // And make sure it is actually in memory
+                if (pc->vcsm_init_type != VCSM_INIT_CMA) {  // ** CMA is currently always uncached
+                    flush_range(ent->buf, dst_stride * fmt->i_visible_height);
+                }
+            }
+        }
     }
 
     return buf;
@@ -1003,6 +1194,7 @@ vzc_pool_ctl_t * hw_mmal_vzc_pool_new()
     return pc;
 }
 
+
 //----------------------------------------------------------------------------
 
 /* Returns the type of the Pi being used
@@ -1022,10 +1214,28 @@ vcsm_init_type_t cma_vcsm_type(void)
 vcsm_init_type_t cma_vcsm_init(void)
 {
     vcsm_init_type_t rv = VCSM_INIT_NONE;
+    // We don't bother locking - taking a copy here should be good enough
+    vcsm_init_type_t try_type = last_vcsm_type;
+
+    if (try_type == VCSM_INIT_NONE) {
+        if (bcm_host_is_fkms_active())
+            try_type = VCSM_INIT_CMA;
+        else
+            try_type = VCSM_INIT_LEGACY;
+    }
 
+    if (try_type == VCSM_INIT_CMA) {
+        if (vcsm_init_ex(1, -1) == 0)
+            rv = VCSM_INIT_CMA;
+        else if (vcsm_init_ex(0, -1) == 0)
+            rv = VCSM_INIT_LEGACY;
+    }
+    else
     {
         if (vcsm_init_ex(0, -1) == 0)
             rv = VCSM_INIT_LEGACY;
+        else if (vcsm_init_ex(1, -1) == 0)
+            rv = VCSM_INIT_CMA;
     }
 
     // Just in case this affects vcsm init do after that
@@ -1049,6 +1259,8 @@ const char * cma_vcsm_init_str(const vcsm_init_type_t init_mode)
 {
     switch (init_mode)
     {
+        case VCSM_INIT_CMA:
+            return "CMA";
         case VCSM_INIT_LEGACY:
             return "Legacy";
         case VCSM_INIT_NONE:
diff --git a/modules/hw/mmal/mmal_picture.h b/modules/hw/mmal/mmal_picture.h
index ab64081b0a1..045a4f9d28b 100644
--- a/modules/hw/mmal/mmal_picture.h
+++ b/modules/hw/mmal/mmal_picture.h
@@ -27,6 +27,8 @@
 #include <vlc_common.h>
 #include <interface/mmal/mmal.h>
 
+#include "mmal_cma.h"
+
 /* Think twice before changing this. Incorrect values cause havoc. */
 #define NUM_ACTUAL_OPAQUE_BUFFERS 30
 
@@ -42,6 +44,8 @@ typedef struct mmal_port_pool_ref_s
 typedef struct pic_ctx_mmal_s {
     picture_context_t cmn;  // PARENT: Common els at start
 
+    cma_buf_t * cb;
+
     unsigned int buf_count;
     MMAL_BUFFER_HEADER_T * bufs[CTX_BUFS_MAX];
 
@@ -113,6 +117,11 @@ static inline void buf_to_pic_copy_props(picture_t * const pic, const MMAL_BUFFE
             VLC_TICK_INVALID;
 }
 
+MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_copied(const picture_t *const pic,
+                                              MMAL_POOL_T * const rep_pool,
+                                              MMAL_PORT_T * const port,
+                                              cma_buf_pool_t * const cbp);
+
 MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_replicated(const picture_t *const pic, MMAL_POOL_T * const rep_pool);
 
 struct vzc_pool_ctl_s;
@@ -142,6 +151,7 @@ bool rpi_is_model_pi4(void);
 typedef enum vcsm_init_type_e {
     VCSM_INIT_NONE = 0,
     VCSM_INIT_LEGACY,
+    VCSM_INIT_CMA
 } vcsm_init_type_t;
 
 vcsm_init_type_t cma_vcsm_init(void);
diff --git a/modules/hw/mmal/vout.c b/modules/hw/mmal/vout.c
index 78f2dbbeb5d..dde0701c5b5 100644
--- a/modules/hw/mmal/vout.c
+++ b/modules/hw/mmal/vout.c
@@ -170,6 +170,8 @@ static MMAL_FOURCC_T vout_vlc_to_mmal_pic_fourcc(const unsigned int fcc)
     switch (fcc){
     case VLC_CODEC_MMAL_OPAQUE:
         return MMAL_ENCODING_OPAQUE;
+    case VLC_CODEC_I420:
+        return MMAL_ENCODING_I420;
     default:
         break;
     }
-- 
2.17.1



More information about the vlc-devel mailing list