[x265] [PATCH] Enable sharing cutree data among encoders

Aruna Matheswaran aruna at multicorewareinc.com
Wed Oct 6 08:53:29 UTC 2021


On Wed, Oct 6, 2021 at 12:10 PM Dakshinya T R S <
dakshinya at multicorewareinc.com> wrote:

> From 69d41f124d14347ec71cc61f3290d3eae7369631 Mon Sep 17 00:00:00 2001
> From: lwWang <liwei at multicorewareinc.com>
> Date: Wed, 8 Sep 2021 13:38:37 +0800
> Subject: [PATCH] Enable sharing cutree data among encoders
>
[AM] The commit message doesn't seem to reflect the purpose of the patch.
Please update it.

>
> ---
>  source/common/CMakeLists.txt   |   3 +-
>  source/common/ringmem.cpp      | 357 +++++++++++++++++++
>  source/common/ringmem.h        |  85 +++++
>  source/common/threading.h      | 137 ++++++++
>  source/encoder/encoder.cpp     |   4 +-
>  source/encoder/encoder.h       |   2 +-
>  source/encoder/ratecontrol.cpp | 603 +++++++++++++++++++++------------
>  source/encoder/ratecontrol.h   |  20 +-
>  8 files changed, 987 insertions(+), 224 deletions(-)
>  create mode 100644 source/common/ringmem.cpp
>  create mode 100644 source/common/ringmem.h
>
> diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
> index 12b643ad5..8752c6199 100644
> --- a/source/common/CMakeLists.txt
> +++ b/source/common/CMakeLists.txt
> @@ -169,4 +169,5 @@ add_library(common OBJECT
>      scalinglist.cpp scalinglist.h
>      quant.cpp quant.h contexts.h
>      deblock.cpp deblock.h
> -    scaler.cpp scaler.h)
> +    scaler.cpp scaler.h
> +    ringmem.cpp ringmem.h)
> diff --git a/source/common/ringmem.cpp b/source/common/ringmem.cpp
> new file mode 100644
> index 000000000..a4f191c90
> --- /dev/null
> +++ b/source/common/ringmem.cpp
> @@ -0,0 +1,357 @@
>
> +/*****************************************************************************
> + * Copyright (C) 2013-2017 MulticoreWare, Inc
> + *
> + * Authors: liwei <liwei at multicorewareinc.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at license @ x265.com
> +
> *****************************************************************************/
> +
> +#include "ringmem.h"
> +
> +#ifndef _WIN32
> +#include <sys/mman.h>
> +#endif ////< _WIN32
> +
> +#ifdef _WIN32
> +#define X265_SHARED_MEM_NAME                    "Local\\_x265_shr_mem_"
> +#define X265_SEMAPHORE_RINGMEM_WRITER_NAME    "_x265_semW_"
> +#define X265_SEMAPHORE_RINGMEM_READER_NAME    "_x265_semR_"
> +#else /* POSIX / pthreads */
> +#define X265_SHARED_MEM_NAME                    "/tmp/_x265_shr_mem_"
> +#define X265_SEMAPHORE_RINGMEM_WRITER_NAME    "/tmp/_x265_semW_"
> +#define X265_SEMAPHORE_RINGMEM_READER_NAME    "/tmp/_x265_semR_"
> +#endif
> +
> +#define RINGMEM_ALLIGNMENT                       64
> +
> +namespace X265_NS {
> +    RingMem::RingMem()
> +        : m_initialized(false)
> +        , m_protectRW(false)
> +        , m_itemSize(0)
> +        , m_itemCnt(0)
> +        , m_dataPool(NULL)
> +        , m_shrMem(NULL)
> +#ifdef _WIN32
> +        , m_handle(NULL)
> +#else //_WIN32
> +        , m_filepath(NULL)
> +#endif //_WIN32
> +        , m_writeSem(NULL)
> +        , m_readSem(NULL)
> +    {
> +    }
> +
> +
> +    RingMem::~RingMem()
> +    {
> +    }
> +
> +    bool RingMem::skipRead(int32_t cnt) {
> +        if (!m_initialized)
> +        {
> +            return false;
> +        }
> +
> +        if (m_protectRW)
> +        {
> +            for (int i = 0; i < cnt; i++)
> +            {
> +                m_readSem->take();
> +            }
> +        }
> +
> +        ATOMIC_ADD(&m_shrMem->m_read, cnt);
> +
> +        if (m_protectRW)
> +        {
> +            m_writeSem->give(cnt);
> +        }
> +
> +        return true;
> +    }
> +
> +    bool RingMem::skipWrite(int32_t cnt) {
> +        if (!m_initialized)
> +        {
> +            return false;
> +        }
> +
> +        if (m_protectRW)
> +        {
> +            for (int i = 0; i < cnt; i++)
> +            {
> +                m_writeSem->take();
> +            }
> +        }
> +
> +        ATOMIC_ADD(&m_shrMem->m_write, cnt);
> +
> +        if (m_protectRW)
> +        {
> +            m_readSem->give(cnt);
> +        }
> +
> +        return true;
> +    }
> +
> +    ///< initialize
> +    bool RingMem::init(int32_t itemSize, int32_t itemCnt, const char
> *name, bool protectRW)
> +    {
> +        ///< check parameters
> +        if (itemSize <= 0 || itemCnt <= 0 || NULL == name)
> +        {
> +            ///< invalid parameters
> +            return false;
> +        }
> +
> +        if (!m_initialized)
> +        {
> +            ///< formatting names
> +            char nameBuf[MAX_SHR_NAME_LEN] = { 0 };
> +
> +            ///< shared memory name
> +            snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s",
> X265_SHARED_MEM_NAME, name);
> +
> +            ///< create or open shared memory
> +            bool newCreated = false;
> +
> +            ///< calculate the size of the shared memory
> +            int32_t shrMemSize = (itemSize * itemCnt + sizeof(ShrMemCtrl)
> + RINGMEM_ALLIGNMENT - 1) & ~(RINGMEM_ALLIGNMENT - 1);
> +
> +#ifdef _WIN32
> +            HANDLE h = OpenFileMappingA(FILE_MAP_WRITE | FILE_MAP_READ,
> FALSE, nameBuf);
> +            if (!h)
> +            {
> +                h = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL,
> PAGE_READWRITE, 0, shrMemSize, nameBuf);
> +
> +                if (!h)
> +                {
> +                    return false;
> +                }
> +
> +                newCreated = true;
> +            }
> +
> +            void *pool = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, 0);
> +
> +            ///< should not close the handle here, otherwise the
> OpenFileMapping would fail
> +            //CloseHandle(h);
> +            m_handle = h;
> +
> +            if (!pool)
> +            {
> +                return false;
> +            }
> +
> +#else /* POSIX / pthreads */
> +            mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH
> | S_IWOTH;
> +            int flag = O_RDWR;
> +            int shrfd = -1;
> +            if ((shrfd = open(nameBuf, flag, mode)) < 0)
> +            {
> +                flag |= O_CREAT;
> +
> +                shrfd = open(nameBuf, flag, mode);
> +                if (shrfd < 0)
> +                {
> +                    return false;
> +                }
> +                newCreated = true;
> +
> +                lseek(shrfd, shrMemSize - 1, SEEK_SET);
> +
> +                if (-1 == write(shrfd, "\0", 1))
> +                {
> +                    close(shrfd);
> +                    return false;
> +                }
> +
> +                if (lseek(shrfd, 0, SEEK_END) < shrMemSize)
> +                {
> +                    close(shrfd);
> +                    return false;
> +                }
> +            }
> +
> +            void *pool = mmap(0,
> +                shrMemSize,
> +                PROT_READ | PROT_WRITE,
> +                MAP_SHARED,
> +                shrfd,
> +                0);
> +
> +            close(shrfd);
> +            if (pool == MAP_FAILED)
> +            {
> +                return false;
> +            }
> +
> +            m_filepath = strdup(nameBuf);
> +#endif ///< _WIN32
> +
> +            if (newCreated)
> +            {
> +                memset(pool, 0, shrMemSize);
> +            }
> +
> +            m_shrMem = reinterpret_cast<ShrMemCtrl *>(pool);
> +            m_dataPool = reinterpret_cast<uint8_t *>(pool) +
> sizeof(ShrMemCtrl);
> +            m_itemSize = itemSize;
> +            m_itemCnt = itemCnt;
> +            m_initialized = true;
> +
> +            if (protectRW)
> +            {
> +                m_protectRW = true;
> +                m_writeSem = new NamedSemaphore();
> +                if (!m_writeSem)
> +                {
> +                    release();
> +                    return false;
> +                }
> +
> +                ///< writer semaphore name
> +                snprintf(nameBuf, sizeof(nameBuf - 1), "%s%s",
> X265_SEMAPHORE_RINGMEM_WRITER_NAME, name);
> +                if (!m_writeSem->create(nameBuf, m_itemCnt, m_itemCnt))
> +                {
> +                    release();
> +                    return false;
> +                }
> +
> +                m_readSem = new NamedSemaphore();
> +                if (!m_readSem)
> +                {
> +                    release();
> +                    return false;
> +                }
> +
> +                ///< reader semaphore name
> +                snprintf(nameBuf, sizeof(nameBuf - 1), "%s%s",
> X265_SEMAPHORE_RINGMEM_READER_NAME, name);
> +                if (!m_readSem->create(nameBuf, 0, m_itemCnt))
> +                {
> +                    release();
> +                    return false;
> +                }
> +            }
> +        }
> +
> +        return true;
> +    }
> +    ///< finalize
> +    void RingMem::release()
> +    {
> +        if (m_initialized)
> +        {
> +            m_initialized = false;
> +
> +            if (m_shrMem)
> +            {
> +#ifdef _WIN32
> +                UnmapViewOfFile(m_shrMem);
> +                CloseHandle(m_handle);
> +                m_handle = NULL;
> +#else /* POSIX / pthreads */
> +                int32_t shrMemSize = (m_itemSize * m_itemCnt +
> sizeof(ShrMemCtrl) + RINGMEM_ALLIGNMENT - 1) & (~RINGMEM_ALLIGNMENT - 1);
> +                munmap(m_shrMem, shrMemSize);
> +                unlink(m_filepath);
> +                free(m_filepath);
> +                m_filepath = NULL;
> +#endif ///< _WIN32
> +                m_shrMem = NULL;
> +                m_dataPool = NULL;
> +                m_itemSize = 0;
> +                m_itemCnt = 0;
> +            }
> +
> +            if (m_protectRW)
> +            {
> +                m_protectRW = false;
> +                if (m_writeSem)
> +                {
> +                    m_writeSem->release();
> +
> +                    delete m_writeSem;
> +                    m_writeSem = NULL;
> +                }
> +
> +                if (m_readSem)
> +                {
> +                    m_readSem->release();
> +
> +                    delete m_readSem;
> +                    m_readSem = NULL;
> +                }
> +            }
> +
> +        }
> +    }
> +
> +    ///< data read
> +    bool RingMem::readNext(void* dst, fnRWSharedData callback)
> +    {
> +        if (!m_initialized || !callback || !dst)
> +        {
> +            return false;
> +        }
> +
> +        if (m_protectRW)
> +        {
> +            if (!m_readSem->take())
> +            {
> +                return false;
> +            }
> +        }
> +
> +        int32_t index = ATOMIC_ADD(&m_shrMem->m_read, 1) % m_itemCnt;
> +        (*callback)(dst, reinterpret_cast<uint8_t *>(m_dataPool) + index
> * m_itemSize, m_itemSize);
> +
> +        if (m_protectRW)
> +        {
> +            m_writeSem->give(1);
> +        }
> +
> +        return true;
> +    }
> +    ///< data write
> +    bool RingMem::writeData(void *data, fnRWSharedData callback)
> +    {
> +        if (!m_initialized || !data || !callback)
> +        {
> +            return false;
> +        }
> +
> +        if (m_protectRW)
> +        {
> +            if (!m_writeSem->take())
> +            {
> +                return false;
> +            }
> +        }
> +
> +        int32_t index = ATOMIC_ADD(&m_shrMem->m_write, 1) % m_itemCnt;
> +        (*callback)(reinterpret_cast<uint8_t *>(m_dataPool) + index *
> m_itemSize, data, m_itemSize);
> +
> +        if (m_protectRW)
> +        {
> +            m_readSem->give(1);
> +        }
> +
> +        return true;
> +    }
> +}
> diff --git a/source/common/ringmem.h b/source/common/ringmem.h
> new file mode 100644
> index 000000000..76f54bb19
> --- /dev/null
> +++ b/source/common/ringmem.h
> @@ -0,0 +1,85 @@
>
> +/*****************************************************************************
> + * Copyright (C) 2013-2017 MulticoreWare, Inc
> + *
> + * Authors: liwei <liwei at multicorewareinc.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at license @ x265.com
> +
> *****************************************************************************/
> +
> +#ifndef X265_RINGMEM_H
> +#define X265_RINGMEM_H
> +
> +#include "common.h"
> +#include "threading.h"
> +
> +namespace X265_NS {
> +
> +#define MAX_SHR_NAME_LEN                         256
> +
> +    class RingMem {
> +    public:
> +        RingMem();
> +        ~RingMem();
> +
> +        bool skipRead(int32_t cnt);
> +
> +        bool skipWrite(int32_t cnt);
> +
> +        ///< initialize
> +        ///< protectRW: whether to use semaphores to protect the write and
> read operations.
> +        bool init(int32_t itemSize, int32_t itemCnt, const char *name,
> bool protectRW = false);
> +        ///< finalize
> +        void release();
> +
> +        typedef void(*fnRWSharedData)(void *dst, void *src, int32_t size);
> +
> +        ///< data read
> +        bool readNext(void* dst, fnRWSharedData callback);
> +        ///< data write
> +        bool writeData(void *data, fnRWSharedData callback);
> +
> +    private:
> +        bool    m_initialized;
> +        bool    m_protectRW;
> +
> +        int32_t m_itemSize;
> +        int32_t m_itemCnt;
> +        ///< data pool
> +        void   *m_dataPool;
> +        typedef struct {
> +            ///< index to write
> +            int32_t m_write;
> +            ///< index to read
> +            int32_t m_read;
> +
> +        }ShrMemCtrl;
> +
> +        ShrMemCtrl *m_shrMem;
> +#ifdef _WIN32
> +        void       *m_handle;
> +#else // _WIN32
> +        char       *m_filepath;
> +#endif // _WIN32
> +
> +        ///< Semaphores
> +        NamedSemaphore *m_writeSem;
> +        NamedSemaphore *m_readSem;
> +    };
> +};
> +
> +#endif // ifndef X265_RINGMEM_H
> diff --git a/source/common/threading.h b/source/common/threading.h
> index 53a63beaf..dcf6081e3 100644
> --- a/source/common/threading.h
> +++ b/source/common/threading.h
> @@ -3,6 +3,7 @@
>   *
>   * Authors: Steve Borho <steve at borho.org>
>   *          Min Chen <chenm003 at 163.com>
> + *          liwei <liwei at multicorewareinc.com>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -253,6 +254,47 @@ protected:
>      int                m_val;
>  };
>
> +class NamedSemaphore
> +{
> +public:
> +    NamedSemaphore() : m_sem(NULL)
> +    {
> +    }
> +
> +    ~NamedSemaphore()
> +    {
> +    }
> +
> +    bool create(const char* name, const int initcnt, const int maxcnt)
> +    {
> +        if(!m_sem)
> +        {
> +            m_sem = CreateSemaphoreA(NULL, initcnt, maxcnt, name);
> +        }
> +        return m_sem != NULL;
> +    }
> +
> +    bool give(const int32_t cnt)
> +    {
> +        return ReleaseSemaphore(m_sem, (LONG)cnt, NULL) != FALSE;
> +    }
> +
> +    bool take(const uint32_t time_out = INFINITE)
> +    {
> +        int32_t rt = WaitForSingleObject(m_sem, time_out);
> +        return rt != WAIT_TIMEOUT && rt != WAIT_FAILED;
> +    }
> +
> +    void release()
> +    {
> +        CloseHandle(m_sem);
> +        m_sem = NULL;
> +    }
> +
> +private:
> +    HANDLE m_sem;
> +};
> +
>  #else /* POSIX / pthreads */
>
>  typedef pthread_t ThreadHandle;
> @@ -459,6 +501,101 @@ protected:
>      int             m_val;
>  };
>
> +#define TIMEOUT_INFINITE 0xFFFFFFFF
> +
> +class NamedSemaphore
> +{
> +public:
> +    NamedSemaphore()
> +        : m_sem(NULL)
> +        , m_name(NULL)
> +    {
> +    }
> +
> +    ~NamedSemaphore()
> +    {
> +    }
> +
> +    bool create(const char* name, const int initcnt, const int maxcnt)
> +    {
> +        bool ret = false;
> +
> +        if (initcnt >= maxcnt)
> +        {
> +            return false;
> +        }
> +
> +        m_sem = sem_open(name, O_CREAT | O_EXCL, 0666, initcnt);
> +        if (m_sem != SEM_FAILED)
> +        {
> +            m_name = strdup(name);
> +            ret = true;
> +        }
> +        else
> +        {
> +            if (EEXIST == errno)
> +            {
> +                m_sem = sem_open(name, 0);
> +                if (m_sem != SEM_FAILED)
> +                {
> +                    m_name = strdup(name);
> +                    ret = true;
> +                }
> +            }
> +        }
> +
> +        return ret;
> +    }
> +
> +    bool give(const int32_t cnt)
> +    {
> +        int ret = 0;
> +        int32_t curCnt = cnt;
> +        while (curCnt-- && !ret) {
> +            ret = sem_post(m_sem);
> +        }
> +
> +        return 0 == ret;
> +    }
> +
> +    bool take(const uint32_t time_out = TIMEOUT_INFINITE)
> +    {
> +        if (TIMEOUT_INFINITE == time_out) {
> +            return 0 == sem_wait(m_sem);
> +        }
> +        else
> +        {
> +            if (0 == time_out)
> +            {
> +                return 0 == sem_trywait(m_sem);
> +            }
> +            else
> +            {
> +                struct timespec ts;
> +                ts.tv_sec = time_out / 1000L;
> +                ts.tv_nsec = (time_out * 1000000L) - ts.tv_sec * 1000 *
> 1000 * 1000;
> +                return 0 == sem_timedwait(m_sem, &ts);
> +            }
> +        }
> +    }
> +
> +    void release()
> +    {
> +        if (m_sem)
> +        {
> +            sem_close(m_sem);
> +            sem_unlink(m_name);
> +            m_sem = NULL;
> +            free(m_name);
> +            m_name = NULL;
> +        }
> +    }
> +
> +private:
> +    sem_t *m_sem;
> +    char  *m_name;
> +};
> +
>  #endif // ifdef _WIN32
>
>  class ScopedLock
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index cc014a740..19551b1f2 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -187,7 +187,7 @@ inline char *strcatFilename(const char *input, const
> char *suffix)
>      return output;
>  }
>
> -void Encoder::create()
> +void Encoder::create(char *dataShr)
>  {
>      if (!primitives.pu[0].sad)
>      {
> @@ -389,7 +389,7 @@ void Encoder::create()
>              lookAheadThreadPool[i].start();
>      m_lookahead->m_numPools = pools;
>      m_dpb = new DPB(m_param);
> -    m_rateControl = new RateControl(*m_param, this);
> +    m_rateControl = new RateControl(*m_param, this, dataShr);
>      if (!m_param->bResetZoneConfig)
>      {
>          zoneReadCount = new ThreadSafeInteger[m_param->rc.zonefileCount];
> diff --git a/source/encoder/encoder.h b/source/encoder/encoder.h
> index 2ee5bdaee..4b8478f16 100644
> --- a/source/encoder/encoder.h
> +++ b/source/encoder/encoder.h
> @@ -305,7 +305,7 @@ public:
>  #endif
>      };
>
> -    void create();
> +    void create(char *dataShr = NULL);
>      void stopJobs();
>      void destroy();
>
> diff --git a/source/encoder/ratecontrol.cpp
> b/source/encoder/ratecontrol.cpp
> index 71f08a73e..e2f514172 100644
> --- a/source/encoder/ratecontrol.cpp
> +++ b/source/encoder/ratecontrol.cpp
> @@ -41,6 +41,10 @@
>  #define BR_SHIFT  6
>  #define CPB_SHIFT 4
>
> +#define SHARED_DATA_ALIGNMENT      4 ///< 4 bytes, 32 bits
> +#define CUTREE_SHARED_MEM_NAME     "cutree"
> +#define GOP_CNT_CU_TREE            3
> +
>
 using namespace X265_NS;
>
>  /* Amortize the partial cost of I frames over the next N frames */
> @@ -104,6 +108,37 @@ inline char *strcatFilename(const char *input, const
> char *suffix)
>      return output;
>  }
>
> +typedef struct CUTreeSharedDataItem
> +{
> +    uint8_t  *type;
> +    uint16_t *stats;
> +}CUTreeSharedDataItem;
> +
> +void ReadSharedCUTreeData(void *dst, void *src, int32_t size)
> +{
> +    CUTreeSharedDataItem *statsDst =
> reinterpret_cast<CUTreeSharedDataItem *>(dst);
> +    uint8_t *typeSrc = reinterpret_cast<uint8_t *>(src);
> +    *statsDst->type = *typeSrc;
> +
> +    ///< for memory alignment, the type field occupies 32 bits in the shared
> memory
> +    int32_t offset = (sizeof(*statsDst->type) + SHARED_DATA_ALIGNMENT -
> 1) & ~(SHARED_DATA_ALIGNMENT - 1);
> +    uint16_t *statsSrc = reinterpret_cast<uint16_t *>(typeSrc + offset);
> +    memcpy(statsDst->stats, statsSrc, size - offset);
> +}
> +
> +void WriteSharedCUTreeData(void *dst, void *src, int32_t size)
> +{
> +    CUTreeSharedDataItem *statsSrc =
> reinterpret_cast<CUTreeSharedDataItem *>(src);
> +    uint8_t *typeDst = reinterpret_cast<uint8_t *>(dst);
> +    *typeDst = *statsSrc->type;
> +
> +    ///< for memory alignment, the type field occupies 32 bits in the shared
> memory
> +    int32_t offset = (sizeof(*statsSrc->type) + SHARED_DATA_ALIGNMENT -
> 1) & ~(SHARED_DATA_ALIGNMENT - 1);
> +    uint16_t *statsDst = reinterpret_cast<uint16_t *>(typeDst + offset);
> +    memcpy(statsDst, statsSrc->stats, size - offset);
> +}
> +
> +
>  inline double qScale2bits(RateControlEntry *rce, double qScale)
>  {
>      if (qScale < 0.1)
> @@ -146,7 +181,7 @@ x265_zone* RateControl::getZone()
>      return NULL;
>  }
>
> -RateControl::RateControl(x265_param& p, Encoder *top)
> +RateControl::RateControl(x265_param& p, Encoder *top, char * dataShr)
>  {
>      m_param = &p;
>      m_top = top;
> @@ -209,6 +244,14 @@ RateControl::RateControl(x265_param& p, Encoder *top)
>      m_lastAbrResetPoc = -1;
>      m_statFileOut = NULL;
>      m_cutreeStatFileOut = m_cutreeStatFileIn = NULL;
> +    ///< store the cutree data in file by default
> +    m_cutreeStorageMode = !dataShr ? SHARED_MODE_FILE : SHARED_MODE_MEM;
>
[AM] Why not add a param option (API) to toggle between file-based and
shared-memory-based cutree sharing? As written, an external application would
have to violate the API in order to operate through this macro.

Is this shared memory used only for cutree data sharing? If so, can file-based
and shared-memory-based RC data sharing coexist?

> +    m_shrname = NULL;
> +    if (dataShr)
> +    {
> +        m_shrname = strdup(dataShr);
> +    }
> +    m_cutreeShrMem = NULL;
>      m_rce2Pass = NULL;
>      m_encOrder = NULL;
>      m_lastBsliceSatdCost = 0;
> @@ -320,6 +363,42 @@ RateControl::RateControl(x265_param& p, Encoder *top)
>          m_cuTreeStats.qpBuffer[i] = NULL;
>  }
>
> +bool RateControl::initCUTreeSharedMem()
> +{
> +    if (!m_cutreeShrMem) {
> +        m_cutreeShrMem = new RingMem();
> +        if (!m_cutreeShrMem)
> +        {
> +            return false;
> +        }
> +
> +        ///< for now, cutree data from at most 3 GOPs is stored in the
> shared memory at the same time
> +        int32_t itemSize = (sizeof(uint8_t) + SHARED_DATA_ALIGNMENT - 1)
> & ~(SHARED_DATA_ALIGNMENT - 1);
> +        if (m_param->rc.qgSize == 8)
> +        {
> +            itemSize += sizeof(uint16_t) * m_ncu * 4;
> +        }
> +        else
> +        {
> +            itemSize += sizeof(uint16_t) * m_ncu;
> +        }
> +
> +        int32_t itemCnt = X265_MIN(m_param->keyframeMax, (int)(m_fps +
> 0.5));
> +        itemCnt *= GOP_CNT_CU_TREE;
> +
> +        char shrname[MAX_SHR_NAME_LEN] = { 0 };
> +        strcpy(shrname, m_shrname);
> +        strcat(shrname, CUTREE_SHARED_MEM_NAME);
> +
> +        if (!m_cutreeShrMem->init(itemSize, itemCnt, shrname))
> +        {
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
>  bool RateControl::init(const SPS& sps)
>  {
>      if (m_isVbv && !m_initVbv)
> @@ -421,244 +500,261 @@ bool RateControl::init(const SPS& sps)
>          /* Load stat file and init 2pass algo */
>          if (m_param->rc.bStatRead)
>          {
> -            m_expectedBitsSum = 0;
> -            char *p, *statsIn, *statsBuf;
> -            /* read 1st pass stats */
> -            statsIn = statsBuf = x265_slurp_file(fileName);
> -            if (!statsBuf)
> -                return false;
> -            if (m_param->rc.cuTree)
> +            if (SHARED_MODE_FILE == m_cutreeStorageMode)
>              {
> -                char *tmpFile = strcatFilename(fileName, ".cutree");
> -                if (!tmpFile)
> +                m_expectedBitsSum = 0;
> +                char *p, *statsIn, *statsBuf;
> +                /* read 1st pass stats */
> +                statsIn = statsBuf = x265_slurp_file(fileName);
> +                if (!statsBuf)
>                      return false;
> -                m_cutreeStatFileIn = x265_fopen(tmpFile, "rb");
> -                X265_FREE(tmpFile);
> -                if (!m_cutreeStatFileIn)
> +                if (m_param->rc.cuTree)
>                  {
> -                    x265_log_file(m_param, X265_LOG_ERROR, "can't open
> stats file %s.cutree\n", fileName);
> -                    return false;
> +                    char *tmpFile = strcatFilename(fileName, ".cutree");
> +                    if (!tmpFile)
> +                        return false;
> +                    m_cutreeStatFileIn = x265_fopen(tmpFile, "rb");
> +                    X265_FREE(tmpFile);
> +                    if (!m_cutreeStatFileIn)
> +                    {
> +                        x265_log_file(m_param, X265_LOG_ERROR, "can't
> open stats file %s.cutree\n", fileName);
> +                        return false;
> +                    }
>                  }
> -            }
>
> -            /* check whether 1st pass options were compatible with
> current options */
> -            if (strncmp(statsBuf, "#options:", 9))
> -            {
> -                x265_log(m_param, X265_LOG_ERROR,"options list in stats
> file not valid\n");
> -                return false;
> -            }
> -            {
> -                int i, j, m;
> -                uint32_t k , l;
> -                bool bErr = false;
> -                char *opts = statsBuf;
> -                statsIn = strchr(statsBuf, '\n');
> -                if (!statsIn)
> +                /* check whether 1st pass options were compatible with
> current options */
> +                if (strncmp(statsBuf, "#options:", 9))
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "Malformed stats
> file\n");
> +                    x265_log(m_param, X265_LOG_ERROR, "options list in
> stats file not valid\n");
>                      return false;
>                  }
> -                *statsIn = '\0';
> -                statsIn++;
> -                if ((p = strstr(opts, " input-res=")) == 0 || sscanf(p, "
> input-res=%dx%d", &i, &j) != 2)
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "Resolution
> specified in stats file not valid\n");
> +                    int i, j, m;
> +                    uint32_t k, l;
> +                    bool bErr = false;
> +                    char *opts = statsBuf;
> +                    statsIn = strchr(statsBuf, '\n');
> +                    if (!statsIn)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "Malformed
> stats file\n");
> +                        return false;
> +                    }
> +                    *statsIn = '\0';
> +                    statsIn++;
> +                    if ((p = strstr(opts, " input-res=")) == 0 ||
> sscanf(p, " input-res=%dx%d", &i, &j) != 2)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "Resolution
> specified in stats file not valid\n");
> +                        return false;
> +                    }
> +                    if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, "
> fps=%u/%u", &k, &l) != 2)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "fps specified
> in stats file not valid\n");
> +                        return false;
> +                    }
> +                    if (((p = strstr(opts, " vbv-maxrate=")) == 0 ||
> sscanf(p, " vbv-maxrate=%d", &m) != 1) && m_param->rc.rateControlMode ==
> X265_RC_CRF)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "Constant
> rate-factor is incompatible with 2pass without vbv-maxrate in the previous
> pass\n");
> +                        return false;
> +                    }
> +                    if (k != m_param->fpsNum || l != m_param->fpsDenom)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "fps mismatch
> with 1st pass (%u/%u vs %u/%u)\n",
> +                            m_param->fpsNum, m_param->fpsDenom, k, l);
> +                        return false;
> +                    }
> +                    if (m_param->analysisMultiPassRefine)
> +                    {
> +                        p = strstr(opts, "ref=");
> +                        sscanf(p, "ref=%d", &i);
> +                        if (i > m_param->maxNumReferences)
> +                        {
> +                            x265_log(m_param, X265_LOG_ERROR,
> "maxNumReferences cannot be less than 1st pass (%d vs %d)\n",
> +                                i, m_param->maxNumReferences);
> +                            return false;
> +                        }
> +                    }
> +                    if (m_param->analysisMultiPassRefine ||
> m_param->analysisMultiPassDistortion)
> +                    {
> +                        p = strstr(opts, "ctu=");
> +                        sscanf(p, "ctu=%u", &k);
> +                        if (k != m_param->maxCUSize)
> +                        {
> +                            x265_log(m_param, X265_LOG_ERROR, "maxCUSize
> mismatch with 1st pass (%u vs %u)\n",
> +                                k, m_param->maxCUSize);
> +                            return false;
> +                        }
> +                    }
> +                    CMP_OPT_FIRST_PASS("bitdepth",
> m_param->internalBitDepth);
> +                    CMP_OPT_FIRST_PASS("weightp",
> m_param->bEnableWeightedPred);
> +                    CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
> +                    CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
> +                    CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
> +                    CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
> +                    CMP_OPT_FIRST_PASS("scenecut",
> m_param->scenecutThreshold);
> +                    CMP_OPT_FIRST_PASS("intra-refresh",
> m_param->bIntraRefresh);
> +                    CMP_OPT_FIRST_PASS("frame-dup",
> m_param->bEnableFrameDuplication);
> +                    if (m_param->bMultiPassOptRPS)
> +                    {
> +                        CMP_OPT_FIRST_PASS("multi-pass-opt-rps",
> m_param->bMultiPassOptRPS);
> +                        CMP_OPT_FIRST_PASS("repeat-headers",
> m_param->bRepeatHeaders);
> +                        CMP_OPT_FIRST_PASS("min-keyint",
> m_param->keyframeMin);
> +                    }
> +
> +                    if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p,
> "b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
> +                    {
> +                        m_param->bFrameAdaptive = i;
> +                    }
> +                    else if (m_param->bframes)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "b-adapt method
> specified in stats file not valid\n");
> +                        return false;
> +                    }
> +
> +                    if ((p = strstr(opts, "rc-lookahead=")) != 0 &&
> sscanf(p, "rc-lookahead=%d", &i))
> +                        m_param->lookaheadDepth = i;
> +                }
> +                /* find number of pics */
> +                p = statsIn;
> +                int numEntries;
> +                for (numEntries = -1; p; numEntries++)
> +                    p = strchr(p + 1, ';');
> +                if (!numEntries)
> +                {
> +                    x265_log(m_param, X265_LOG_ERROR, "empty stats
> file\n");
>                      return false;
>                  }
> -                if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, "
> fps=%u/%u", &k, &l) != 2)
> +                m_numEntries = numEntries;
> +
> +                if (m_param->totalFrames < m_numEntries &&
> m_param->totalFrames > 0)
> +                {
> +                    x265_log(m_param, X265_LOG_WARNING, "2nd pass has
> fewer frames than 1st pass (%d vs %d)\n",
> +                        m_param->totalFrames, m_numEntries);
> +                }
> +                if (m_param->totalFrames > m_numEntries &&
> !m_param->bEnableFrameDuplication)
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "fps specified in
> stats file not valid\n");
> +                    x265_log(m_param, X265_LOG_ERROR, "2nd pass has more
> frames than 1st pass (%d vs %d)\n",
> +                        m_param->totalFrames, m_numEntries);
>                      return false;
>                  }
> -                if (((p = strstr(opts, " vbv-maxrate=")) == 0 ||
> sscanf(p, " vbv-maxrate=%d", &m) != 1) && m_param->rc.rateControlMode ==
> X265_RC_CRF)
> +
> +                m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
> +                if (!m_rce2Pass)
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "Constant
> rate-factor is incompatible with 2pass without vbv-maxrate in the previous
> pass\n");
> +                    x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2
> pass cannot be allocated\n");
>                      return false;
>                  }
> -                if (k != m_param->fpsNum || l != m_param->fpsDenom)
> +                m_encOrder = X265_MALLOC(int, m_numEntries);
> +                if (!m_encOrder)
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "fps mismatch with
> 1st pass (%u/%u vs %u/%u)\n",
> -                              m_param->fpsNum, m_param->fpsDenom, k, l);
> +                    x265_log(m_param, X265_LOG_ERROR, "Encode order for 2
> pass cannot be allocated\n");
>                      return false;
>                  }
> -                if (m_param->analysisMultiPassRefine)
> +                /* init all to skipped p frames */
> +                for (int i = 0; i < m_numEntries; i++)
>                  {
> -                    p = strstr(opts, "ref=");
> -                    sscanf(p, "ref=%d", &i);
> -                    if (i > m_param->maxNumReferences)
> +                    RateControlEntry *rce = &m_rce2Pass[i];
> +                    rce->sliceType = P_SLICE;
> +                    rce->qScale = rce->newQScale = x265_qp2qScale(20);
> +                    rce->miscBits = m_ncu + 10;
> +                    rce->newQp = 0;
> +                }
> +                /* read stats */
> +                p = statsIn;
> +                double totalQpAq = 0;
> +                for (int i = 0; i < m_numEntries; i++)
> +                {
> +                    RateControlEntry *rce, *rcePocOrder;
> +                    int frameNumber;
> +                    int encodeOrder;
> +                    char picType;
> +                    int e;
> +                    char *next;
> +                    double qpRc, qpAq, qNoVbv, qRceq;
> +                    next = strstr(p, ";");
> +                    if (next)
> +                        *next++ = 0;
> +                    e = sscanf(p, " in:%d out:%d", &frameNumber,
> &encodeOrder);
> +                    if (frameNumber < 0 || frameNumber >= m_numEntries)
>                      {
> -                        x265_log(m_param, X265_LOG_ERROR,
> "maxNumReferences cannot be less than 1st pass (%d vs %d)\n",
> -                            i, m_param->maxNumReferences);
> +                        x265_log(m_param, X265_LOG_ERROR, "bad frame
> number (%d) at stats line %d\n", frameNumber, i);
>                          return false;
>                      }
> -                }
> -                if (m_param->analysisMultiPassRefine ||
> m_param->analysisMultiPassDistortion)
> -                {
> -                    p = strstr(opts, "ctu=");
> -                    sscanf(p, "ctu=%u", &k);
> -                    if (k != m_param->maxCUSize)
> +                    rce = &m_rce2Pass[encodeOrder];
> +                    rcePocOrder = &m_rce2Pass[frameNumber];
> +                    m_encOrder[frameNumber] = encodeOrder;
> +                    if (!m_param->bMultiPassOptRPS)
>                      {
> -                        x265_log(m_param, X265_LOG_ERROR, "maxCUSize
> mismatch with 1st pass (%u vs %u)\n",
> -                            k, m_param->maxCUSize);
> +                        int scenecut = 0;
> +                        e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
> q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
> scu:%lf sc:%d",
> +                            &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
> &rce->coeffBits,
> +                            &rce->mvBits, &rce->miscBits, &rce->iCuCount,
> &rce->pCuCount,
> +                            &rce->skipCuCount, &scenecut);
> +                        rcePocOrder->scenecut = scenecut != 0;
> +                    }
> +                    else
> +                    {
> +                        char deltaPOC[128];
> +                        char bUsed[40];
> +                        memset(deltaPOC, 0, sizeof(deltaPOC));
> +                        memset(bUsed, 0, sizeof(bUsed));
> +                        e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
> q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
> scu:%lf nump:%d numnegp:%d numposp:%d deltapoc:%s bused:%s",
> +                            &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
> &rce->coeffBits,
> +                            &rce->mvBits, &rce->miscBits, &rce->iCuCount,
> &rce->pCuCount,
> +                            &rce->skipCuCount,
> &rce->rpsData.numberOfPictures, &rce->rpsData.numberOfNegativePictures,
> &rce->rpsData.numberOfPositivePictures, deltaPOC, bUsed);
> +                        splitdeltaPOC(deltaPOC, rce);
> +                        splitbUsed(bUsed, rce);
> +                        rce->rpsIdx = -1;
> +                    }
> +                    rce->keptAsRef = true;
> +                    rce->isIdr = false;
> +                    if (picType == 'b' || picType == 'p')
> +                        rce->keptAsRef = false;
> +                    if (picType == 'I')
> +                        rce->isIdr = true;
> +                    if (picType == 'I' || picType == 'i')
> +                        rce->sliceType = I_SLICE;
> +                    else if (picType == 'P' || picType == 'p')
> +                        rce->sliceType = P_SLICE;
> +                    else if (picType == 'B' || picType == 'b')
> +                        rce->sliceType = B_SLICE;
> +                    else
> +                        e = -1;
> +                    if (e < 10)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "statistics are
> damaged at line %d, parser out=%d\n", i, e);
>                          return false;
>                      }
> +                    rce->qScale = rce->newQScale = x265_qp2qScale(qpRc);
> +                    totalQpAq += qpAq;
> +                    rce->qpNoVbv = qNoVbv;
> +                    rce->qpaRc = qpRc;
> +                    rce->qpAq = qpAq;
> +                    rce->qRceq = qRceq;
> +                    p = next;
>                  }
> -                CMP_OPT_FIRST_PASS("bitdepth", m_param->internalBitDepth);
> -                CMP_OPT_FIRST_PASS("weightp",
> m_param->bEnableWeightedPred);
> -                CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
> -                CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
> -                CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
> -                CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
> -                CMP_OPT_FIRST_PASS("scenecut",
> m_param->scenecutThreshold);
> -                CMP_OPT_FIRST_PASS("intra-refresh",
> m_param->bIntraRefresh);
> -                CMP_OPT_FIRST_PASS("frame-dup",
> m_param->bEnableFrameDuplication);
> -                if (m_param->bMultiPassOptRPS)
> -                {
> -                    CMP_OPT_FIRST_PASS("multi-pass-opt-rps",
> m_param->bMultiPassOptRPS);
> -                    CMP_OPT_FIRST_PASS("repeat-headers",
> m_param->bRepeatHeaders);
> -                    CMP_OPT_FIRST_PASS("min-keyint",
> m_param->keyframeMin);
> -                }
> -
> -                if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p,
> "b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
> +                X265_FREE(statsBuf);
> +                if (m_param->rc.rateControlMode != X265_RC_CQP)
>                  {
> -                    m_param->bFrameAdaptive = i;
> -                }
> -                else if (m_param->bframes)
> -                {
> -                    x265_log(m_param, X265_LOG_ERROR, "b-adapt method
> specified in stats file not valid\n");
> -                    return false;
> -                }
> -
> -                if ((p = strstr(opts, "rc-lookahead=")) != 0 && sscanf(p,
> "rc-lookahead=%d", &i))
> -                    m_param->lookaheadDepth = i;
> -            }
> -            /* find number of pics */
> -            p = statsIn;
> -            int numEntries;
> -            for (numEntries = -1; p; numEntries++)
> -                p = strchr(p + 1, ';');
> -            if (!numEntries)
> -            {
> -                x265_log(m_param, X265_LOG_ERROR, "empty stats file\n");
> -                return false;
> -            }
> -            m_numEntries = numEntries;
> -
> -            if (m_param->totalFrames < m_numEntries &&
> m_param->totalFrames > 0)
> -            {
> -                x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer
> frames than 1st pass (%d vs %d)\n",
> -                         m_param->totalFrames, m_numEntries);
> -            }
> -            if (m_param->totalFrames > m_numEntries &&
> !m_param->bEnableFrameDuplication)
> -            {
> -                x265_log(m_param, X265_LOG_ERROR, "2nd pass has more
> frames than 1st pass (%d vs %d)\n",
> -                         m_param->totalFrames, m_numEntries);
> -                return false;
> +                    m_start = 0;
> +                    m_isQpModified = true;
> +                    if (!initPass2())
> +                        return false;
> +                } /* else we're using constant quant, so no need to run
> the bitrate allocation */
>              }
> -
> -            m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
> -            if (!m_rce2Pass)
> +            else if (SHARED_MODE_MEM == m_cutreeStorageMode)
>              {
> -                 x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2
> pass cannot be allocated\n");
> -                 return false;
> +                if (m_param->rc.cuTree)
> +                {
> +                    if (!initCUTreeSharedMem())
> +                    {
> +                        return false;
> +                    }
> +                }
>              }
> -            m_encOrder = X265_MALLOC(int, m_numEntries);
> -            if (!m_encOrder)
> +            else
>
[AM] Why is this else branch needed? m_cutreeStorageMode can only be either
SHARED_MODE_FILE or SHARED_MODE_MEM, right?

>              {
> -                x265_log(m_param, X265_LOG_ERROR, "Encode order for 2
> pass cannot be allocated\n");
>                  return false;
>              }
> -            /* init all to skipped p frames */
> -            for (int i = 0; i < m_numEntries; i++)
> -            {
> -                RateControlEntry *rce = &m_rce2Pass[i];
> -                rce->sliceType = P_SLICE;
> -                rce->qScale = rce->newQScale = x265_qp2qScale(20);
> -                rce->miscBits = m_ncu + 10;
> -                rce->newQp = 0;
> -            }
> -            /* read stats */
> -            p = statsIn;
> -            double totalQpAq = 0;
> -            for (int i = 0; i < m_numEntries; i++)
> -            {
> -                RateControlEntry *rce, *rcePocOrder;
> -                int frameNumber;
> -                int encodeOrder;
> -                char picType;
> -                int e;
> -                char *next;
> -                double qpRc, qpAq, qNoVbv, qRceq;
> -                next = strstr(p, ";");
> -                if (next)
> -                    *next++ = 0;
> -                e = sscanf(p, " in:%d out:%d", &frameNumber,
> &encodeOrder);
> -                if (frameNumber < 0 || frameNumber >= m_numEntries)
> -                {
> -                    x265_log(m_param, X265_LOG_ERROR, "bad frame number
> (%d) at stats line %d\n", frameNumber, i);
> -                    return false;
> -                }
> -                rce = &m_rce2Pass[encodeOrder];
> -                rcePocOrder = &m_rce2Pass[frameNumber];
> -                m_encOrder[frameNumber] = encodeOrder;
> -                if (!m_param->bMultiPassOptRPS)
> -                {
> -                    int scenecut = 0;
> -                    e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
> q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
> scu:%lf sc:%d",
> -                        &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
> &rce->coeffBits,
> -                        &rce->mvBits, &rce->miscBits, &rce->iCuCount,
> &rce->pCuCount,
> -                        &rce->skipCuCount, &scenecut);
> -                    rcePocOrder->scenecut = scenecut != 0;
> -                }
> -                else
> -                {
> -                    char deltaPOC[128];
> -                    char bUsed[40];
> -                    memset(deltaPOC, 0, sizeof(deltaPOC));
> -                    memset(bUsed, 0, sizeof(bUsed));
> -                    e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
> q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
> scu:%lf nump:%d numnegp:%d numposp:%d deltapoc:%s bused:%s",
> -                        &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
> &rce->coeffBits,
> -                        &rce->mvBits, &rce->miscBits, &rce->iCuCount,
> &rce->pCuCount,
> -                        &rce->skipCuCount,
> &rce->rpsData.numberOfPictures, &rce->rpsData.numberOfNegativePictures,
> &rce->rpsData.numberOfPositivePictures, deltaPOC, bUsed);
> -                    splitdeltaPOC(deltaPOC, rce);
> -                    splitbUsed(bUsed, rce);
> -                    rce->rpsIdx = -1;
> -                }
> -                rce->keptAsRef = true;
> -                rce->isIdr = false;
> -                if (picType == 'b' || picType == 'p')
> -                    rce->keptAsRef = false;
> -                if (picType == 'I')
> -                    rce->isIdr = true;
> -                if (picType == 'I' || picType == 'i')
> -                    rce->sliceType = I_SLICE;
> -                else if (picType == 'P' || picType == 'p')
> -                    rce->sliceType = P_SLICE;
> -                else if (picType == 'B' || picType == 'b')
> -                    rce->sliceType = B_SLICE;
> -                else
> -                    e = -1;
> -                if (e < 10)
> -                {
> -                    x265_log(m_param, X265_LOG_ERROR, "statistics are
> damaged at line %d, parser out=%d\n", i, e);
> -                    return false;
> -                }
> -                rce->qScale = rce->newQScale = x265_qp2qScale(qpRc);
> -                totalQpAq += qpAq;
> -                rce->qpNoVbv = qNoVbv;
> -                rce->qpaRc = qpRc;
> -                rce->qpAq = qpAq;
> -                rce->qRceq = qRceq;
> -                p = next;
> -            }
> -            X265_FREE(statsBuf);
> -            if (m_param->rc.rateControlMode != X265_RC_CQP)
> -            {
> -                m_start = 0;
> -                m_isQpModified = true;
> -                if (!initPass2())
> -                    return false;
> -            } /* else we're using constant quant, so no need to run the
> bitrate allocation */
>          }
>          /* Open output file */
>          /* If input and output files are the same, output to a temp file
> @@ -682,19 +778,33 @@ bool RateControl::init(const SPS& sps)
>              X265_FREE(p);
>              if (m_param->rc.cuTree && !m_param->rc.bStatRead)
>              {
> -                statFileTmpname = strcatFilename(fileName,
> ".cutree.temp");
> -                if (!statFileTmpname)
> -                    return false;
> -                m_cutreeStatFileOut = x265_fopen(statFileTmpname, "wb");
> -                X265_FREE(statFileTmpname);
> -                if (!m_cutreeStatFileOut)
> +                if (SHARED_MODE_FILE == m_cutreeStorageMode)
> +                {
> +                    statFileTmpname = strcatFilename(fileName,
> ".cutree.temp");
> +                    if (!statFileTmpname)
> +                        return false;
> +                    m_cutreeStatFileOut = x265_fopen(statFileTmpname,
> "wb");
> +                    X265_FREE(statFileTmpname);
> +                    if (!m_cutreeStatFileOut)
> +                    {
> +                        x265_log_file(m_param, X265_LOG_ERROR, "can't
> open mbtree stats file %s.cutree.temp\n", fileName);
> +                        return false;
> +                    }
> +                }
> +                else if (SHARED_MODE_MEM == m_cutreeStorageMode)
> +                {
> +                    if (!initCUTreeSharedMem())
> +                    {
> +                        return false;
> +                    }
> +                }
> +                else
>                  {
> -                    x265_log_file(m_param, X265_LOG_ERROR, "can't open
> mbtree stats file %s.cutree.temp\n", fileName);
>                      return false;
>                  }
>              }
>          }
> -        if (m_param->rc.cuTree)
> +        if (m_param->rc.cuTree && !m_cuTreeStats.qpBuffer[0])
>          {
>              if (m_param->rc.qgSize == 8)
>              {
> @@ -714,6 +824,10 @@ bool RateControl::init(const SPS& sps)
>      return true;
>  }
>
> +void RateControl::skipCUTreeSharedMemRead(int32_t cnt)
> +{
> +    m_cutreeShrMem->skipRead(cnt);
> +}
>  void RateControl::reconfigureRC()
>  {
>      if (m_isVbv)
> @@ -1670,10 +1784,28 @@ bool RateControl::cuTreeReadFor2Pass(Frame* frame)
>              {
>                  m_cuTreeStats.qpBufPos++;
>
> -                if (!fread(&type, 1, 1, m_cutreeStatFileIn))
> -                    goto fail;
> -                if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos],
> sizeof(uint16_t), ncu, m_cutreeStatFileIn) != (size_t)ncu)
> -                    goto fail;
> +                if (SHARED_MODE_FILE == m_cutreeStorageMode)
> +                {
> +                    if (!fread(&type, 1, 1, m_cutreeStatFileIn))
> +                        goto fail;
> +                    if
> (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t),
> ncu, m_cutreeStatFileIn) != (size_t)ncu)
> +                        goto fail;
> +                }
> +                else
> +                {
> +                    if (SHARED_MODE_MEM == m_cutreeStorageMode)
> +                    {
> +                        if (!m_cutreeShrMem)
> +                        {
> +                            goto fail;
> +                        }
> +
> +                        CUTreeSharedDataItem shrItem;
> +                        shrItem.type = &type;
> +                        shrItem.stats =
> m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos];
> +                        m_cutreeShrMem->readNext(&shrItem,
> ReadSharedCUTreeData);
> +                    }
> +                }
>
>                  if (type != sliceTypeActual && m_cuTreeStats.qpBufPos ==
> 1)
>                  {
> @@ -3064,10 +3196,34 @@ int RateControl::writeRateControlFrameStats(Frame*
> curFrame, RateControlEntry* r
>      {
>          uint8_t sliceType = (uint8_t)rce->sliceType;
>          primitives.fix8Pack(m_cuTreeStats.qpBuffer[0],
> curFrame->m_lowres.qpCuTreeOffset, ncu);
> -        if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
> -            goto writeFailure;
> -        if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu,
> m_cutreeStatFileOut) < (size_t)ncu)
> +
> +        if (SHARED_MODE_FILE == m_cutreeStorageMode)
> +        {
> +            if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
> +                goto writeFailure;
> +            if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu,
> m_cutreeStatFileOut) < (size_t)ncu)
> +                goto writeFailure;
> +        }
> +        else if (SHARED_MODE_MEM == m_cutreeStorageMode)
> +        {
> +            if (SHARED_MODE_MEM == m_cutreeStorageMode)
>
[AM] Duplicate nested check: the enclosing else-if already tests SHARED_MODE_MEM == m_cutreeStorageMode, so this inner if is redundant.

> +            {
> +                if (!m_cutreeShrMem)
> +                {
> +                    goto writeFailure;
> +                }
> +
> +                CUTreeSharedDataItem shrItem;
> +                shrItem.type = &sliceType;
> +                shrItem.stats = m_cuTreeStats.qpBuffer[0];
> +                m_cutreeShrMem->writeData(&shrItem,
> WriteSharedCUTreeData);
> +            }
> +        }
> +        else
>
[AM] Why is this else branch needed? m_cutreeStorageMode can only be either
SHARED_MODE_FILE or SHARED_MODE_MEM, right?

> +        {
>              goto writeFailure;
> +        }
> +
>      }
>      return 0;
>
> @@ -3143,6 +3299,13 @@ void RateControl::destroy()
>      if (m_cutreeStatFileIn)
>          fclose(m_cutreeStatFileIn);
>
> +    if (m_cutreeShrMem)
> +    {
> +        m_cutreeShrMem->release();
> +        delete m_cutreeShrMem;
> +        m_cutreeShrMem = NULL;
> +    }
> +
>      X265_FREE(m_rce2Pass);
>      X265_FREE(m_encOrder);
>      for (int i = 0; i < 2; i++)
> @@ -3151,6 +3314,8 @@ void RateControl::destroy()
>      if (m_relativeComplexity)
>          X265_FREE(m_relativeComplexity);
>
> +    free(m_shrname);
> +
>  }
>
>  void RateControl::splitdeltaPOC(char deltapoc[], RateControlEntry *rce)
> diff --git a/source/encoder/ratecontrol.h b/source/encoder/ratecontrol.h
> index 204bd71e1..19c8676b4 100644
> --- a/source/encoder/ratecontrol.h
> +++ b/source/encoder/ratecontrol.h
> @@ -28,6 +28,7 @@
>
>  #include "common.h"
>  #include "sei.h"
> +#include "ringmem.h"
>
>  namespace X265_NS {
>  // encoder namespace
> @@ -126,6 +127,13 @@ struct RateControlEntry
>      bool     isFadeEnd;
>  };
>
> +enum DataSharedMode
> +{
> +    SHARED_MODE_FILE = 0,
> +    SHARED_MODE_MEM,
> +    SHARED_MODE_CNT
> +};
> +
>  class RateControl
>  {
>  public:
> @@ -237,9 +245,16 @@ public:
>      int     m_numEntries;
>      int     m_start;
>      int     m_reencode;
> +    ///< store the cutree data in file or shared memory
> +    ///< it is not necessary to store the cutree in shared memory.
> +    ///< However, for further use, shared memeory is a better choice
> +    DataSharedMode m_cutreeStorageMode;
> +    char   *m_shrname;
>      FILE*   m_statFileOut;
>      FILE*   m_cutreeStatFileOut;
>      FILE*   m_cutreeStatFileIn;
> +    ///< store the cutree data in memory instead of file
> +    RingMem *m_cutreeShrMem;
>      double  m_lastAccumPNorm;
>      double  m_expectedBitsSum;   /* sum of qscale2bits after rceq,
> ratefactor, and overflow, only includes finished frames */
>      int64_t m_predictedBits;
> @@ -254,7 +269,7 @@ public:
>                                  * This value is the current position (0
> or 1). */
>      } m_cuTreeStats;
>
> -    RateControl(x265_param& p, Encoder *enc);
> +    RateControl(x265_param& p, Encoder *enc, char *dataShr);
>      bool init(const SPS& sps);
>      void initHRD(SPS& sps);
>      void reconfigureRC();
> @@ -274,6 +289,9 @@ public:
>      int writeRateControlFrameStats(Frame* curFrame, RateControlEntry*
> rce);
>      bool   initPass2();
>
> +    bool initCUTreeSharedMem();
> +    void skipCUTreeSharedMemRead(int32_t cnt);
> +
>      double forwardMasking(Frame* curFrame, double q);
>      double backwardMasking(Frame* curFrame, double q);
>
> --
> 2.22.0.windows.1
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>


-- 
Regards,
*Aruna Matheswaran,*
Video Codec Engineer,
Media & AI analytics BU,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20211006/40134f20/attachment-0001.html>


More information about the x265-devel mailing list