[x265] [PATCH] --pass 2: Add support for cutree data sharing via shared memory

Mahesh Pittala mahesh at multicorewareinc.com
Tue Oct 12 10:23:40 UTC 2021


Pushed to master

On Tue, Oct 12, 2021 at 12:46 PM Liwei Wang <liwei at multicorewareinc.com>
wrote:

> From dc20a412f57c9a986cb69f54f5a3e0c821a41128 Mon Sep 17 00:00:00 2001
> From: lwWang <liwei at multicorewareinc.com>
> Date: Tue, 12 Oct 2021 14:18:30 +0800
> Subject: [PATCH] --pass 2: Add support for cutree data sharing via shared
>  memory
>
> ---
>  source/CMakeLists.txt          |   2 +-
>  source/common/CMakeLists.txt   |   3 +-
>  source/common/param.cpp        |   7 +
>  source/common/ringmem.cpp      | 357 ++++++++++++++++++++
>  source/common/ringmem.h        |  90 +++++
>  source/common/threading.h      | 137 ++++++++
>  source/encoder/encoder.cpp     |   6 +
>  source/encoder/ratecontrol.cpp | 579 ++++++++++++++++++++-------------
>  source/encoder/ratecontrol.h   |   6 +
>  source/x265.h                  |  29 +-
>  10 files changed, 988 insertions(+), 228 deletions(-)
>  create mode 100644 source/common/ringmem.cpp
>  create mode 100644 source/common/ringmem.h
>
> diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
> index 62afd3610..ad46614d2 100755
> --- a/source/CMakeLists.txt
> +++ b/source/CMakeLists.txt
> @@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CPU" OFF)
>  option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
>  mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>  # X265_BUILD must be incremented each time the public API is changed
> -set(X265_BUILD 202)
> +set(X265_BUILD 203)
>  configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
>                 "${PROJECT_BINARY_DIR}/x265.def")
>  configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
> diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
> index 2be305cac..6583ad56e 100644
> --- a/source/common/CMakeLists.txt
> +++ b/source/common/CMakeLists.txt
> @@ -172,4 +172,5 @@ add_library(common OBJECT
>      scalinglist.cpp scalinglist.h
>      quant.cpp quant.h contexts.h
>      deblock.cpp deblock.h
> -    scaler.cpp scaler.h)
> +    scaler.cpp scaler.h
> +    ringmem.cpp ringmem.h)
> diff --git a/source/common/param.cpp b/source/common/param.cpp
> index 2c1583d93..9f878927b 100755
> --- a/source/common/param.cpp
> +++ b/source/common/param.cpp
> @@ -281,7 +281,9 @@ void x265_param_default(x265_param* param)
>      param->rc.rfConstantMin = 0;
>      param->rc.bStatRead = 0;
>      param->rc.bStatWrite = 0;
> +    param->rc.dataShareMode = X265_SHARE_MODE_FILE;
>      param->rc.statFileName = NULL;
> +    param->rc.sharedMemName = NULL;
>      param->rc.bEncFocusedFramesOnly = 0;
>      param->rc.complexityBlur = 20;
>      param->rc.qblur = 0.5;
> @@ -1191,6 +1193,7 @@ int x265_param_parse(x265_param* p, const char*
> name, const char* value)
>          int pass = x265_clip3(0, 3, atoi(value));
>          p->rc.bStatWrite = pass & 1;
>          p->rc.bStatRead = pass & 2;
> +        p->rc.dataShareMode = X265_SHARE_MODE_FILE;
>      }
>      OPT("stats") p->rc.statFileName = strdup(value);
>      OPT("scaling-list") p->scalingLists = strdup(value);
> @@ -1921,6 +1924,7 @@ int x265_check_params(x265_param* param)
>              x265_log(param, X265_LOG_WARNING, "Live VBV enabled without
> VBV settings.Disabling live VBV in 2 pass\n");
>          }
>      }
> +    CHECK(param->rc.dataShareMode != X265_SHARE_MODE_FILE &&
> param->rc.dataShareMode != X265_SHARE_MODE_SHAREDMEM, "Invalid data share
> mode. It must be one of the X265_DATA_SHARE_MODES enum values\n" );
>      return check_failed;
>  }
>
> @@ -2561,8 +2565,11 @@ void x265_copy_params(x265_param* dst, x265_param*
> src)
>      dst->rc.rfConstantMin = src->rc.rfConstantMin;
>      dst->rc.bStatWrite = src->rc.bStatWrite;
>      dst->rc.bStatRead = src->rc.bStatRead;
> +    dst->rc.dataShareMode = src->rc.dataShareMode;
>      if (src->rc.statFileName)
> dst->rc.statFileName=strdup(src->rc.statFileName);
>      else dst->rc.statFileName = NULL;
> +    if (src->rc.sharedMemName) dst->rc.sharedMemName =
> strdup(src->rc.sharedMemName);
> +    else dst->rc.sharedMemName = NULL;
>      dst->rc.qblur = src->rc.qblur;
>      dst->rc.complexityBlur = src->rc.complexityBlur;
>      dst->rc.bEnableSlowFirstPass = src->rc.bEnableSlowFirstPass;
> diff --git a/source/common/ringmem.cpp b/source/common/ringmem.cpp
> new file mode 100644
> index 000000000..a4f191c90
> --- /dev/null
> +++ b/source/common/ringmem.cpp
> @@ -0,0 +1,357 @@
>
> +/*****************************************************************************
> + * Copyright (C) 2013-2017 MulticoreWare, Inc
> + *
> + * Authors: liwei <liwei at multicorewareinc.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at license @ x265.com
> +
> *****************************************************************************/
> +
> +#include "ringmem.h"
> +
> +#ifndef _WIN32
> +#include <sys/mman.h>
> +#endif ////< _WIN32
> +
> +#ifdef _WIN32
> +#define X265_SHARED_MEM_NAME                    "Local\\_x265_shr_mem_"
> +#define X265_SEMAPHORE_RINGMEM_WRITER_NAME    "_x265_semW_"
> +#define X265_SEMAPHORE_RINGMEM_READER_NAME    "_x265_semR_"
> +#else /* POSIX / pthreads */
> +#define X265_SHARED_MEM_NAME                    "/tmp/_x265_shr_mem_"
> +#define X265_SEMAPHORE_RINGMEM_WRITER_NAME    "/tmp/_x265_semW_"
> +#define X265_SEMAPHORE_RINGMEM_READER_NAME    "/tmp/_x265_semR_"
> +#endif
> +
> +#define RINGMEM_ALLIGNMENT                       64
> +
> +namespace X265_NS {
> +    ///< Default constructor: marks the object as uninitialized and clears
> +    ///< every member. No shared memory or semaphore is opened here.
> +    RingMem::RingMem()
> +        : m_initialized(false)
> +        , m_protectRW(false)
> +        , m_itemSize(0)
> +        , m_itemCnt(0)
> +        , m_dataPool(NULL)
> +        , m_shrMem(NULL)
> +#ifdef _WIN32
> +        , m_handle(NULL)
> +#else //_WIN32
> +        , m_filepath(NULL)
> +#endif //_WIN32
> +        , m_writeSem(NULL)
> +        , m_readSem(NULL)
> +    {
> +    }
> +
> +
> +    ///< Destructor. The body is intentionally empty: it does not call
> +    ///< release(), so anything acquired by init() is only freed by an
> +    ///< explicit release() call.
> +    RingMem::~RingMem()
> +    {
> +    }
> +
> +    ///< Advance the shared read index by cnt items without copying any data.
> +    ///< When read/write protection is enabled, one reader token is taken per
> +    ///< skipped item and cnt writer tokens are handed back afterwards.
> +    ///< Returns false only when the ring has not been initialized.
> +    bool RingMem::skipRead(int32_t cnt)
> +    {
> +        if (!m_initialized)
> +            return false;
> +
> +        if (m_protectRW)
> +        {
> +            ///< consume one reader token per skipped item
> +            for (int32_t left = cnt; left > 0; --left)
> +                m_readSem->take();
> +        }
> +
> +        ATOMIC_ADD(&m_shrMem->m_read, cnt);
> +
> +        if (m_protectRW)
> +            m_writeSem->give(cnt);
> +
> +        return true;
> +    }
> +
> +    ///< Advance the shared write index by cnt items without storing any data.
> +    ///< When read/write protection is enabled, one writer token is taken per
> +    ///< skipped item and cnt reader tokens are handed out afterwards.
> +    ///< Returns false only when the ring has not been initialized.
> +    bool RingMem::skipWrite(int32_t cnt)
> +    {
> +        if (!m_initialized)
> +            return false;
> +
> +        if (m_protectRW)
> +        {
> +            ///< consume one writer token per skipped item
> +            for (int32_t left = cnt; left > 0; --left)
> +                m_writeSem->take();
> +        }
> +
> +        ATOMIC_ADD(&m_shrMem->m_write, cnt);
> +
> +        if (m_protectRW)
> +            m_readSem->give(cnt);
> +
> +        return true;
> +    }
> +
> +    ///< initialize
> +    ///< Opens the shared memory region "<platform prefix><name>", creating and
> +    ///< zero-filling it when it does not exist yet, and lays the ring out as a
> +    ///< ShrMemCtrl header followed by itemCnt slots of itemSize bytes.
> +    ///< itemSize / itemCnt: size in bytes of one slot and number of slots, both > 0.
> +    ///< name: suffix appended to the platform prefix, must not be NULL.
> +    ///< protectRW: when true, a writer and a reader named semaphore are created
> +    ///<            and used by the read/write/skip calls.
> +    ///< Returns true on success, and also when the object was already
> +    ///< initialized (nothing is done in that case). Returns false on invalid
> +    ///< arguments or when an OS resource could not be obtained.
> +    bool RingMem::init(int32_t itemSize, int32_t itemCnt, const char *name, bool protectRW)
> +    {
> +        ///< check parameters
> +        if (itemSize <= 0 || itemCnt <= 0 || NULL == name)
> +        {
> +            ///< invalid parameters
> +            return false;
> +        }
> +
> +        if (!m_initialized)
> +        {
> +            ///< formatting names
> +            char nameBuf[MAX_SHR_NAME_LEN] = { 0 };
> +
> +            ///< shared memory name
> +            snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SHARED_MEM_NAME, name);
> +
> +            ///< create or open shared memory
> +            bool newCreated = false;
> +
> +            ///< calculate the size of the shared memory
> +            int32_t shrMemSize = (itemSize * itemCnt + sizeof(ShrMemCtrl) + RINGMEM_ALLIGNMENT - 1) & ~(RINGMEM_ALLIGNMENT - 1);
> +
> +#ifdef _WIN32
> +            HANDLE h = OpenFileMappingA(FILE_MAP_WRITE | FILE_MAP_READ, FALSE, nameBuf);
> +            if (!h)
> +            {
> +                h = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, shrMemSize, nameBuf);
> +
> +                if (!h)
> +                {
> +                    return false;
> +                }
> +
> +                newCreated = true;
> +            }
> +
> +            void *pool = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, 0);
> +
> +            if (!pool)
> +            {
> +                ///< m_initialized is still false, so release() would never
> +                ///< close this handle; close it here instead of leaking it
> +                CloseHandle(h);
> +                return false;
> +            }
> +
> +            ///< on success the handle must stay open, otherwise a later
> +            ///< OpenFileMapping would fail; release() closes it
> +            m_handle = h;
> +
> +#else /* POSIX / pthreads */
> +            mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
> +            int flag = O_RDWR;
> +            int shrfd = -1;
> +            if ((shrfd = open(nameBuf, flag, mode)) < 0)
> +            {
> +                flag |= O_CREAT;
> +
> +                shrfd = open(nameBuf, flag, mode);
> +                if (shrfd < 0)
> +                {
> +                    return false;
> +                }
> +                newCreated = true;
> +
> +                ///< grow the new file to shrMemSize by writing its last byte
> +                lseek(shrfd, shrMemSize - 1, SEEK_SET);
> +
> +                if (-1 == write(shrfd, "\0", 1))
> +                {
> +                    close(shrfd);
> +                    return false;
> +                }
> +
> +                if (lseek(shrfd, 0, SEEK_END) < shrMemSize)
> +                {
> +                    close(shrfd);
> +                    return false;
> +                }
> +            }
> +
> +            void *pool = mmap(0,
> +                shrMemSize,
> +                PROT_READ | PROT_WRITE,
> +                MAP_SHARED,
> +                shrfd,
> +                0);
> +
> +            ///< the mapping stays valid after the descriptor is closed
> +            close(shrfd);
> +            if (pool == MAP_FAILED)
> +            {
> +                return false;
> +            }
> +
> +            ///< remembered so that release() can unlink the backing file
> +            m_filepath = strdup(nameBuf);
> +#endif ///< _WIN32
> +
> +            if (newCreated)
> +            {
> +                memset(pool, 0, shrMemSize);
> +            }
> +
> +            m_shrMem = reinterpret_cast<ShrMemCtrl *>(pool);
> +            m_dataPool = reinterpret_cast<uint8_t *>(pool) + sizeof(ShrMemCtrl);
> +            m_itemSize = itemSize;
> +            m_itemCnt = itemCnt;
> +            ///< set before the semaphores are created so that the release()
> +            ///< calls on the failure paths below actually undo the mapping
> +            m_initialized = true;
> +
> +            if (protectRW)
> +            {
> +                m_protectRW = true;
> +                m_writeSem = new NamedSemaphore();
> +                if (!m_writeSem)
> +                {
> +                    release();
> +                    return false;
> +                }
> +
> +                ///< writer semaphore name; the bound must be the buffer size,
> +                ///< not sizeof(nameBuf - 1), which is the size of a pointer
> +                snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SEMAPHORE_RINGMEM_WRITER_NAME, name);
> +                if (!m_writeSem->create(nameBuf, m_itemCnt, m_itemCnt))
> +                {
> +                    release();
> +                    return false;
> +                }
> +
> +                m_readSem = new NamedSemaphore();
> +                if (!m_readSem)
> +                {
> +                    release();
> +                    return false;
> +                }
> +
> +                ///< reader semaphore name
> +                snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SEMAPHORE_RINGMEM_READER_NAME, name);
> +                if (!m_readSem->create(nameBuf, 0, m_itemCnt))
> +                {
> +                    release();
> +                    return false;
> +                }
> +            }
> +        }
> +
> +        return true;
> +    }
> +    ///< finalize
> +    ///< Undoes init(): unmaps the shared memory (and, on POSIX, unlinks the
> +    ///< backing file; on Windows, closes the mapping handle), then releases
> +    ///< and deletes the semaphores if protection was enabled. Does nothing
> +    ///< when the object is not initialized.
> +    void RingMem::release()
> +    {
> +        if (m_initialized)
> +        {
> +            m_initialized = false;
> +
> +            if (m_shrMem)
> +            {
> +#ifdef _WIN32
> +                UnmapViewOfFile(m_shrMem);
> +                CloseHandle(m_handle);
> +                m_handle = NULL;
> +#else /* POSIX / pthreads */
> +                ///< must be the same rounding as in init(): the mask is
> +                ///< ~(ALIGN - 1), not (~ALIGN - 1)
> +                int32_t shrMemSize = (m_itemSize * m_itemCnt + sizeof(ShrMemCtrl) + RINGMEM_ALLIGNMENT - 1) & ~(RINGMEM_ALLIGNMENT - 1);
> +                munmap(m_shrMem, shrMemSize);
> +                unlink(m_filepath);
> +                free(m_filepath);
> +                m_filepath = NULL;
> +#endif ///< _WIN32
> +                m_shrMem = NULL;
> +                m_dataPool = NULL;
> +                m_itemSize = 0;
> +                m_itemCnt = 0;
> +            }
> +
> +            if (m_protectRW)
> +            {
> +                m_protectRW = false;
> +                if (m_writeSem)
> +                {
> +                    m_writeSem->release();
> +
> +                    delete m_writeSem;
> +                    m_writeSem = NULL;
> +                }
> +
> +                if (m_readSem)
> +                {
> +                    m_readSem->release();
> +
> +                    delete m_readSem;
> +                    m_readSem = NULL;
> +                }
> +            }
> +
> +        }
> +    }
> +
> +    ///< data read
> +    ///< Hands the next slot to callback(dst, slot, m_itemSize). The slot index
> +    ///< is the value returned by ATOMIC_ADD on the shared read counter, modulo
> +    ///< the slot count. With protection enabled, a reader token is taken
> +    ///< first and a writer token is given back afterwards.
> +    ///< Returns false when uninitialized, when dst or callback is NULL, or
> +    ///< when the reader token cannot be taken.
> +    bool RingMem::readNext(void* dst, fnRWSharedData callback)
> +    {
> +        const bool usable = m_initialized && callback && dst;
> +        if (!usable)
> +            return false;
> +
> +        if (m_protectRW && !m_readSem->take())
> +            return false;
> +
> +        const int32_t slot = ATOMIC_ADD(&m_shrMem->m_read, 1) % m_itemCnt;
> +        uint8_t *slotAddr = reinterpret_cast<uint8_t *>(m_dataPool) + slot * m_itemSize;
> +        callback(dst, slotAddr, m_itemSize);
> +
> +        if (m_protectRW)
> +            m_writeSem->give(1);
> +
> +        return true;
> +    }
> +    ///< data write
> +    ///< Hands the next slot to callback(slot, data, m_itemSize). The slot
> +    ///< index is the value returned by ATOMIC_ADD on the shared write counter,
> +    ///< modulo the slot count. With protection enabled, a writer token is
> +    ///< taken first and a reader token is given out afterwards.
> +    ///< Returns false when uninitialized, when data or callback is NULL, or
> +    ///< when the writer token cannot be taken.
> +    bool RingMem::writeData(void *data, fnRWSharedData callback)
> +    {
> +        const bool usable = m_initialized && data && callback;
> +        if (!usable)
> +            return false;
> +
> +        if (m_protectRW && !m_writeSem->take())
> +            return false;
> +
> +        const int32_t slot = ATOMIC_ADD(&m_shrMem->m_write, 1) % m_itemCnt;
> +        uint8_t *slotAddr = reinterpret_cast<uint8_t *>(m_dataPool) + slot * m_itemSize;
> +        callback(slotAddr, data, m_itemSize);
> +
> +        if (m_protectRW)
> +            m_readSem->give(1);
> +
> +        return true;
> +    }
> +}
> diff --git a/source/common/ringmem.h b/source/common/ringmem.h
> new file mode 100644
> index 000000000..b14f7bee9
> --- /dev/null
> +++ b/source/common/ringmem.h
> @@ -0,0 +1,90 @@
>
> +/*****************************************************************************
> + * Copyright (C) 2013-2017 MulticoreWare, Inc
> + *
> + * Authors: liwei <liwei at multicorewareinc.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
> USA.
> + *
> + * This program is also available under a commercial proprietary license.
> + * For more information, contact us at license @ x265.com
> +
> *****************************************************************************/
> +
> +#ifndef X265_RINGMEM_H
> +#define X265_RINGMEM_H
> +
> +#include "common.h"
> +#include "threading.h"
> +
> +#if _MSC_VER
> +#define snprintf _snprintf
> +#define strdup _strdup
> +#endif
> +
> +namespace X265_NS {
> +
> +#define MAX_SHR_NAME_LEN                         256
> +
> +    ///< Ring of fixed-size items kept in a named shared memory region, with
> +    ///< optional named semaphores guarding the read and write operations.
> +    class RingMem {
> +    public:
> +        RingMem();
> +        ~RingMem();
> +
> +        ///< advance the read index by cnt items without copying data
> +        bool skipRead(int32_t cnt);
> +
> +        ///< advance the write index by cnt items without storing data
> +        bool skipWrite(int32_t cnt);
> +
> +        ///< initialize
> +        ///< itemSize:  size in bytes of one item
> +        ///< itemCnt:   number of items the ring holds
> +        ///< name:      name used to identify the shared memory (and semaphores)
> +        ///< protectRW: whether to use semaphores to protect the write and read operations.
> +        bool init(int32_t itemSize, int32_t itemCnt, const char *name,
bool protectRW = false);
> +        ///< finalize
> +        void release();
> +
> +        ///< copy callback used by readNext/writeData; size is the item size
> +        typedef void(*fnRWSharedData)(void *dst, void *src, int32_t size);
> +
> +        ///< data read
> +        bool readNext(void* dst, fnRWSharedData callback);
> +        ///< data write
> +        bool writeData(void *data, fnRWSharedData callback);
> +
> +    private:
> +        ///< true once init() has succeeded and until release() is called
> +        bool    m_initialized;
> +        ///< true when the semaphores are in use
> +        bool    m_protectRW;
> +
> +        int32_t m_itemSize;
> +        int32_t m_itemCnt;
> +        ///< data pool
> +        void   *m_dataPool;
> +        ///< control header stored at the start of the shared memory
> +        typedef struct {
> +            ///< index to write
> +            int32_t m_write;
> +            ///< index to read
> +            int32_t m_read;
> +
> +        }ShrMemCtrl;
> +
> +        ShrMemCtrl *m_shrMem;
> +#ifdef _WIN32
> +        void       *m_handle;
> +#else // _WIN32
> +        char       *m_filepath;
> +#endif // _WIN32
> +
> +        ///< Semaphores
> +        NamedSemaphore *m_writeSem;
> +        NamedSemaphore *m_readSem;
> +    };
> +};
> +
> +#endif // ifndef X265_RINGMEM_H
> diff --git a/source/common/threading.h b/source/common/threading.h
> index 53a63beaf..dcf6081e3 100644
> --- a/source/common/threading.h
> +++ b/source/common/threading.h
> @@ -3,6 +3,7 @@
>   *
>   * Authors: Steve Borho <steve at borho.org>
>   *          Min Chen <chenm003 at 163.com>
> +            liwei <liwei at multicorewareinc.com>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -253,6 +254,47 @@ protected:
>      int                m_val;
>  };
>
> +/* Thin wrapper around a Win32 named semaphore handle. */
> +class NamedSemaphore
> +{
> +public:
> +    NamedSemaphore() : m_sem(NULL)
> +    {
> +    }
> +
> +    /* The destructor does not close the handle; call release() explicitly. */
> +    ~NamedSemaphore()
> +    {
> +    }
> +
> +    /* Create (or open, if it already exists) the semaphore called name with
> +     * the given initial and maximum counts. A second call on an object that
> +     * already holds a handle keeps that handle. Returns true when a handle
> +     * is held afterwards. */
> +    bool create(const char* name, const int initcnt, const int maxcnt)
> +    {
> +        if(!m_sem)
> +        {
> +            m_sem = CreateSemaphoreA(NULL, initcnt, maxcnt, name);
> +        }
> +        return m_sem != NULL;
> +    }
> +
> +    /* Increase the count by cnt. Returns true on success. */
> +    bool give(const int32_t cnt)
> +    {
> +        return ReleaseSemaphore(m_sem, (LONG)cnt, NULL) != FALSE;
> +    }
> +
> +    /* Wait up to time_out milliseconds (INFINITE by default) for one count.
> +     * Returns false on timeout or failure. */
> +    bool take(const uint32_t time_out = INFINITE)
> +    {
> +        DWORD rt = WaitForSingleObject(m_sem, time_out);
> +        return rt != WAIT_TIMEOUT && rt != WAIT_FAILED;
> +    }
> +
> +    /* Close the handle if one is held. Safe to call when create() failed or
> +     * was never called, so no NULL handle is passed to CloseHandle. */
> +    void release()
> +    {
> +        if (m_sem)
> +        {
> +            CloseHandle(m_sem);
> +            m_sem = NULL;
> +        }
> +    }
> +
> +private:
> +    HANDLE m_sem;
> +};
> +
>  #else /* POSIX / pthreads */
>
>  typedef pthread_t ThreadHandle;
> @@ -459,6 +501,101 @@ protected:
>      int             m_val;
>  };
>
> +#define TIMEOUT_INFINITE 0xFFFFFFFF
> +
> +/* Thin wrapper around a POSIX named semaphore (sem_open family). */
> +class NamedSemaphore
> +{
> +public:
> +    NamedSemaphore()
> +        : m_sem(NULL)
> +        , m_name(NULL)
> +    {
> +    }
> +
> +    /* The destructor does not close the semaphore; call release() explicitly. */
> +    ~NamedSemaphore()
> +    {
> +    }
> +
> +    /* Create the semaphore called name with initial count initcnt, or open it
> +     * when it already exists. maxcnt is only used to validate initcnt.
> +     * A second call on an object that already holds a semaphore keeps it.
> +     * Returns true when a semaphore is held afterwards. */
> +    bool create(const char* name, const int initcnt, const int maxcnt)
> +    {
> +        if (m_sem)
> +        {
> +            return true;
> +        }
> +
> +        /* initcnt == maxcnt is a valid "all slots free" semaphore; only an
> +         * initial count above the maximum is an error */
> +        if (initcnt > maxcnt)
> +        {
> +            return false;
> +        }
> +
> +        sem_t *sem = sem_open(name, O_CREAT | O_EXCL, 0666, initcnt);
> +        if (SEM_FAILED == sem && EEXIST == errno)
> +        {
> +            /* somebody else created it first: attach to the existing one */
> +            sem = sem_open(name, 0);
> +        }
> +
> +        if (SEM_FAILED == sem)
> +        {
> +            /* never store SEM_FAILED in m_sem, release() would close it */
> +            return false;
> +        }
> +
> +        m_sem = sem;
> +        m_name = strdup(name);
> +        return true;
> +    }
> +
> +    /* Post the semaphore cnt times, stopping at the first failure.
> +     * A cnt of zero or less posts nothing. Returns true when no post failed. */
> +    bool give(const int32_t cnt)
> +    {
> +        int ret = 0;
> +        for (int32_t i = 0; i < cnt && 0 == ret; i++)
> +        {
> +            ret = sem_post(m_sem);
> +        }
> +
> +        return 0 == ret;
> +    }
> +
> +    /* Take one count. time_out is in milliseconds: TIMEOUT_INFINITE blocks,
> +     * 0 only tries, anything else waits at most that long.
> +     * Returns false on timeout or failure. */
> +    bool take(const uint32_t time_out = TIMEOUT_INFINITE)
> +    {
> +        if (TIMEOUT_INFINITE == time_out) {
> +            return 0 == sem_wait(m_sem);
> +        }
> +
> +        if (0 == time_out)
> +        {
> +            return 0 == sem_trywait(m_sem);
> +        }
> +
> +        /* sem_timedwait expects an absolute CLOCK_REALTIME deadline, not a
> +         * relative interval, so start from the current time */
> +        struct timespec ts;
> +        if (0 != clock_gettime(CLOCK_REALTIME, &ts))
> +        {
> +            return false;
> +        }
> +
> +        ts.tv_sec += time_out / 1000;
> +        ts.tv_nsec += (long)(time_out % 1000) * 1000000L;
> +        if (ts.tv_nsec >= 1000000000L)
> +        {
> +            ts.tv_sec += 1;
> +            ts.tv_nsec -= 1000000000L;
> +        }
> +
> +        return 0 == sem_timedwait(m_sem, &ts);
> +    }
> +
> +    /* Close and unlink the semaphore and free the stored name. Does nothing
> +     * when no semaphore is held. */
> +    void release()
> +    {
> +        if (m_sem)
> +        {
> +            sem_close(m_sem);
> +            sem_unlink(m_name);
> +            m_sem = NULL;
> +            free(m_name);
> +            m_name = NULL;
> +        }
> +    }
> +
> +private:
> +    sem_t *m_sem;
> +    char  *m_name;
> +};
> +
>  #endif // ifdef _WIN32
>
>  class ScopedLock
> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
> index 56f0e5433..607e64370 100644
> --- a/source/encoder/encoder.cpp
> +++ b/source/encoder/encoder.cpp
> @@ -1006,6 +1006,7 @@ void Encoder::destroy()
>          /* release string arguments that were strdup'd */
>          free((char*)m_param->rc.lambdaFileName);
>          free((char*)m_param->rc.statFileName);
> +        free((char*)m_param->rc.sharedMemName);
>          free((char*)m_param->analysisReuseFileName);
>          free((char*)m_param->scalingLists);
>          free((char*)m_param->csvfn);
> @@ -4019,6 +4020,11 @@ void Encoder::configure(x265_param *p)
>          p->rc.bStatRead = 0;
>      }
>
> +    if ((p->rc.bStatWrite || p->rc.bStatRead) && p->rc.dataShareMode !=
> X265_SHARE_MODE_FILE && p->rc.dataShareMode != X265_SHARE_MODE_SHAREDMEM)
> +    {
> +        p->rc.dataShareMode = X265_SHARE_MODE_FILE;
> +    }
> +
>      if (!p->rc.bStatRead || p->rc.rateControlMode != X265_RC_CRF)
>      {
>          p->rc.bEncFocusedFramesOnly = 0;
> diff --git a/source/encoder/ratecontrol.cpp
> b/source/encoder/ratecontrol.cpp
> index 6c59d0c64..a1321b38d 100644
> --- a/source/encoder/ratecontrol.cpp
> +++ b/source/encoder/ratecontrol.cpp
> @@ -41,6 +41,10 @@
>  #define BR_SHIFT  6
>  #define CPB_SHIFT 4
>
> +#define SHARED_DATA_ALIGNMENT      4 ///< 4btye, 32bit
> +#define CUTREE_SHARED_MEM_NAME     "cutree"
> +#define GOP_CNT_CU_TREE            3
> +
>  using namespace X265_NS;
>
>  /* Amortize the partial cost of I frames over the next N frames */
> @@ -104,6 +108,37 @@ inline char *strcatFilename(const char *input, const
> char *suffix)
>      return output;
>  }
>
> +/* Caller-side view of one cutree record exchanged through the shared ring:
> + * `type` points at the single type byte and `stats` at the uint16_t payload.
> + * The struct only holds pointers; the shared-data callbacks below copy
> + * through them. */
> +typedef struct CUTreeSharedDataItem
> +{
> +    uint8_t  *type;
> +    uint16_t *stats;
> +}CUTreeSharedDataItem;
> +
> +/* Copy one record out of shared memory.
> + * dst:  CUTreeSharedDataItem whose type/stats pointers receive the data
> + * src:  start of the record in shared memory
> + * size: total record size in bytes, including the padded type field */
> +static void ReadSharedCUTreeData(void *dst, void *src, int32_t size)
> +{
> +    CUTreeSharedDataItem *item = reinterpret_cast<CUTreeSharedDataItem *>(dst);
> +    uint8_t *record = reinterpret_cast<uint8_t *>(src);
> +
> +    /* the type byte sits at the very start of the record */
> +    item->type[0] = record[0];
> +
> +    /* the type is padded to SHARED_DATA_ALIGNMENT bytes; the stats follow */
> +    const int32_t headerBytes = (sizeof(uint8_t) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
> +    uint16_t *payload = reinterpret_cast<uint16_t *>(record + headerBytes);
> +    memcpy(item->stats, payload, size - headerBytes);
> +}
> +
> +/* Copy one record into shared memory.
> + * dst:  start of the record in shared memory
> + * src:  CUTreeSharedDataItem whose type/stats pointers supply the data
> + * size: total record size in bytes, including the padded type field */
> +static void WriteSharedCUTreeData(void *dst, void *src, int32_t size)
> +{
> +    CUTreeSharedDataItem *item = reinterpret_cast<CUTreeSharedDataItem *>(src);
> +    uint8_t *record = reinterpret_cast<uint8_t *>(dst);
> +
> +    /* the type byte goes at the very start of the record */
> +    record[0] = item->type[0];
> +
> +    /* the type is padded to SHARED_DATA_ALIGNMENT bytes; the stats follow */
> +    const int32_t headerBytes = (sizeof(uint8_t) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
> +    uint16_t *payload = reinterpret_cast<uint16_t *>(record + headerBytes);
> +    memcpy(payload, item->stats, size - headerBytes);
> +}
> +
> +
>  inline double qScale2bits(RateControlEntry *rce, double qScale)
>  {
>      if (qScale < 0.1)
> @@ -209,6 +244,7 @@ RateControl::RateControl(x265_param& p, Encoder *top)
>      m_lastAbrResetPoc = -1;
>      m_statFileOut = NULL;
>      m_cutreeStatFileOut = m_cutreeStatFileIn = NULL;
> +    m_cutreeShrMem = NULL;
>      m_rce2Pass = NULL;
>      m_encOrder = NULL;
>      m_lastBsliceSatdCost = 0;
> @@ -320,6 +356,42 @@ RateControl::RateControl(x265_param& p, Encoder *top)
>          m_cuTreeStats.qpBuffer[i] = NULL;
>  }
>
> +/* Create the shared-memory ring used to exchange cutree data, if it does not
> + * exist yet. The ring is named "<rc.sharedMemName>cutree"; each item holds
> + * the padded type byte plus one uint16_t per CU (four per CU when qgSize is
> + * 8), and there is room for GOP_CNT_CU_TREE gops worth of items.
> + * Returns true when the ring is available, false when the name is missing or
> + * too long or the ring could not be initialized. On failure m_cutreeShrMem is
> + * reset to NULL so that a later call tries again instead of reporting success. */
> +bool RateControl::initCUTreeSharedMem()
> +{
> +    if (!m_cutreeShrMem) {
> +        /* rc.sharedMemName defaults to NULL; do not hand that to string functions */
> +        if (!m_param->rc.sharedMemName)
> +        {
> +            x265_log(m_param, X265_LOG_ERROR, "shared memory name is not set\n");
> +            return false;
> +        }
> +
> +        /* bounded formatting: the name comes from the user and must fit the buffer */
> +        char shrname[MAX_SHR_NAME_LEN] = { 0 };
> +        int len = snprintf(shrname, sizeof(shrname) - 1, "%s%s", m_param->rc.sharedMemName, CUTREE_SHARED_MEM_NAME);
> +        if (len < 0 || len >= (int)sizeof(shrname) - 1)
> +        {
> +            x265_log(m_param, X265_LOG_ERROR, "shared memory name is too long\n");
> +            return false;
> +        }
> +
> +        m_cutreeShrMem = new RingMem();
> +        if (!m_cutreeShrMem)
> +        {
> +            return false;
> +        }
> +
> +        ///< now cutree data from at most 3 gops would be stored in the shared memory at the same time
> +        int32_t itemSize = (sizeof(uint8_t) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
> +        if (m_param->rc.qgSize == 8)
> +        {
> +            itemSize += sizeof(uint16_t) * m_ncu * 4;
> +        }
> +        else
> +        {
> +            itemSize += sizeof(uint16_t) * m_ncu;
> +        }
> +
> +        int32_t itemCnt = X265_MIN(m_param->keyframeMax, (int)(m_fps + 0.5));
> +        itemCnt *= GOP_CNT_CU_TREE;
> +
> +        if (!m_cutreeShrMem->init(itemSize, itemCnt, shrname))
> +        {
> +            /* drop the unusable object so the next call does not see a
> +             * non-NULL pointer and wrongly return true */
> +            delete m_cutreeShrMem;
> +            m_cutreeShrMem = NULL;
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +}
> +
>  bool RateControl::init(const SPS& sps)
>  {
>      if (m_isVbv && !m_initVbv)
> @@ -421,244 +493,257 @@ bool RateControl::init(const SPS& sps)
>          /* Load stat file and init 2pass algo */
>          if (m_param->rc.bStatRead)
>          {
> -            m_expectedBitsSum = 0;
> -            char *p, *statsIn, *statsBuf;
> -            /* read 1st pass stats */
> -            statsIn = statsBuf = x265_slurp_file(fileName);
> -            if (!statsBuf)
> -                return false;
> -            if (m_param->rc.cuTree)
> +            if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
>              {
> -                char *tmpFile = strcatFilename(fileName, ".cutree");
> -                if (!tmpFile)
> +                m_expectedBitsSum = 0;
> +                char *p, *statsIn, *statsBuf;
> +                /* read 1st pass stats */
> +                statsIn = statsBuf = x265_slurp_file(fileName);
> +                if (!statsBuf)
>                      return false;
> -                m_cutreeStatFileIn = x265_fopen(tmpFile, "rb");
> -                X265_FREE(tmpFile);
> -                if (!m_cutreeStatFileIn)
> +                if (m_param->rc.cuTree)
>                  {
> -                    x265_log_file(m_param, X265_LOG_ERROR, "can't open
> stats file %s.cutree\n", fileName);
> -                    return false;
> +                    char *tmpFile = strcatFilename(fileName, ".cutree");
> +                    if (!tmpFile)
> +                        return false;
> +                    m_cutreeStatFileIn = x265_fopen(tmpFile, "rb");
> +                    X265_FREE(tmpFile);
> +                    if (!m_cutreeStatFileIn)
> +                    {
> +                        x265_log_file(m_param, X265_LOG_ERROR, "can't
> open stats file %s.cutree\n", fileName);
> +                        return false;
> +                    }
>                  }
> -            }
>
> -            /* check whether 1st pass options were compatible with
> current options */
> -            if (strncmp(statsBuf, "#options:", 9))
> -            {
> -                x265_log(m_param, X265_LOG_ERROR,"options list in stats
> file not valid\n");
> -                return false;
> -            }
> -            {
> -                int i, j, m;
> -                uint32_t k , l;
> -                bool bErr = false;
> -                char *opts = statsBuf;
> -                statsIn = strchr(statsBuf, '\n');
> -                if (!statsIn)
> -                {
> -                    x265_log(m_param, X265_LOG_ERROR, "Malformed stats
> file\n");
> -                    return false;
> -                }
> -                *statsIn = '\0';
> -                statsIn++;
> -                if ((p = strstr(opts, " input-res=")) == 0 || sscanf(p, "
> input-res=%dx%d", &i, &j) != 2)
> -                {
> -                    x265_log(m_param, X265_LOG_ERROR, "Resolution
> specified in stats file not valid\n");
> -                    return false;
> -                }
> -                if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, "
> fps=%u/%u", &k, &l) != 2)
> -                {
> -                    x265_log(m_param, X265_LOG_ERROR, "fps specified in
> stats file not valid\n");
> -                    return false;
> -                }
> -                if (((p = strstr(opts, " vbv-maxrate=")) == 0 ||
> sscanf(p, " vbv-maxrate=%d", &m) != 1) && m_param->rc.rateControlMode ==
> X265_RC_CRF)
> +                /* check whether 1st pass options were compatible with
> current options */
> +                if (strncmp(statsBuf, "#options:", 9))
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "Constant
> rate-factor is incompatible with 2pass without vbv-maxrate in the previous
> pass\n");
> +                    x265_log(m_param, X265_LOG_ERROR, "options list in
> stats file not valid\n");
>                      return false;
>                  }
> -                if (k != m_param->fpsNum || l != m_param->fpsDenom)
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "fps mismatch with
> 1st pass (%u/%u vs %u/%u)\n",
> -                              m_param->fpsNum, m_param->fpsDenom, k, l);
> -                    return false;
> -                }
> -                if (m_param->analysisMultiPassRefine)
> -                {
> -                    p = strstr(opts, "ref=");
> -                    sscanf(p, "ref=%d", &i);
> -                    if (i > m_param->maxNumReferences)
> +                    int i, j, m;
> +                    uint32_t k, l;
> +                    bool bErr = false;
> +                    char *opts = statsBuf;
> +                    statsIn = strchr(statsBuf, '\n');
> +                    if (!statsIn)
>                      {
> -                        x265_log(m_param, X265_LOG_ERROR,
> "maxNumReferences cannot be less than 1st pass (%d vs %d)\n",
> -                            i, m_param->maxNumReferences);
> +                        x265_log(m_param, X265_LOG_ERROR, "Malformed
> stats file\n");
>                          return false;
>                      }
> -                }
> -                if (m_param->analysisMultiPassRefine ||
> m_param->analysisMultiPassDistortion)
> -                {
> -                    p = strstr(opts, "ctu=");
> -                    sscanf(p, "ctu=%u", &k);
> -                    if (k != m_param->maxCUSize)
> +                    *statsIn = '\0';
> +                    statsIn++;
> +                    if ((p = strstr(opts, " input-res=")) == 0 ||
> sscanf(p, " input-res=%dx%d", &i, &j) != 2)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "Resolution
> specified in stats file not valid\n");
> +                        return false;
> +                    }
> +                    if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, "
> fps=%u/%u", &k, &l) != 2)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "fps specified
> in stats file not valid\n");
> +                        return false;
> +                    }
> +                    if (((p = strstr(opts, " vbv-maxrate=")) == 0 ||
> sscanf(p, " vbv-maxrate=%d", &m) != 1) && m_param->rc.rateControlMode ==
> X265_RC_CRF)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "Constant
> rate-factor is incompatible with 2pass without vbv-maxrate in the previous
> pass\n");
> +                        return false;
> +                    }
> +                    if (k != m_param->fpsNum || l != m_param->fpsDenom)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "fps mismatch
> with 1st pass (%u/%u vs %u/%u)\n",
> +                            m_param->fpsNum, m_param->fpsDenom, k, l);
> +                        return false;
> +                    }
> +                    if (m_param->analysisMultiPassRefine)
> +                    {
> +                        p = strstr(opts, "ref=");
> +                        sscanf(p, "ref=%d", &i);
> +                        if (i > m_param->maxNumReferences)
> +                        {
> +                            x265_log(m_param, X265_LOG_ERROR,
> "maxNumReferences cannot be less than 1st pass (%d vs %d)\n",
> +                                i, m_param->maxNumReferences);
> +                            return false;
> +                        }
> +                    }
> +                    if (m_param->analysisMultiPassRefine ||
> m_param->analysisMultiPassDistortion)
> +                    {
> +                        p = strstr(opts, "ctu=");
> +                        sscanf(p, "ctu=%u", &k);
> +                        if (k != m_param->maxCUSize)
> +                        {
> +                            x265_log(m_param, X265_LOG_ERROR, "maxCUSize
> mismatch with 1st pass (%u vs %u)\n",
> +                                k, m_param->maxCUSize);
> +                            return false;
> +                        }
> +                    }
> +                    CMP_OPT_FIRST_PASS("bitdepth",
> m_param->internalBitDepth);
> +                    CMP_OPT_FIRST_PASS("weightp",
> m_param->bEnableWeightedPred);
> +                    CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
> +                    CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
> +                    CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
> +                    CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
> +                    CMP_OPT_FIRST_PASS("scenecut",
> m_param->scenecutThreshold);
> +                    CMP_OPT_FIRST_PASS("intra-refresh",
> m_param->bIntraRefresh);
> +                    CMP_OPT_FIRST_PASS("frame-dup",
> m_param->bEnableFrameDuplication);
> +                    if (m_param->bMultiPassOptRPS)
> +                    {
> +                        CMP_OPT_FIRST_PASS("multi-pass-opt-rps",
> m_param->bMultiPassOptRPS);
> +                        CMP_OPT_FIRST_PASS("repeat-headers",
> m_param->bRepeatHeaders);
> +                        CMP_OPT_FIRST_PASS("min-keyint",
> m_param->keyframeMin);
> +                    }
> +
> +                    if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p,
> "b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
> +                    {
> +                        m_param->bFrameAdaptive = i;
> +                    }
> +                    else if (m_param->bframes)
>                      {
> -                        x265_log(m_param, X265_LOG_ERROR, "maxCUSize
> mismatch with 1st pass (%u vs %u)\n",
> -                            k, m_param->maxCUSize);
> +                        x265_log(m_param, X265_LOG_ERROR, "b-adapt method
> specified in stats file not valid\n");
>                          return false;
>                      }
> +
> +                    if ((p = strstr(opts, "rc-lookahead=")) != 0 &&
> sscanf(p, "rc-lookahead=%d", &i))
> +                        m_param->lookaheadDepth = i;
>                  }
> -                CMP_OPT_FIRST_PASS("bitdepth", m_param->internalBitDepth);
> -                CMP_OPT_FIRST_PASS("weightp",
> m_param->bEnableWeightedPred);
> -                CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
> -                CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
> -                CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
> -                CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
> -                CMP_OPT_FIRST_PASS("scenecut",
> m_param->scenecutThreshold);
> -                CMP_OPT_FIRST_PASS("intra-refresh",
> m_param->bIntraRefresh);
> -                CMP_OPT_FIRST_PASS("frame-dup",
> m_param->bEnableFrameDuplication);
> -                if (m_param->bMultiPassOptRPS)
> +                /* find number of pics */
> +                p = statsIn;
> +                int numEntries;
> +                for (numEntries = -1; p; numEntries++)
> +                    p = strchr(p + 1, ';');
> +                if (!numEntries)
>                  {
> -                    CMP_OPT_FIRST_PASS("multi-pass-opt-rps",
> m_param->bMultiPassOptRPS);
> -                    CMP_OPT_FIRST_PASS("repeat-headers",
> m_param->bRepeatHeaders);
> -                    CMP_OPT_FIRST_PASS("min-keyint",
> m_param->keyframeMin);
> +                    x265_log(m_param, X265_LOG_ERROR, "empty stats
> file\n");
> +                    return false;
>                  }
> +                m_numEntries = numEntries;
>
> -                if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p,
> "b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
> +                if (m_param->totalFrames < m_numEntries &&
> m_param->totalFrames > 0)
>                  {
> -                    m_param->bFrameAdaptive = i;
> +                    x265_log(m_param, X265_LOG_WARNING, "2nd pass has
> fewer frames than 1st pass (%d vs %d)\n",
> +                        m_param->totalFrames, m_numEntries);
>                  }
> -                else if (m_param->bframes)
> +                if (m_param->totalFrames > m_numEntries &&
> !m_param->bEnableFrameDuplication)
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "b-adapt method
> specified in stats file not valid\n");
> +                    x265_log(m_param, X265_LOG_ERROR, "2nd pass has more
> frames than 1st pass (%d vs %d)\n",
> +                        m_param->totalFrames, m_numEntries);
>                      return false;
>                  }
>
> -                if ((p = strstr(opts, "rc-lookahead=")) != 0 && sscanf(p,
> "rc-lookahead=%d", &i))
> -                    m_param->lookaheadDepth = i;
> -            }
> -            /* find number of pics */
> -            p = statsIn;
> -            int numEntries;
> -            for (numEntries = -1; p; numEntries++)
> -                p = strchr(p + 1, ';');
> -            if (!numEntries)
> -            {
> -                x265_log(m_param, X265_LOG_ERROR, "empty stats file\n");
> -                return false;
> -            }
> -            m_numEntries = numEntries;
> -
> -            if (m_param->totalFrames < m_numEntries &&
> m_param->totalFrames > 0)
> -            {
> -                x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer
> frames than 1st pass (%d vs %d)\n",
> -                         m_param->totalFrames, m_numEntries);
> -            }
> -            if (m_param->totalFrames > m_numEntries &&
> !m_param->bEnableFrameDuplication)
> -            {
> -                x265_log(m_param, X265_LOG_ERROR, "2nd pass has more
> frames than 1st pass (%d vs %d)\n",
> -                         m_param->totalFrames, m_numEntries);
> -                return false;
> -            }
> -
> -            m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
> -            if (!m_rce2Pass)
> -            {
> -                 x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2
> pass cannot be allocated\n");
> -                 return false;
> -            }
> -            m_encOrder = X265_MALLOC(int, m_numEntries);
> -            if (!m_encOrder)
> -            {
> -                x265_log(m_param, X265_LOG_ERROR, "Encode order for 2
> pass cannot be allocated\n");
> -                return false;
> -            }
> -            /* init all to skipped p frames */
> -            for (int i = 0; i < m_numEntries; i++)
> -            {
> -                RateControlEntry *rce = &m_rce2Pass[i];
> -                rce->sliceType = P_SLICE;
> -                rce->qScale = rce->newQScale = x265_qp2qScale(20);
> -                rce->miscBits = m_ncu + 10;
> -                rce->newQp = 0;
> -            }
> -            /* read stats */
> -            p = statsIn;
> -            double totalQpAq = 0;
> -            for (int i = 0; i < m_numEntries; i++)
> -            {
> -                RateControlEntry *rce, *rcePocOrder;
> -                int frameNumber;
> -                int encodeOrder;
> -                char picType;
> -                int e;
> -                char *next;
> -                double qpRc, qpAq, qNoVbv, qRceq;
> -                next = strstr(p, ";");
> -                if (next)
> -                    *next++ = 0;
> -                e = sscanf(p, " in:%d out:%d", &frameNumber,
> &encodeOrder);
> -                if (frameNumber < 0 || frameNumber >= m_numEntries)
> +                m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
> +                if (!m_rce2Pass)
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "bad frame number
> (%d) at stats line %d\n", frameNumber, i);
> +                    x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2
> pass cannot be allocated\n");
>                      return false;
>                  }
> -                rce = &m_rce2Pass[encodeOrder];
> -                rcePocOrder = &m_rce2Pass[frameNumber];
> -                m_encOrder[frameNumber] = encodeOrder;
> -                if (!m_param->bMultiPassOptRPS)
> +                m_encOrder = X265_MALLOC(int, m_numEntries);
> +                if (!m_encOrder)
>                  {
> -                    int scenecut = 0;
> -                    e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
> q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
> scu:%lf sc:%d",
> -                        &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
> &rce->coeffBits,
> -                        &rce->mvBits, &rce->miscBits, &rce->iCuCount,
> &rce->pCuCount,
> -                        &rce->skipCuCount, &scenecut);
> -                    rcePocOrder->scenecut = scenecut != 0;
> +                    x265_log(m_param, X265_LOG_ERROR, "Encode order for 2
> pass cannot be allocated\n");
> +                    return false;
>                  }
> -                else
> +                /* init all to skipped p frames */
> +                for (int i = 0; i < m_numEntries; i++)
>                  {
> -                    char deltaPOC[128];
> -                    char bUsed[40];
> -                    memset(deltaPOC, 0, sizeof(deltaPOC));
> -                    memset(bUsed, 0, sizeof(bUsed));
> -                    e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
> q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
> scu:%lf nump:%d numnegp:%d numposp:%d deltapoc:%s bused:%s",
> -                        &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
> &rce->coeffBits,
> -                        &rce->mvBits, &rce->miscBits, &rce->iCuCount,
> &rce->pCuCount,
> -                        &rce->skipCuCount,
> &rce->rpsData.numberOfPictures, &rce->rpsData.numberOfNegativePictures,
> &rce->rpsData.numberOfPositivePictures, deltaPOC, bUsed);
> -                    splitdeltaPOC(deltaPOC, rce);
> -                    splitbUsed(bUsed, rce);
> -                    rce->rpsIdx = -1;
> -                }
> -                rce->keptAsRef = true;
> -                rce->isIdr = false;
> -                if (picType == 'b' || picType == 'p')
> -                    rce->keptAsRef = false;
> -                if (picType == 'I')
> -                    rce->isIdr = true;
> -                if (picType == 'I' || picType == 'i')
> -                    rce->sliceType = I_SLICE;
> -                else if (picType == 'P' || picType == 'p')
> +                    RateControlEntry *rce = &m_rce2Pass[i];
>                      rce->sliceType = P_SLICE;
> -                else if (picType == 'B' || picType == 'b')
> -                    rce->sliceType = B_SLICE;
> -                else
> -                    e = -1;
> -                if (e < 10)
> +                    rce->qScale = rce->newQScale = x265_qp2qScale(20);
> +                    rce->miscBits = m_ncu + 10;
> +                    rce->newQp = 0;
> +                }
> +                /* read stats */
> +                p = statsIn;
> +                double totalQpAq = 0;
> +                for (int i = 0; i < m_numEntries; i++)
>                  {
> -                    x265_log(m_param, X265_LOG_ERROR, "statistics are
> damaged at line %d, parser out=%d\n", i, e);
> -                    return false;
> +                    RateControlEntry *rce, *rcePocOrder;
> +                    int frameNumber;
> +                    int encodeOrder;
> +                    char picType;
> +                    int e;
> +                    char *next;
> +                    double qpRc, qpAq, qNoVbv, qRceq;
> +                    next = strstr(p, ";");
> +                    if (next)
> +                        *next++ = 0;
> +                    e = sscanf(p, " in:%d out:%d", &frameNumber,
> &encodeOrder);
> +                    if (frameNumber < 0 || frameNumber >= m_numEntries)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "bad frame
> number (%d) at stats line %d\n", frameNumber, i);
> +                        return false;
> +                    }
> +                    rce = &m_rce2Pass[encodeOrder];
> +                    rcePocOrder = &m_rce2Pass[frameNumber];
> +                    m_encOrder[frameNumber] = encodeOrder;
> +                    if (!m_param->bMultiPassOptRPS)
> +                    {
> +                        int scenecut = 0;
> +                        e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
> q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
> scu:%lf sc:%d",
> +                            &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
> &rce->coeffBits,
> +                            &rce->mvBits, &rce->miscBits, &rce->iCuCount,
> &rce->pCuCount,
> +                            &rce->skipCuCount, &scenecut);
> +                        rcePocOrder->scenecut = scenecut != 0;
> +                    }
> +                    else
> +                    {
> +                        char deltaPOC[128];
> +                        char bUsed[40];
> +                        memset(deltaPOC, 0, sizeof(deltaPOC));
> +                        memset(bUsed, 0, sizeof(bUsed));
> +                        e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
> q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
> scu:%lf nump:%d numnegp:%d numposp:%d deltapoc:%s bused:%s",
> +                            &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
> &rce->coeffBits,
> +                            &rce->mvBits, &rce->miscBits, &rce->iCuCount,
> &rce->pCuCount,
> +                            &rce->skipCuCount,
> &rce->rpsData.numberOfPictures, &rce->rpsData.numberOfNegativePictures,
> &rce->rpsData.numberOfPositivePictures, deltaPOC, bUsed);
> +                        splitdeltaPOC(deltaPOC, rce);
> +                        splitbUsed(bUsed, rce);
> +                        rce->rpsIdx = -1;
> +                    }
> +                    rce->keptAsRef = true;
> +                    rce->isIdr = false;
> +                    if (picType == 'b' || picType == 'p')
> +                        rce->keptAsRef = false;
> +                    if (picType == 'I')
> +                        rce->isIdr = true;
> +                    if (picType == 'I' || picType == 'i')
> +                        rce->sliceType = I_SLICE;
> +                    else if (picType == 'P' || picType == 'p')
> +                        rce->sliceType = P_SLICE;
> +                    else if (picType == 'B' || picType == 'b')
> +                        rce->sliceType = B_SLICE;
> +                    else
> +                        e = -1;
> +                    if (e < 10)
> +                    {
> +                        x265_log(m_param, X265_LOG_ERROR, "statistics are
> damaged at line %d, parser out=%d\n", i, e);
> +                        return false;
> +                    }
> +                    rce->qScale = rce->newQScale = x265_qp2qScale(qpRc);
> +                    totalQpAq += qpAq;
> +                    rce->qpNoVbv = qNoVbv;
> +                    rce->qpaRc = qpRc;
> +                    rce->qpAq = qpAq;
> +                    rce->qRceq = qRceq;
> +                    p = next;
>                  }
> -                rce->qScale = rce->newQScale = x265_qp2qScale(qpRc);
> -                totalQpAq += qpAq;
> -                rce->qpNoVbv = qNoVbv;
> -                rce->qpaRc = qpRc;
> -                rce->qpAq = qpAq;
> -                rce->qRceq = qRceq;
> -                p = next;
> -            }
> -            X265_FREE(statsBuf);
> -            if (m_param->rc.rateControlMode != X265_RC_CQP)
> -            {
> -                m_start = 0;
> -                m_isQpModified = true;
> -                if (!initPass2())
> -                    return false;
> -            } /* else we're using constant quant, so no need to run the
> bitrate allocation */
> +                X265_FREE(statsBuf);
> +                if (m_param->rc.rateControlMode != X265_RC_CQP)
> +                {
> +                    m_start = 0;
> +                    m_isQpModified = true;
> +                    if (!initPass2())
> +                        return false;
> +                } /* else we're using constant quant, so no need to run
> the bitrate allocation */
> +            }
> +            else // X265_SHARE_MODE_SHAREDMEM == m_param->rc.dataShareMode
> +            {
> +                if (m_param->rc.cuTree)
> +                {
> +                    if (!initCUTreeSharedMem())
> +                    {
> +                        return false;
> +                    }
> +                }
> +            }
>          }
>          /* Open output file */
>          /* If input and output files are the same, output to a temp file
> @@ -682,19 +767,29 @@ bool RateControl::init(const SPS& sps)
>              X265_FREE(p);
>              if (m_param->rc.cuTree && !m_param->rc.bStatRead)
>              {
> -                statFileTmpname = strcatFilename(fileName,
> ".cutree.temp");
> -                if (!statFileTmpname)
> -                    return false;
> -                m_cutreeStatFileOut = x265_fopen(statFileTmpname, "wb");
> -                X265_FREE(statFileTmpname);
> -                if (!m_cutreeStatFileOut)
> +                if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
>                  {
> -                    x265_log_file(m_param, X265_LOG_ERROR, "can't open
> mbtree stats file %s.cutree.temp\n", fileName);
> -                    return false;
> +                    statFileTmpname = strcatFilename(fileName,
> ".cutree.temp");
> +                    if (!statFileTmpname)
> +                        return false;
> +                    m_cutreeStatFileOut = x265_fopen(statFileTmpname,
> "wb");
> +                    X265_FREE(statFileTmpname);
> +                    if (!m_cutreeStatFileOut)
> +                    {
> +                        x265_log_file(m_param, X265_LOG_ERROR, "can't
> open mbtree stats file %s.cutree.temp\n", fileName);
> +                        return false;
> +                    }
> +                }
> +                else // X265_SHARE_MODE_SHAREDMEM ==
> m_param->rc.dataShareMode
> +                {
> +                    if (!initCUTreeSharedMem())
> +                    {
> +                        return false;
> +                    }
>                  }
>              }
>          }
> -        if (m_param->rc.cuTree)
> +        if (m_param->rc.cuTree && !m_cuTreeStats.qpBuffer[0])
>          {
>              if (m_param->rc.qgSize == 8)
>              {
> @@ -714,6 +809,10 @@ bool RateControl::init(const SPS& sps)
>      return true;
>  }
>
> +void RateControl::skipCUTreeSharedMemRead(int32_t cnt)
> +{
> +    m_cutreeShrMem->skipRead(cnt);
> +}
>  void RateControl::reconfigureRC()
>  {
>      if (m_isVbv)
> @@ -1665,10 +1764,25 @@ bool RateControl::cuTreeReadFor2Pass(Frame* frame)
>              {
>                  m_cuTreeStats.qpBufPos++;
>
> -                if (!fread(&type, 1, 1, m_cutreeStatFileIn))
> -                    goto fail;
> -                if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos],
> sizeof(uint16_t), ncu, m_cutreeStatFileIn) != (size_t)ncu)
> -                    goto fail;
> +                if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
> +                {
> +                    if (!fread(&type, 1, 1, m_cutreeStatFileIn))
> +                        goto fail;
> +                    if
> (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t),
> ncu, m_cutreeStatFileIn) != (size_t)ncu)
> +                        goto fail;
> +                }
> +                else // X265_SHARE_MODE_SHAREDMEM ==
> m_param->rc.dataShareMode
> +                {
> +                    if (!m_cutreeShrMem)
> +                    {
> +                        goto fail;
> +                    }
> +
> +                    CUTreeSharedDataItem shrItem;
> +                    shrItem.type = &type;
> +                    shrItem.stats =
> m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos];
> +                    m_cutreeShrMem->readNext(&shrItem,
> ReadSharedCUTreeData);
> +                }
>
>                  if (type != sliceTypeActual && m_cuTreeStats.qpBufPos ==
> 1)
>                  {
> @@ -3059,10 +3173,26 @@ int RateControl::writeRateControlFrameStats(Frame*
> curFrame, RateControlEntry* r
>      {
>          uint8_t sliceType = (uint8_t)rce->sliceType;
>          primitives.fix8Pack(m_cuTreeStats.qpBuffer[0],
> curFrame->m_lowres.qpCuTreeOffset, ncu);
> -        if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
> -            goto writeFailure;
> -        if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu,
> m_cutreeStatFileOut) < (size_t)ncu)
> -            goto writeFailure;
> +
> +        if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
> +        {
> +            if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
> +                goto writeFailure;
> +            if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu,
> m_cutreeStatFileOut) < (size_t)ncu)
> +                goto writeFailure;
> +        }
> +        else // X265_SHARE_MODE_SHAREDMEM == m_param->rc.dataShareMode
> +        {
> +            if (!m_cutreeShrMem)
> +            {
> +                goto writeFailure;
> +            }
> +
> +            CUTreeSharedDataItem shrItem;
> +            shrItem.type = &sliceType;
> +            shrItem.stats = m_cuTreeStats.qpBuffer[0];
> +            m_cutreeShrMem->writeData(&shrItem, WriteSharedCUTreeData);
> +        }
>      }
>      return 0;
>
> @@ -3138,6 +3268,13 @@ void RateControl::destroy()
>      if (m_cutreeStatFileIn)
>          fclose(m_cutreeStatFileIn);
>
> +    if (m_cutreeShrMem)
> +    {
> +        m_cutreeShrMem->release();
> +        delete m_cutreeShrMem;
> +        m_cutreeShrMem = NULL;
> +    }
> +
>      X265_FREE(m_rce2Pass);
>      X265_FREE(m_encOrder);
>      for (int i = 0; i < 2; i++)
> diff --git a/source/encoder/ratecontrol.h b/source/encoder/ratecontrol.h
> index 204bd71e1..10dfc268d 100644
> --- a/source/encoder/ratecontrol.h
> +++ b/source/encoder/ratecontrol.h
> @@ -28,6 +28,7 @@
>
>  #include "common.h"
>  #include "sei.h"
> +#include "ringmem.h"
>
>  namespace X265_NS {
>  // encoder namespace
> @@ -240,6 +241,8 @@ public:
>      FILE*   m_statFileOut;
>      FILE*   m_cutreeStatFileOut;
>      FILE*   m_cutreeStatFileIn;
> +    ///< store the cutree data in memory instead of file
> +    RingMem *m_cutreeShrMem;
>      double  m_lastAccumPNorm;
>      double  m_expectedBitsSum;   /* sum of qscale2bits after rceq,
> ratefactor, and overflow, only includes finished frames */
>      int64_t m_predictedBits;
> @@ -274,6 +277,9 @@ public:
>      int writeRateControlFrameStats(Frame* curFrame, RateControlEntry*
> rce);
>      bool   initPass2();
>
> +    bool initCUTreeSharedMem();
> +    void skipCUTreeSharedMemRead(int32_t cnt);
> +
>      double forwardMasking(Frame* curFrame, double q);
>      double backwardMasking(Frame* curFrame, double q);
>
> diff --git a/source/x265.h b/source/x265.h
> index 6bb893c98..bf945498f 100644
> --- a/source/x265.h
> +++ b/source/x265.h
> @@ -747,6 +747,13 @@ typedef struct x265_vmaf_commondata
>
>  static const x265_vmaf_commondata vcd[] = { { NULL, (char
> *)"/usr/local/share/model/vmaf_v0.6.1.pkl", NULL, NULL, 0, 0, 0, 0, 0, 0,
> 0, NULL, 0, 1, 0 } };
>
> +
> +typedef enum
> +{
> +    X265_SHARE_MODE_FILE = 0,
> +    X265_SHARE_MODE_SHAREDMEM
> +}X265_DATA_SHARE_MODES;
> +
>  /* x265 input parameters
>   *
>   * For version safety you may use x265_param_alloc/free() to manage the
> @@ -1433,19 +1440,16 @@ typedef struct x265_param
>          double    rfConstantMin;
>
>          /* Multi-pass encoding */
> -        /* Enable writing the stats in a multi-pass encode to the stat
> output file */
> +        /* Enable writing the stats in a multi-pass encode to the stat
> output file/memory */
>          int       bStatWrite;
>
> -        /* Enable loading data from the stat input file in a multi pass
> encode */
> +        /* Enable loading data from the stat input file/memory in a multi
> pass encode */
>          int       bStatRead;
>
>          /* Filename of the 2pass output/input stats file, if unspecified
> the
>           * encoder will default to using x265_2pass.log */
>          const char* statFileName;
>
> -        /* if only the focused frames would be re-encode or not */
> -        int       bEncFocusedFramesOnly;
> -
>          /* temporally blur quants */
>          double    qblur;
>
> @@ -1492,6 +1496,21 @@ typedef struct x265_param
>          /* internally enable if tune grain is set */
>          int      bEnableConstVbv;
>
> +        /* if only the focused frames would be re-encode or not */
> +        int       bEncFocusedFramesOnly;
> +
> +        /* Share the data via the stats file or shared memory.
> +        It must be one of the X265_DATA_SHARE_MODES enum values.
> +        Available if bStatWrite or bStatRead is true.
> +        The stats file is used by default.
> +        The stats file mode is used among encoders running in
> sequence.
> +        The shared memory mode can only be used among encoders
> running in parallel.
> +        Currently only the cutree data can be shared via shared memory.
> More data may be supported in the future. */
> +        int       dataShareMode;
> +
> +        /* Unique shared memory name. Required if the shared memory mode
> is enabled. NULL by default */
> +        const char* sharedMemName;
> +
>      } rc;
>
>      /*== Video Usability Information ==*/
> --
> 2.22.0.windows.1
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20211012/352b4da6/attachment-0001.html>


More information about the x265-devel mailing list