[x265] [PATCH] --pass 2: Add support for cutree data sharing via shared memory

Liwei Wang liwei at multicorewareinc.com
Tue Oct 12 07:15:18 UTC 2021


>From dc20a412f57c9a986cb69f54f5a3e0c821a41128 Mon Sep 17 00:00:00 2001
From: lwWang <liwei at multicorewareinc.com>
Date: Tue, 12 Oct 2021 14:18:30 +0800
Subject: [PATCH] --pass 2: Add support for cutree data sharing via shared
 memory

---
 source/CMakeLists.txt          |   2 +-
 source/common/CMakeLists.txt   |   3 +-
 source/common/param.cpp        |   7 +
 source/common/ringmem.cpp      | 357 ++++++++++++++++++++
 source/common/ringmem.h        |  90 +++++
 source/common/threading.h      | 137 ++++++++
 source/encoder/encoder.cpp     |   6 +
 source/encoder/ratecontrol.cpp | 579 ++++++++++++++++++++-------------
 source/encoder/ratecontrol.h   |   6 +
 source/x265.h                  |  29 +-
 10 files changed, 988 insertions(+), 228 deletions(-)
 create mode 100644 source/common/ringmem.cpp
 create mode 100644 source/common/ringmem.h

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 62afd3610..ad46614d2 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CPU" OFF)
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 202)
+set(X265_BUILD 203)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
index 2be305cac..6583ad56e 100644
--- a/source/common/CMakeLists.txt
+++ b/source/common/CMakeLists.txt
@@ -172,4 +172,5 @@ add_library(common OBJECT
     scalinglist.cpp scalinglist.h
     quant.cpp quant.h contexts.h
     deblock.cpp deblock.h
-    scaler.cpp scaler.h)
+    scaler.cpp scaler.h
+    ringmem.cpp ringmem.h)
diff --git a/source/common/param.cpp b/source/common/param.cpp
index 2c1583d93..9f878927b 100755
--- a/source/common/param.cpp
+++ b/source/common/param.cpp
@@ -281,7 +281,9 @@ void x265_param_default(x265_param* param)
     param->rc.rfConstantMin = 0;
     param->rc.bStatRead = 0;
     param->rc.bStatWrite = 0;
+    param->rc.dataShareMode = X265_SHARE_MODE_FILE;
     param->rc.statFileName = NULL;
+    param->rc.sharedMemName = NULL;
     param->rc.bEncFocusedFramesOnly = 0;
     param->rc.complexityBlur = 20;
     param->rc.qblur = 0.5;
@@ -1191,6 +1193,7 @@ int x265_param_parse(x265_param* p, const char* name,
const char* value)
         int pass = x265_clip3(0, 3, atoi(value));
         p->rc.bStatWrite = pass & 1;
         p->rc.bStatRead = pass & 2;
+        p->rc.dataShareMode = X265_SHARE_MODE_FILE;
     }
     OPT("stats") p->rc.statFileName = strdup(value);
     OPT("scaling-list") p->scalingLists = strdup(value);
@@ -1921,6 +1924,7 @@ int x265_check_params(x265_param* param)
             x265_log(param, X265_LOG_WARNING, "Live VBV enabled without
VBV settings.Disabling live VBV in 2 pass\n");
         }
     }
+    CHECK(param->rc.dataShareMode != X265_SHARE_MODE_FILE &&
param->rc.dataShareMode != X265_SHARE_MODE_SHAREDMEM, "Invalid data share
mode. It must be one of the X265_DATA_SHARE_MODES enum values\n" );
     return check_failed;
 }

@@ -2561,8 +2565,11 @@ void x265_copy_params(x265_param* dst, x265_param*
src)
     dst->rc.rfConstantMin = src->rc.rfConstantMin;
     dst->rc.bStatWrite = src->rc.bStatWrite;
     dst->rc.bStatRead = src->rc.bStatRead;
+    dst->rc.dataShareMode = src->rc.dataShareMode;
     if (src->rc.statFileName)
dst->rc.statFileName=strdup(src->rc.statFileName);
     else dst->rc.statFileName = NULL;
+    if (src->rc.sharedMemName) dst->rc.sharedMemName =
strdup(src->rc.sharedMemName);
+    else dst->rc.sharedMemName = NULL;
     dst->rc.qblur = src->rc.qblur;
     dst->rc.complexityBlur = src->rc.complexityBlur;
     dst->rc.bEnableSlowFirstPass = src->rc.bEnableSlowFirstPass;
diff --git a/source/common/ringmem.cpp b/source/common/ringmem.cpp
new file mode 100644
index 000000000..a4f191c90
--- /dev/null
+++ b/source/common/ringmem.cpp
@@ -0,0 +1,357 @@
+/*****************************************************************************
+ * Copyright (C) 2013-2017 MulticoreWare, Inc
+ *
+ * Authors: liwei <liwei at multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com
+
*****************************************************************************/
+
+#include "ringmem.h"
+
+#ifndef _WIN32
+#include <sys/mman.h>
+#endif ////< _WIN32
+
+///< Name prefixes; RingMem::init() appends the caller supplied name to each.
+#ifdef _WIN32
+#define X265_SHARED_MEM_NAME                    "Local\\_x265_shr_mem_"
+#define X265_SEMAPHORE_RINGMEM_WRITER_NAME    "_x265_semW_"
+#define X265_SEMAPHORE_RINGMEM_READER_NAME    "_x265_semR_"
+#else /* POSIX / pthreads */
+///< the shared memory is a regular file opened with open(), so a /tmp path is fine
+#define X265_SHARED_MEM_NAME                    "/tmp/_x265_shr_mem_"
+///< sem_open() names must be a single leading slash followed by characters that
+///< are not slashes, so the semaphores cannot use a "/tmp/..." path
+#define X265_SEMAPHORE_RINGMEM_WRITER_NAME    "/_x265_semW_"
+#define X265_SEMAPHORE_RINGMEM_READER_NAME    "/_x265_semR_"
+#endif
+
+#define RINGMEM_ALLIGNMENT                       64
+
+namespace X265_NS {
+    ///< Construct an empty ring: every member starts at its "nothing allocated"
+    ///< value (false / 0 / NULL). No shared memory or semaphore is touched here;
+    ///< that is done by init().
+    RingMem::RingMem()
+        : m_initialized(false)
+        , m_protectRW(false)
+        , m_itemSize(0)
+        , m_itemCnt(0)
+        , m_dataPool(NULL)
+        , m_shrMem(NULL)
+#ifdef _WIN32
+        , m_handle(NULL)
+#else //_WIN32
+        , m_filepath(NULL)
+#endif //_WIN32
+        , m_writeSem(NULL)
+        , m_readSem(NULL)
+    {
+    }
+
+
+    ///< The destructor is intentionally empty: it does not call release(), so the
+    ///< mapping and the semaphores are only torn down by an explicit release().
+    RingMem::~RingMem()
+    {
+    }
+
+    ///< Advance the read index by cnt items without copying any data.
+    ///< cnt: number of items to skip, must be >= 0
+    ///< Returns false when the ring is not initialized, when cnt is negative, or
+    ///< when a semaphore wait fails before all cnt items were skipped. In the
+    ///< last case the items acquired so far are still skipped, so the read index
+    ///< and the semaphores stay consistent.
+    bool RingMem::skipRead(int32_t cnt) {
+        if (!m_initialized || cnt < 0)
+        {
+            return false;
+        }
+
+        int32_t skipped = cnt;
+
+        if (m_protectRW)
+        {
+            ///< acquire one readable slot per skipped item, stop at the first failure
+            skipped = 0;
+            while (skipped < cnt && m_readSem->take())
+            {
+                skipped++;
+            }
+        }
+
+        if (skipped > 0)
+        {
+            ATOMIC_ADD(&m_shrMem->m_read, skipped);
+
+            if (m_protectRW)
+            {
+                ///< the skipped slots become writable again
+                m_writeSem->give(skipped);
+            }
+        }
+
+        return skipped == cnt;
+    }
+
+    ///< Advance the write index by cnt items without storing any data.
+    ///< cnt: number of items to skip, must be >= 0
+    ///< Returns false when the ring is not initialized, when cnt is negative, or
+    ///< when a semaphore wait fails before all cnt items were skipped. In the
+    ///< last case the slots acquired so far are still skipped, so the write index
+    ///< and the semaphores stay consistent.
+    bool RingMem::skipWrite(int32_t cnt) {
+        if (!m_initialized || cnt < 0)
+        {
+            return false;
+        }
+
+        int32_t skipped = cnt;
+
+        if (m_protectRW)
+        {
+            ///< acquire one writable slot per skipped item, stop at the first failure
+            skipped = 0;
+            while (skipped < cnt && m_writeSem->take())
+            {
+                skipped++;
+            }
+        }
+
+        if (skipped > 0)
+        {
+            ATOMIC_ADD(&m_shrMem->m_write, skipped);
+
+            if (m_protectRW)
+            {
+                ///< the skipped slots are handed to the reader
+                m_readSem->give(skipped);
+            }
+        }
+
+        return skipped == cnt;
+    }
+
+    ///< initialize
+    ///< Opens the shared memory region named X265_SHARED_MEM_NAME + name, creating
+    ///< and zero-filling it when it does not exist yet, and optionally creates or
+    ///< opens the two named semaphores used to pace the reader and the writer.
+    ///< itemSize:  size in bytes of one ring item, must be > 0
+    ///< itemCnt:   number of items in the ring, must be > 0
+    ///< name:      suffix appended to the platform specific name prefixes
+    ///< protectRW: when true the read/write/skip calls wait on the semaphores
+    ///< Returns false on invalid parameters or when a resource cannot be set up.
+    ///< Returns true without doing anything when the ring is already initialized.
+    bool RingMem::init(int32_t itemSize, int32_t itemCnt, const char *name, bool protectRW)
+    {
+        ///< check parameters
+        if (itemSize <= 0 || itemCnt <= 0 || NULL == name)
+        {
+            ///< invalid parameters
+            return false;
+        }
+
+        if (!m_initialized)
+        {
+            ///< formatting names; the buffer is zero filled and one byte is kept
+            ///< back so the name is always terminated
+            char nameBuf[MAX_SHR_NAME_LEN] = { 0 };
+
+            ///< shared memory name
+            snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SHARED_MEM_NAME, name);
+
+            ///< create or open shared memory
+            bool newCreated = false;
+
+            ///< size of the shared memory: control header plus all items,
+            ///< rounded up to a multiple of RINGMEM_ALLIGNMENT
+            int32_t shrMemSize = (itemSize * itemCnt + sizeof(ShrMemCtrl) + RINGMEM_ALLIGNMENT - 1) & ~(RINGMEM_ALLIGNMENT - 1);
+
+#ifdef _WIN32
+            HANDLE h = OpenFileMappingA(FILE_MAP_WRITE | FILE_MAP_READ, FALSE, nameBuf);
+            if (!h)
+            {
+                h = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, shrMemSize, nameBuf);
+
+                if (!h)
+                {
+                    return false;
+                }
+
+                newCreated = true;
+            }
+
+            void *pool = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, 0);
+
+            if (!pool)
+            {
+                ///< nothing owns the handle yet and release() would not run
+                ///< (m_initialized is still false), so close it here
+                CloseHandle(h);
+                return false;
+            }
+
+            ///< should not close the handle on success, otherwise a later
+            ///< OpenFileMapping would fail; release() closes it
+            m_handle = h;
+
+#else /* POSIX / pthreads */
+            mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+            int flag = O_RDWR;
+            int shrfd = -1;
+            if ((shrfd = open(nameBuf, flag, mode)) < 0)
+            {
+                flag |= O_CREAT;
+
+                shrfd = open(nameBuf, flag, mode);
+                if (shrfd < 0)
+                {
+                    return false;
+                }
+                newCreated = true;
+
+                ///< grow the new file to shrMemSize by writing its last byte
+                lseek(shrfd, shrMemSize - 1, SEEK_SET);
+
+                if (-1 == write(shrfd, "\0", 1))
+                {
+                    close(shrfd);
+                    return false;
+                }
+
+                if (lseek(shrfd, 0, SEEK_END) < shrMemSize)
+                {
+                    close(shrfd);
+                    return false;
+                }
+            }
+
+            void *pool = mmap(0,
+                shrMemSize,
+                PROT_READ | PROT_WRITE,
+                MAP_SHARED,
+                shrfd,
+                0);
+
+            ///< the mapping stays valid after the descriptor is closed
+            close(shrfd);
+            if (pool == MAP_FAILED)
+            {
+                return false;
+            }
+
+            ///< remembered so that release() can unlink the file
+            m_filepath = strdup(nameBuf);
+#endif ///< _WIN32
+
+            if (newCreated)
+            {
+                memset(pool, 0, shrMemSize);
+            }
+
+            ///< layout: ShrMemCtrl header followed by the item storage
+            m_shrMem = reinterpret_cast<ShrMemCtrl *>(pool);
+            m_dataPool = reinterpret_cast<uint8_t *>(pool) + sizeof(ShrMemCtrl);
+            m_itemSize = itemSize;
+            m_itemCnt = itemCnt;
+            ///< set before the semaphores so that release() can undo the mapping
+            ///< if one of the steps below fails
+            m_initialized = true;
+
+            if (protectRW)
+            {
+                m_protectRW = true;
+                m_writeSem = new NamedSemaphore();
+                if (!m_writeSem)
+                {
+                    release();
+                    return false;
+                }
+
+                ///< writer semaphore name; starts with every slot writable
+                snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SEMAPHORE_RINGMEM_WRITER_NAME, name);
+                if (!m_writeSem->create(nameBuf, m_itemCnt, m_itemCnt))
+                {
+                    release();
+                    return false;
+                }
+
+                m_readSem = new NamedSemaphore();
+                if (!m_readSem)
+                {
+                    release();
+                    return false;
+                }
+
+                ///< reader semaphore name; starts with nothing to read
+                snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SEMAPHORE_RINGMEM_READER_NAME, name);
+                if (!m_readSem->create(nameBuf, 0, m_itemCnt))
+                {
+                    release();
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+    ///< finalize
+    ///< Undo init(): unmap the shared memory (closing the mapping handle on
+    ///< Windows, unlinking the backing file on POSIX), release and delete both
+    ///< semaphores and reset the members. Does nothing when the ring is not
+    ///< initialized.
+    void RingMem::release()
+    {
+        if (m_initialized)
+        {
+            m_initialized = false;
+
+            if (m_shrMem)
+            {
+#ifdef _WIN32
+                UnmapViewOfFile(m_shrMem);
+                CloseHandle(m_handle);
+                m_handle = NULL;
+#else /* POSIX / pthreads */
+                ///< must be the same size expression as in init(), i.e. rounded up
+                ///< with the mask ~(RINGMEM_ALLIGNMENT - 1)
+                int32_t shrMemSize = (m_itemSize * m_itemCnt + sizeof(ShrMemCtrl) + RINGMEM_ALLIGNMENT - 1) & ~(RINGMEM_ALLIGNMENT - 1);
+                munmap(m_shrMem, shrMemSize);
+                unlink(m_filepath);
+                free(m_filepath);
+                m_filepath = NULL;
+#endif ///< _WIN32
+                m_shrMem = NULL;
+                m_dataPool = NULL;
+                m_itemSize = 0;
+                m_itemCnt = 0;
+            }
+
+            if (m_protectRW)
+            {
+                m_protectRW = false;
+                if (m_writeSem)
+                {
+                    m_writeSem->release();
+
+                    delete m_writeSem;
+                    m_writeSem = NULL;
+                }
+
+                if (m_readSem)
+                {
+                    m_readSem->release();
+
+                    delete m_readSem;
+                    m_readSem = NULL;
+                }
+            }
+
+        }
+    }
+
+    ///< data read
+    ///< Hands the next item of the ring to callback, which is invoked as
+    ///< callback(dst, item address inside the ring, m_itemSize).
+    ///< Returns false when the ring is not initialized, when dst or callback is
+    ///< NULL, or when the ring is protected and waiting on the read semaphore fails.
+    bool RingMem::readNext(void* dst, fnRWSharedData callback)
+    {
+        if (!(m_initialized && callback && dst))
+        {
+            return false;
+        }
+
+        ///< protected ring: wait until an item is available
+        if (m_protectRW && !m_readSem->take())
+        {
+            return false;
+        }
+
+        ///< bump the shared read counter and wrap it onto a slot of the ring
+        const int32_t slot = ATOMIC_ADD(&m_shrMem->m_read, 1) % m_itemCnt;
+        uint8_t *item = reinterpret_cast<uint8_t *>(m_dataPool) + slot * m_itemSize;
+        callback(dst, item, m_itemSize);
+
+        ///< protected ring: one more slot may be written
+        if (m_protectRW)
+        {
+            m_writeSem->give(1);
+        }
+
+        return true;
+    }
+    ///< data write
+    ///< Lets callback store data into the next item of the ring; it is invoked as
+    ///< callback(item address inside the ring, data, m_itemSize).
+    ///< Returns false when the ring is not initialized, when data or callback is
+    ///< NULL, or when the ring is protected and waiting on the write semaphore fails.
+    bool RingMem::writeData(void *data, fnRWSharedData callback)
+    {
+        if (!(m_initialized && data && callback))
+        {
+            return false;
+        }
+
+        ///< protected ring: wait until a slot is free
+        if (m_protectRW && !m_writeSem->take())
+        {
+            return false;
+        }
+
+        ///< bump the shared write counter and wrap it onto a slot of the ring
+        const int32_t slot = ATOMIC_ADD(&m_shrMem->m_write, 1) % m_itemCnt;
+        uint8_t *item = reinterpret_cast<uint8_t *>(m_dataPool) + slot * m_itemSize;
+        callback(item, data, m_itemSize);
+
+        ///< protected ring: one more item may be read
+        if (m_protectRW)
+        {
+            m_readSem->give(1);
+        }
+
+        return true;
+    }
+}
diff --git a/source/common/ringmem.h b/source/common/ringmem.h
new file mode 100644
index 000000000..b14f7bee9
--- /dev/null
+++ b/source/common/ringmem.h
@@ -0,0 +1,90 @@
+/*****************************************************************************
+ * Copyright (C) 2013-2017 MulticoreWare, Inc
+ *
+ * Authors: liwei <liwei at multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111,
USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com
+
*****************************************************************************/
+
+#ifndef X265_RINGMEM_H
+#define X265_RINGMEM_H
+
+#include "common.h"
+#include "threading.h"
+
+#if _MSC_VER
+#define snprintf _snprintf
+#define strdup _strdup
+#endif
+
+namespace X265_NS {
+
+#define MAX_SHR_NAME_LEN                         256
+
+    ///< Ring of fixed size items stored in a named shared memory region, with
+    ///< optional named semaphores pacing the reader and the writer.
+    class RingMem {
+    public:
+        RingMem();
+        ~RingMem();
+
+        ///< advance the read index by cnt items without reading them
+        bool skipRead(int32_t cnt);
+
+        ///< advance the write index by cnt items without writing them
+        bool skipWrite(int32_t cnt);
+
+        ///< initialize
+        ///< itemSize / itemCnt: size in bytes of one item and number of items
+        ///< name: suffix used to build the shared memory and semaphore names
+        ///< protectRW: whether to use the semaphores to protect the write and
+        ///< read operations.
+        bool init(int32_t itemSize, int32_t itemCnt, const char *name,
bool protectRW = false);
+        ///< finalize
+        void release();
+
+        ///< copy callback used by readNext()/writeData(); size is the item size
+        typedef void(*fnRWSharedData)(void *dst, void *src, int32_t size);
+
+        ///< data read: callback(dst, item in the ring, item size)
+        bool readNext(void* dst, fnRWSharedData callback);
+        ///< data write: callback(item in the ring, data, item size)
+        bool writeData(void *data, fnRWSharedData callback);
+
+    private:
+        ///< true between a successful init() and release()
+        bool    m_initialized;
+        ///< true when the semaphores are in use
+        bool    m_protectRW;
+
+        ///< size in bytes of one item / number of items in the ring
+        int32_t m_itemSize;
+        int32_t m_itemCnt;
+        ///< data pool: item storage, located right after the ShrMemCtrl header
+        void   *m_dataPool;
+        ///< control header placed at the start of the shared memory
+        typedef struct {
+            ///< index to write (taken modulo the item count to get a slot)
+            int32_t m_write;
+            ///< index to read (taken modulo the item count to get a slot)
+            int32_t m_read;
+
+        }ShrMemCtrl;
+
+        ///< start of the mapped shared memory
+        ShrMemCtrl *m_shrMem;
+#ifdef _WIN32
+        ///< file mapping handle, kept open until release()
+        void       *m_handle;
+#else // _WIN32
+        ///< path of the backing file, unlinked by release()
+        char       *m_filepath;
+#endif // _WIN32
+
+        ///< Semaphores: m_writeSem counts free slots, m_readSem counts readable items
+        NamedSemaphore *m_writeSem;
+        NamedSemaphore *m_readSem;
+    };
+};
+
+#endif // ifndef X265_RINGMEM_H
diff --git a/source/common/threading.h b/source/common/threading.h
index 53a63beaf..dcf6081e3 100644
--- a/source/common/threading.h
+++ b/source/common/threading.h
@@ -3,6 +3,7 @@
  *
  * Authors: Steve Borho <steve at borho.org>
  *          Min Chen <chenm003 at 163.com>
+            liwei <liwei at multicorewareinc.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -253,6 +254,47 @@ protected:
     int                m_val;
 };

+/* Thin wrapper around a Win32 named semaphore. The handle is not closed by
+ * the destructor; call release() explicitly. */
+class NamedSemaphore
+{
+public:
+    NamedSemaphore() : m_sem(NULL)
+    {
+    }
+
+    ~NamedSemaphore()
+    {
+    }
+
+    /* Create the semaphore called name with the given initial and maximum
+     * counts. CreateSemaphoreA is used, which also opens the semaphore if one
+     * with that name already exists. Does nothing if a handle is already held.
+     * Returns true when a valid handle is held afterwards. */
+    bool create(const char* name, const int initcnt, const int maxcnt)
+    {
+        if(!m_sem)
+        {
+            m_sem = CreateSemaphoreA(NULL, initcnt, maxcnt, name);
+        }
+        return m_sem != NULL;
+    }
+
+    /* Increase the count by cnt; returns false if the call fails. */
+    bool give(const int32_t cnt)
+    {
+        return ReleaseSemaphore(m_sem, (LONG)cnt, NULL) != FALSE;
+    }
+
+    /* Decrease the count by one, waiting up to time_out milliseconds
+     * (INFINITE by default). Returns false on timeout or failure. */
+    bool take(const uint32_t time_out = INFINITE)
+    {
+        const DWORD rt = WaitForSingleObject(m_sem, time_out);
+        return rt != WAIT_TIMEOUT && rt != WAIT_FAILED;
+    }
+
+    /* Close the handle, if any; safe to call repeatedly or before create(). */
+    void release()
+    {
+        if (m_sem)
+        {
+            CloseHandle(m_sem);
+            m_sem = NULL;
+        }
+    }
+
+private:
+    HANDLE m_sem;
+};
+
 #else /* POSIX / pthreads */

 typedef pthread_t ThreadHandle;
@@ -459,6 +501,101 @@ protected:
     int             m_val;
 };

+#define TIMEOUT_INFINITE 0xFFFFFFFF
+
+/* Thin wrapper around a POSIX named semaphore. The semaphore is not closed by
+ * the destructor; call release() explicitly. */
+class NamedSemaphore
+{
+public:
+    NamedSemaphore()
+        : m_sem(NULL)
+        , m_name(NULL)
+    {
+    }
+
+    ~NamedSemaphore()
+    {
+    }
+
+    /* Create the semaphore called name with initcnt as initial value, or open
+     * it if it already exists. maxcnt is only validated; POSIX semaphores have
+     * no per-semaphore maximum. initcnt == maxcnt is accepted, as it is by the
+     * Windows implementation. Does nothing if a semaphore is already held.
+     * Returns true when a usable semaphore is held afterwards. */
+    bool create(const char* name, const int initcnt, const int maxcnt)
+    {
+        if (m_sem)
+        {
+            return true;
+        }
+
+        if (!name || initcnt < 0 || maxcnt <= 0 || initcnt > maxcnt)
+        {
+            return false;
+        }
+
+        sem_t *sem = sem_open(name, O_CREAT | O_EXCL, 0666, initcnt);
+        if (SEM_FAILED == sem && EEXIST == errno)
+        {
+            /* somebody else created it first, attach to the existing one */
+            sem = sem_open(name, 0);
+        }
+
+        if (SEM_FAILED == sem)
+        {
+            /* never store SEM_FAILED: m_sem stays NULL so release() is a no-op */
+            return false;
+        }
+
+        m_sem = sem;
+        m_name = strdup(name);
+        return true;
+    }
+
+    /* Post the semaphore cnt times; returns false as soon as a post fails or
+     * when no semaphore is held. A cnt <= 0 posts nothing. */
+    bool give(const int32_t cnt)
+    {
+        if (!m_sem)
+        {
+            return false;
+        }
+
+        for (int32_t i = 0; i < cnt; i++)
+        {
+            if (sem_post(m_sem))
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /* Decrease the count by one. time_out is in milliseconds:
+     * TIMEOUT_INFINITE waits forever, 0 only tries, anything else waits at
+     * most that long. Returns false on timeout or failure. */
+    bool take(const uint32_t time_out = TIMEOUT_INFINITE)
+    {
+        if (!m_sem)
+        {
+            return false;
+        }
+
+        if (TIMEOUT_INFINITE == time_out)
+        {
+            return 0 == sem_wait(m_sem);
+        }
+
+        if (0 == time_out)
+        {
+            return 0 == sem_trywait(m_sem);
+        }
+
+        /* sem_timedwait() takes an absolute CLOCK_REALTIME deadline, not a
+         * duration, so the timeout is added to the current time.
+         * clock_gettime() is declared in <time.h>, which <pthread.h> makes
+         * visible. */
+        struct timespec ts;
+        if (clock_gettime(CLOCK_REALTIME, &ts))
+        {
+            return false;
+        }
+
+        ts.tv_sec += time_out / 1000;
+        ts.tv_nsec += (long)(time_out % 1000) * 1000000L;
+        if (ts.tv_nsec >= 1000000000L)
+        {
+            ts.tv_sec += 1;
+            ts.tv_nsec -= 1000000000L;
+        }
+
+        return 0 == sem_timedwait(m_sem, &ts);
+    }
+
+    /* Close the semaphore and unlink its name; safe to call repeatedly or
+     * before create(). */
+    void release()
+    {
+        if (m_sem)
+        {
+            sem_close(m_sem);
+            if (m_name)
+            {
+                sem_unlink(m_name);
+            }
+            m_sem = NULL;
+            free(m_name);
+            m_name = NULL;
+        }
+    }
+
+private:
+    sem_t *m_sem;
+    char  *m_name;
+};
+
 #endif // ifdef _WIN32

 class ScopedLock
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index 56f0e5433..607e64370 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -1006,6 +1006,7 @@ void Encoder::destroy()
         /* release string arguments that were strdup'd */
         free((char*)m_param->rc.lambdaFileName);
         free((char*)m_param->rc.statFileName);
+        free((char*)m_param->rc.sharedMemName);
         free((char*)m_param->analysisReuseFileName);
         free((char*)m_param->scalingLists);
         free((char*)m_param->csvfn);
@@ -4019,6 +4020,11 @@ void Encoder::configure(x265_param *p)
         p->rc.bStatRead = 0;
     }

+    if ((p->rc.bStatWrite || p->rc.bStatRead) && p->rc.dataShareMode !=
X265_SHARE_MODE_FILE && p->rc.dataShareMode != X265_SHARE_MODE_SHAREDMEM)
+    {
+        p->rc.dataShareMode = X265_SHARE_MODE_FILE;
+    }
+
     if (!p->rc.bStatRead || p->rc.rateControlMode != X265_RC_CRF)
     {
         p->rc.bEncFocusedFramesOnly = 0;
diff --git a/source/encoder/ratecontrol.cpp b/source/encoder/ratecontrol.cpp
index 6c59d0c64..a1321b38d 100644
--- a/source/encoder/ratecontrol.cpp
+++ b/source/encoder/ratecontrol.cpp
@@ -41,6 +41,10 @@
 #define BR_SHIFT  6
 #define CPB_SHIFT 4

+#define SHARED_DATA_ALIGNMENT      4 ///< 4 bytes, 32 bits
+#define CUTREE_SHARED_MEM_NAME     "cutree"
+#define GOP_CNT_CU_TREE            3
+
 using namespace X265_NS;

 /* Amortize the partial cost of I frames over the next N frames */
@@ -104,6 +108,37 @@ inline char *strcatFilename(const char *input, const
char *suffix)
     return output;
 }

+/* Caller side view of one cutree record exchanged through the ring:
+ * a pointer to the one byte type tag and a pointer to the 16 bit stats. */
+struct CUTreeSharedDataItem
+{
+    uint8_t  *type;
+    uint16_t *stats;
+};
+
+/* RingMem read callback: unpack one record from shared memory (src, size
+ * bytes) into the buffers referenced by the CUTreeSharedDataItem at dst.
+ * A record is the type byte padded to SHARED_DATA_ALIGNMENT, followed by the
+ * stats payload that fills the rest of the item. */
+static void ReadSharedCUTreeData(void *dst, void *src, int32_t size)
+{
+    ///< for memory alignment, the type takes 32 bits in the shared memory
+    const int32_t headerBytes = (sizeof(uint8_t) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
+
+    CUTreeSharedDataItem *item = static_cast<CUTreeSharedDataItem *>(dst);
+    const uint8_t *record = static_cast<const uint8_t *>(src);
+
+    item->type[0] = record[0];
+    memcpy(item->stats, record + headerBytes, size - headerBytes);
+}
+
+/* RingMem write callback: pack the buffers referenced by the
+ * CUTreeSharedDataItem at src into one shared memory record (dst, size
+ * bytes). A record is the type byte padded to SHARED_DATA_ALIGNMENT, followed
+ * by the stats payload that fills the rest of the item. */
+static void WriteSharedCUTreeData(void *dst, void *src, int32_t size)
+{
+    ///< for memory alignment, the type takes 32 bits in the shared memory
+    const int32_t headerBytes = (sizeof(uint8_t) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
+
+    const CUTreeSharedDataItem *item = static_cast<const CUTreeSharedDataItem *>(src);
+    uint8_t *record = static_cast<uint8_t *>(dst);
+
+    record[0] = item->type[0];
+    memcpy(record + headerBytes, item->stats, size - headerBytes);
+}
+
+
 inline double qScale2bits(RateControlEntry *rce, double qScale)
 {
     if (qScale < 0.1)
@@ -209,6 +244,7 @@ RateControl::RateControl(x265_param& p, Encoder *top)
     m_lastAbrResetPoc = -1;
     m_statFileOut = NULL;
     m_cutreeStatFileOut = m_cutreeStatFileIn = NULL;
+    m_cutreeShrMem = NULL;
     m_rce2Pass = NULL;
     m_encOrder = NULL;
     m_lastBsliceSatdCost = 0;
@@ -320,6 +356,42 @@ RateControl::RateControl(x265_param& p, Encoder *top)
         m_cuTreeStats.qpBuffer[i] = NULL;
 }

+/* Create, on first call, the shared memory ring used to exchange cutree data.
+ * Returns true when the ring is ready (including when it already was), false
+ * when the name does not fit or the ring cannot be initialized. On failure
+ * m_cutreeShrMem is left NULL so that a later call retries instead of
+ * reporting success for a ring that was never set up. */
+bool RateControl::initCUTreeSharedMem()
+{
+    if (m_cutreeShrMem)
+    {
+        return true;
+    }
+
+    ///< one item: the type byte padded to SHARED_DATA_ALIGNMENT, followed by
+    ///< one uint16_t per CU (four per CU when qgSize is 8)
+    int32_t itemSize = (sizeof(uint8_t) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
+    if (m_param->rc.qgSize == 8)
+    {
+        itemSize += sizeof(uint16_t) * m_ncu * 4;
+    }
+    else
+    {
+        itemSize += sizeof(uint16_t) * m_ncu;
+    }
+
+    ///< now cutree data from at most 3 gops would be stored in the shared
+    ///< memory at the same time
+    int32_t itemCnt = X265_MIN(m_param->keyframeMax, (int)(m_fps + 0.5));
+    itemCnt *= GOP_CNT_CU_TREE;
+
+    ///< sharedMemName defaults to NULL; treat that as an empty prefix instead
+    ///< of passing NULL to strcpy
+    const char *prefix = m_param->rc.sharedMemName ? m_param->rc.sharedMemName : "";
+
+    char shrname[MAX_SHR_NAME_LEN] = { 0 };
+    if (strlen(prefix) + strlen(CUTREE_SHARED_MEM_NAME) >= sizeof(shrname))
+    {
+        ///< the combined name would overflow shrname
+        return false;
+    }
+    strcpy(shrname, prefix);
+    strcat(shrname, CUTREE_SHARED_MEM_NAME);
+
+    RingMem *ring = new RingMem();
+    if (!ring)
+    {
+        return false;
+    }
+
+    if (!ring->init(itemSize, itemCnt, shrname))
+    {
+        delete ring;
+        return false;
+    }
+
+    m_cutreeShrMem = ring;
+    return true;
+}
+
 bool RateControl::init(const SPS& sps)
 {
     if (m_isVbv && !m_initVbv)
@@ -421,244 +493,257 @@ bool RateControl::init(const SPS& sps)
         /* Load stat file and init 2pass algo */
         if (m_param->rc.bStatRead)
         {
-            m_expectedBitsSum = 0;
-            char *p, *statsIn, *statsBuf;
-            /* read 1st pass stats */
-            statsIn = statsBuf = x265_slurp_file(fileName);
-            if (!statsBuf)
-                return false;
-            if (m_param->rc.cuTree)
+            if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
             {
-                char *tmpFile = strcatFilename(fileName, ".cutree");
-                if (!tmpFile)
+                m_expectedBitsSum = 0;
+                char *p, *statsIn, *statsBuf;
+                /* read 1st pass stats */
+                statsIn = statsBuf = x265_slurp_file(fileName);
+                if (!statsBuf)
                     return false;
-                m_cutreeStatFileIn = x265_fopen(tmpFile, "rb");
-                X265_FREE(tmpFile);
-                if (!m_cutreeStatFileIn)
+                if (m_param->rc.cuTree)
                 {
-                    x265_log_file(m_param, X265_LOG_ERROR, "can't open
stats file %s.cutree\n", fileName);
-                    return false;
+                    char *tmpFile = strcatFilename(fileName, ".cutree");
+                    if (!tmpFile)
+                        return false;
+                    m_cutreeStatFileIn = x265_fopen(tmpFile, "rb");
+                    X265_FREE(tmpFile);
+                    if (!m_cutreeStatFileIn)
+                    {
+                        x265_log_file(m_param, X265_LOG_ERROR, "can't open
stats file %s.cutree\n", fileName);
+                        return false;
+                    }
                 }
-            }

-            /* check whether 1st pass options were compatible with current
options */
-            if (strncmp(statsBuf, "#options:", 9))
-            {
-                x265_log(m_param, X265_LOG_ERROR,"options list in stats
file not valid\n");
-                return false;
-            }
-            {
-                int i, j, m;
-                uint32_t k , l;
-                bool bErr = false;
-                char *opts = statsBuf;
-                statsIn = strchr(statsBuf, '\n');
-                if (!statsIn)
-                {
-                    x265_log(m_param, X265_LOG_ERROR, "Malformed stats
file\n");
-                    return false;
-                }
-                *statsIn = '\0';
-                statsIn++;
-                if ((p = strstr(opts, " input-res=")) == 0 || sscanf(p, "
input-res=%dx%d", &i, &j) != 2)
-                {
-                    x265_log(m_param, X265_LOG_ERROR, "Resolution
specified in stats file not valid\n");
-                    return false;
-                }
-                if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, "
fps=%u/%u", &k, &l) != 2)
-                {
-                    x265_log(m_param, X265_LOG_ERROR, "fps specified in
stats file not valid\n");
-                    return false;
-                }
-                if (((p = strstr(opts, " vbv-maxrate=")) == 0 || sscanf(p,
" vbv-maxrate=%d", &m) != 1) && m_param->rc.rateControlMode == X265_RC_CRF)
+                /* check whether 1st pass options were compatible with
current options */
+                if (strncmp(statsBuf, "#options:", 9))
                 {
-                    x265_log(m_param, X265_LOG_ERROR, "Constant
rate-factor is incompatible with 2pass without vbv-maxrate in the previous
pass\n");
+                    x265_log(m_param, X265_LOG_ERROR, "options list in
stats file not valid\n");
                     return false;
                 }
-                if (k != m_param->fpsNum || l != m_param->fpsDenom)
                 {
-                    x265_log(m_param, X265_LOG_ERROR, "fps mismatch with
1st pass (%u/%u vs %u/%u)\n",
-                              m_param->fpsNum, m_param->fpsDenom, k, l);
-                    return false;
-                }
-                if (m_param->analysisMultiPassRefine)
-                {
-                    p = strstr(opts, "ref=");
-                    sscanf(p, "ref=%d", &i);
-                    if (i > m_param->maxNumReferences)
+                    int i, j, m;
+                    uint32_t k, l;
+                    bool bErr = false;
+                    char *opts = statsBuf;
+                    statsIn = strchr(statsBuf, '\n');
+                    if (!statsIn)
                     {
-                        x265_log(m_param, X265_LOG_ERROR,
"maxNumReferences cannot be less than 1st pass (%d vs %d)\n",
-                            i, m_param->maxNumReferences);
+                        x265_log(m_param, X265_LOG_ERROR, "Malformed stats
file\n");
                         return false;
                     }
-                }
-                if (m_param->analysisMultiPassRefine ||
m_param->analysisMultiPassDistortion)
-                {
-                    p = strstr(opts, "ctu=");
-                    sscanf(p, "ctu=%u", &k);
-                    if (k != m_param->maxCUSize)
+                    *statsIn = '\0';
+                    statsIn++;
+                    if ((p = strstr(opts, " input-res=")) == 0 ||
sscanf(p, " input-res=%dx%d", &i, &j) != 2)
+                    {
+                        x265_log(m_param, X265_LOG_ERROR, "Resolution
specified in stats file not valid\n");
+                        return false;
+                    }
+                    if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, "
fps=%u/%u", &k, &l) != 2)
+                    {
+                        x265_log(m_param, X265_LOG_ERROR, "fps specified
in stats file not valid\n");
+                        return false;
+                    }
+                    if (((p = strstr(opts, " vbv-maxrate=")) == 0 ||
sscanf(p, " vbv-maxrate=%d", &m) != 1) && m_param->rc.rateControlMode ==
X265_RC_CRF)
+                    {
+                        x265_log(m_param, X265_LOG_ERROR, "Constant
rate-factor is incompatible with 2pass without vbv-maxrate in the previous
pass\n");
+                        return false;
+                    }
+                    if (k != m_param->fpsNum || l != m_param->fpsDenom)
+                    {
+                        x265_log(m_param, X265_LOG_ERROR, "fps mismatch
with 1st pass (%u/%u vs %u/%u)\n",
+                            m_param->fpsNum, m_param->fpsDenom, k, l);
+                        return false;
+                    }
+                    if (m_param->analysisMultiPassRefine)
+                    {
+                        p = strstr(opts, "ref=");
+                        sscanf(p, "ref=%d", &i);
+                        if (i > m_param->maxNumReferences)
+                        {
+                            x265_log(m_param, X265_LOG_ERROR,
"maxNumReferences cannot be less than 1st pass (%d vs %d)\n",
+                                i, m_param->maxNumReferences);
+                            return false;
+                        }
+                    }
+                    if (m_param->analysisMultiPassRefine ||
m_param->analysisMultiPassDistortion)
+                    {
+                        p = strstr(opts, "ctu=");
+                        sscanf(p, "ctu=%u", &k);
+                        if (k != m_param->maxCUSize)
+                        {
+                            x265_log(m_param, X265_LOG_ERROR, "maxCUSize
mismatch with 1st pass (%u vs %u)\n",
+                                k, m_param->maxCUSize);
+                            return false;
+                        }
+                    }
+                    CMP_OPT_FIRST_PASS("bitdepth",
m_param->internalBitDepth);
+                    CMP_OPT_FIRST_PASS("weightp",
m_param->bEnableWeightedPred);
+                    CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
+                    CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
+                    CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
+                    CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
+                    CMP_OPT_FIRST_PASS("scenecut",
m_param->scenecutThreshold);
+                    CMP_OPT_FIRST_PASS("intra-refresh",
m_param->bIntraRefresh);
+                    CMP_OPT_FIRST_PASS("frame-dup",
m_param->bEnableFrameDuplication);
+                    if (m_param->bMultiPassOptRPS)
+                    {
+                        CMP_OPT_FIRST_PASS("multi-pass-opt-rps",
m_param->bMultiPassOptRPS);
+                        CMP_OPT_FIRST_PASS("repeat-headers",
m_param->bRepeatHeaders);
+                        CMP_OPT_FIRST_PASS("min-keyint",
m_param->keyframeMin);
+                    }
+
+                    if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p,
"b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
+                    {
+                        m_param->bFrameAdaptive = i;
+                    }
+                    else if (m_param->bframes)
                     {
-                        x265_log(m_param, X265_LOG_ERROR, "maxCUSize
mismatch with 1st pass (%u vs %u)\n",
-                            k, m_param->maxCUSize);
+                        x265_log(m_param, X265_LOG_ERROR, "b-adapt method
specified in stats file not valid\n");
                         return false;
                     }
+
+                    if ((p = strstr(opts, "rc-lookahead=")) != 0 &&
sscanf(p, "rc-lookahead=%d", &i))
+                        m_param->lookaheadDepth = i;
                 }
-                CMP_OPT_FIRST_PASS("bitdepth", m_param->internalBitDepth);
-                CMP_OPT_FIRST_PASS("weightp",
m_param->bEnableWeightedPred);
-                CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
-                CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
-                CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
-                CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
-                CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold);
-                CMP_OPT_FIRST_PASS("intra-refresh",
m_param->bIntraRefresh);
-                CMP_OPT_FIRST_PASS("frame-dup",
m_param->bEnableFrameDuplication);
-                if (m_param->bMultiPassOptRPS)
+                /* find number of pics */
+                p = statsIn;
+                int numEntries;
+                for (numEntries = -1; p; numEntries++)
+                    p = strchr(p + 1, ';');
+                if (!numEntries)
                 {
-                    CMP_OPT_FIRST_PASS("multi-pass-opt-rps",
m_param->bMultiPassOptRPS);
-                    CMP_OPT_FIRST_PASS("repeat-headers",
m_param->bRepeatHeaders);
-                    CMP_OPT_FIRST_PASS("min-keyint", m_param->keyframeMin);
+                    x265_log(m_param, X265_LOG_ERROR, "empty stats
file\n");
+                    return false;
                 }
+                m_numEntries = numEntries;

-                if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p,
"b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
+                if (m_param->totalFrames < m_numEntries &&
m_param->totalFrames > 0)
                 {
-                    m_param->bFrameAdaptive = i;
+                    x265_log(m_param, X265_LOG_WARNING, "2nd pass has
fewer frames than 1st pass (%d vs %d)\n",
+                        m_param->totalFrames, m_numEntries);
                 }
-                else if (m_param->bframes)
+                if (m_param->totalFrames > m_numEntries &&
!m_param->bEnableFrameDuplication)
                 {
-                    x265_log(m_param, X265_LOG_ERROR, "b-adapt method
specified in stats file not valid\n");
+                    x265_log(m_param, X265_LOG_ERROR, "2nd pass has more
frames than 1st pass (%d vs %d)\n",
+                        m_param->totalFrames, m_numEntries);
                     return false;
                 }

-                if ((p = strstr(opts, "rc-lookahead=")) != 0 && sscanf(p,
"rc-lookahead=%d", &i))
-                    m_param->lookaheadDepth = i;
-            }
-            /* find number of pics */
-            p = statsIn;
-            int numEntries;
-            for (numEntries = -1; p; numEntries++)
-                p = strchr(p + 1, ';');
-            if (!numEntries)
-            {
-                x265_log(m_param, X265_LOG_ERROR, "empty stats file\n");
-                return false;
-            }
-            m_numEntries = numEntries;
-
-            if (m_param->totalFrames < m_numEntries &&
m_param->totalFrames > 0)
-            {
-                x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer
frames than 1st pass (%d vs %d)\n",
-                         m_param->totalFrames, m_numEntries);
-            }
-            if (m_param->totalFrames > m_numEntries &&
!m_param->bEnableFrameDuplication)
-            {
-                x265_log(m_param, X265_LOG_ERROR, "2nd pass has more
frames than 1st pass (%d vs %d)\n",
-                         m_param->totalFrames, m_numEntries);
-                return false;
-            }
-
-            m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
-            if (!m_rce2Pass)
-            {
-                 x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2 pass
cannot be allocated\n");
-                 return false;
-            }
-            m_encOrder = X265_MALLOC(int, m_numEntries);
-            if (!m_encOrder)
-            {
-                x265_log(m_param, X265_LOG_ERROR, "Encode order for 2 pass
cannot be allocated\n");
-                return false;
-            }
-            /* init all to skipped p frames */
-            for (int i = 0; i < m_numEntries; i++)
-            {
-                RateControlEntry *rce = &m_rce2Pass[i];
-                rce->sliceType = P_SLICE;
-                rce->qScale = rce->newQScale = x265_qp2qScale(20);
-                rce->miscBits = m_ncu + 10;
-                rce->newQp = 0;
-            }
-            /* read stats */
-            p = statsIn;
-            double totalQpAq = 0;
-            for (int i = 0; i < m_numEntries; i++)
-            {
-                RateControlEntry *rce, *rcePocOrder;
-                int frameNumber;
-                int encodeOrder;
-                char picType;
-                int e;
-                char *next;
-                double qpRc, qpAq, qNoVbv, qRceq;
-                next = strstr(p, ";");
-                if (next)
-                    *next++ = 0;
-                e = sscanf(p, " in:%d out:%d", &frameNumber, &encodeOrder);
-                if (frameNumber < 0 || frameNumber >= m_numEntries)
+                m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
+                if (!m_rce2Pass)
                 {
-                    x265_log(m_param, X265_LOG_ERROR, "bad frame number
(%d) at stats line %d\n", frameNumber, i);
+                    x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2
pass cannot be allocated\n");
                     return false;
                 }
-                rce = &m_rce2Pass[encodeOrder];
-                rcePocOrder = &m_rce2Pass[frameNumber];
-                m_encOrder[frameNumber] = encodeOrder;
-                if (!m_param->bMultiPassOptRPS)
+                m_encOrder = X265_MALLOC(int, m_numEntries);
+                if (!m_encOrder)
                 {
-                    int scenecut = 0;
-                    e += sscanf(p, " in:%*d out:%*d type:%c q:%lf q-aq:%lf
q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf scu:%lf sc:%d",
-                        &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
&rce->coeffBits,
-                        &rce->mvBits, &rce->miscBits, &rce->iCuCount,
&rce->pCuCount,
-                        &rce->skipCuCount, &scenecut);
-                    rcePocOrder->scenecut = scenecut != 0;
+                    x265_log(m_param, X265_LOG_ERROR, "Encode order for 2
pass cannot be allocated\n");
+                    return false;
                 }
-                else
+                /* init all to skipped p frames */
+                for (int i = 0; i < m_numEntries; i++)
                 {
-                    char deltaPOC[128];
-                    char bUsed[40];
-                    memset(deltaPOC, 0, sizeof(deltaPOC));
-                    memset(bUsed, 0, sizeof(bUsed));
-                    e += sscanf(p, " in:%*d out:%*d type:%c q:%lf q-aq:%lf
q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf scu:%lf nump:%d
numnegp:%d numposp:%d deltapoc:%s bused:%s",
-                        &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
&rce->coeffBits,
-                        &rce->mvBits, &rce->miscBits, &rce->iCuCount,
&rce->pCuCount,
-                        &rce->skipCuCount, &rce->rpsData.numberOfPictures,
&rce->rpsData.numberOfNegativePictures,
&rce->rpsData.numberOfPositivePictures, deltaPOC, bUsed);
-                    splitdeltaPOC(deltaPOC, rce);
-                    splitbUsed(bUsed, rce);
-                    rce->rpsIdx = -1;
-                }
-                rce->keptAsRef = true;
-                rce->isIdr = false;
-                if (picType == 'b' || picType == 'p')
-                    rce->keptAsRef = false;
-                if (picType == 'I')
-                    rce->isIdr = true;
-                if (picType == 'I' || picType == 'i')
-                    rce->sliceType = I_SLICE;
-                else if (picType == 'P' || picType == 'p')
+                    RateControlEntry *rce = &m_rce2Pass[i];
                     rce->sliceType = P_SLICE;
-                else if (picType == 'B' || picType == 'b')
-                    rce->sliceType = B_SLICE;
-                else
-                    e = -1;
-                if (e < 10)
+                    rce->qScale = rce->newQScale = x265_qp2qScale(20);
+                    rce->miscBits = m_ncu + 10;
+                    rce->newQp = 0;
+                }
+                /* read stats */
+                p = statsIn;
+                double totalQpAq = 0;
+                for (int i = 0; i < m_numEntries; i++)
                 {
-                    x265_log(m_param, X265_LOG_ERROR, "statistics are
damaged at line %d, parser out=%d\n", i, e);
-                    return false;
+                    RateControlEntry *rce, *rcePocOrder;
+                    int frameNumber;
+                    int encodeOrder;
+                    char picType;
+                    int e;
+                    char *next;
+                    double qpRc, qpAq, qNoVbv, qRceq;
+                    next = strstr(p, ";");
+                    if (next)
+                        *next++ = 0;
+                    e = sscanf(p, " in:%d out:%d", &frameNumber,
&encodeOrder);
+                    if (frameNumber < 0 || frameNumber >= m_numEntries)
+                    {
+                        x265_log(m_param, X265_LOG_ERROR, "bad frame
number (%d) at stats line %d\n", frameNumber, i);
+                        return false;
+                    }
+                    rce = &m_rce2Pass[encodeOrder];
+                    rcePocOrder = &m_rce2Pass[frameNumber];
+                    m_encOrder[frameNumber] = encodeOrder;
+                    if (!m_param->bMultiPassOptRPS)
+                    {
+                        int scenecut = 0;
+                        e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
scu:%lf sc:%d",
+                            &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
&rce->coeffBits,
+                            &rce->mvBits, &rce->miscBits, &rce->iCuCount,
&rce->pCuCount,
+                            &rce->skipCuCount, &scenecut);
+                        rcePocOrder->scenecut = scenecut != 0;
+                    }
+                    else
+                    {
+                        char deltaPOC[128];
+                        char bUsed[40];
+                        memset(deltaPOC, 0, sizeof(deltaPOC));
+                        memset(bUsed, 0, sizeof(bUsed));
+                        e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
scu:%lf nump:%d numnegp:%d numposp:%d deltapoc:%s bused:%s",
+                            &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
&rce->coeffBits,
+                            &rce->mvBits, &rce->miscBits, &rce->iCuCount,
&rce->pCuCount,
+                            &rce->skipCuCount,
&rce->rpsData.numberOfPictures, &rce->rpsData.numberOfNegativePictures,
&rce->rpsData.numberOfPositivePictures, deltaPOC, bUsed);
+                        splitdeltaPOC(deltaPOC, rce);
+                        splitbUsed(bUsed, rce);
+                        rce->rpsIdx = -1;
+                    }
+                    rce->keptAsRef = true;
+                    rce->isIdr = false;
+                    if (picType == 'b' || picType == 'p')
+                        rce->keptAsRef = false;
+                    if (picType == 'I')
+                        rce->isIdr = true;
+                    if (picType == 'I' || picType == 'i')
+                        rce->sliceType = I_SLICE;
+                    else if (picType == 'P' || picType == 'p')
+                        rce->sliceType = P_SLICE;
+                    else if (picType == 'B' || picType == 'b')
+                        rce->sliceType = B_SLICE;
+                    else
+                        e = -1;
+                    if (e < 10)
+                    {
+                        x265_log(m_param, X265_LOG_ERROR, "statistics are
damaged at line %d, parser out=%d\n", i, e);
+                        return false;
+                    }
+                    rce->qScale = rce->newQScale = x265_qp2qScale(qpRc);
+                    totalQpAq += qpAq;
+                    rce->qpNoVbv = qNoVbv;
+                    rce->qpaRc = qpRc;
+                    rce->qpAq = qpAq;
+                    rce->qRceq = qRceq;
+                    p = next;
                 }
-                rce->qScale = rce->newQScale = x265_qp2qScale(qpRc);
-                totalQpAq += qpAq;
-                rce->qpNoVbv = qNoVbv;
-                rce->qpaRc = qpRc;
-                rce->qpAq = qpAq;
-                rce->qRceq = qRceq;
-                p = next;
-            }
-            X265_FREE(statsBuf);
-            if (m_param->rc.rateControlMode != X265_RC_CQP)
-            {
-                m_start = 0;
-                m_isQpModified = true;
-                if (!initPass2())
-                    return false;
-            } /* else we're using constant quant, so no need to run the
bitrate allocation */
+                X265_FREE(statsBuf);
+                if (m_param->rc.rateControlMode != X265_RC_CQP)
+                {
+                    m_start = 0;
+                    m_isQpModified = true;
+                    if (!initPass2())
+                        return false;
+                } /* else we're using constant quant, so no need to run
the bitrate allocation */
+            }
+            else // X265_SHARE_MODE_SHAREDMEM == m_param->rc.dataShareMode
+            {
+                if (m_param->rc.cuTree)
+                {
+                    if (!initCUTreeSharedMem())
+                    {
+                        return false;
+                    }
+                }
+            }
         }
         /* Open output file */
         /* If input and output files are the same, output to a temp file
@@ -682,19 +767,29 @@ bool RateControl::init(const SPS& sps)
             X265_FREE(p);
             if (m_param->rc.cuTree && !m_param->rc.bStatRead)
             {
-                statFileTmpname = strcatFilename(fileName, ".cutree.temp");
-                if (!statFileTmpname)
-                    return false;
-                m_cutreeStatFileOut = x265_fopen(statFileTmpname, "wb");
-                X265_FREE(statFileTmpname);
-                if (!m_cutreeStatFileOut)
+                if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
                 {
-                    x265_log_file(m_param, X265_LOG_ERROR, "can't open
mbtree stats file %s.cutree.temp\n", fileName);
-                    return false;
+                    statFileTmpname = strcatFilename(fileName,
".cutree.temp");
+                    if (!statFileTmpname)
+                        return false;
+                    m_cutreeStatFileOut = x265_fopen(statFileTmpname,
"wb");
+                    X265_FREE(statFileTmpname);
+                    if (!m_cutreeStatFileOut)
+                    {
+                        x265_log_file(m_param, X265_LOG_ERROR, "can't open
mbtree stats file %s.cutree.temp\n", fileName);
+                        return false;
+                    }
+                }
+                else // X265_SHARE_MODE_SHAREDMEM ==
m_param->rc.dataShareMode
+                {
+                    if (!initCUTreeSharedMem())
+                    {
+                        return false;
+                    }
                 }
             }
         }
-        if (m_param->rc.cuTree)
+        if (m_param->rc.cuTree && !m_cuTreeStats.qpBuffer[0])
         {
             if (m_param->rc.qgSize == 8)
             {
@@ -714,6 +809,10 @@ bool RateControl::init(const SPS& sps)
     return true;
 }

+void RateControl::skipCUTreeSharedMemRead(int32_t cnt)
+{
+    if (m_cutreeShrMem) m_cutreeShrMem->skipRead(cnt); /* forwards cnt to the ring; the ring is NULL unless initCUTreeSharedMem() created it */
+}
 void RateControl::reconfigureRC()
 {
     if (m_isVbv)
@@ -1665,10 +1764,25 @@ bool RateControl::cuTreeReadFor2Pass(Frame* frame)
             {
                 m_cuTreeStats.qpBufPos++;

-                if (!fread(&type, 1, 1, m_cutreeStatFileIn))
-                    goto fail;
-                if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos],
sizeof(uint16_t), ncu, m_cutreeStatFileIn) != (size_t)ncu)
-                    goto fail;
+                if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
+                {
+                    if (!fread(&type, 1, 1, m_cutreeStatFileIn))
+                        goto fail;
+                    if
(fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t),
ncu, m_cutreeStatFileIn) != (size_t)ncu)
+                        goto fail;
+                }
+                else // X265_SHARE_MODE_SHAREDMEM ==
m_param->rc.dataShareMode
+                {
+                    if (!m_cutreeShrMem)
+                    {
+                        goto fail;
+                    }
+
+                    CUTreeSharedDataItem shrItem;
+                    shrItem.type = &type;
+                    shrItem.stats =
m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos];
+                    m_cutreeShrMem->readNext(&shrItem,
ReadSharedCUTreeData);
+                }

                 if (type != sliceTypeActual && m_cuTreeStats.qpBufPos == 1)
                 {
@@ -3059,10 +3173,26 @@ int RateControl::writeRateControlFrameStats(Frame*
curFrame, RateControlEntry* r
     {
         uint8_t sliceType = (uint8_t)rce->sliceType;
         primitives.fix8Pack(m_cuTreeStats.qpBuffer[0],
curFrame->m_lowres.qpCuTreeOffset, ncu);
-        if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
-            goto writeFailure;
-        if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu,
m_cutreeStatFileOut) < (size_t)ncu)
-            goto writeFailure;
+
+        if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
+        {
+            if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
+                goto writeFailure;
+            if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu,
m_cutreeStatFileOut) < (size_t)ncu)
+                goto writeFailure;
+        }
+        else // X265_SHARE_MODE_SHAREDMEM == m_param->rc.dataShareMode
+        {
+            if (!m_cutreeShrMem)
+            {
+                goto writeFailure;
+            }
+
+            CUTreeSharedDataItem shrItem;
+            shrItem.type = &sliceType;
+            shrItem.stats = m_cuTreeStats.qpBuffer[0];
+            m_cutreeShrMem->writeData(&shrItem, WriteSharedCUTreeData);
+        }
     }
     return 0;

@@ -3138,6 +3268,13 @@ void RateControl::destroy()
     if (m_cutreeStatFileIn)
         fclose(m_cutreeStatFileIn);

+    if (m_cutreeShrMem)
+    {
+        m_cutreeShrMem->release();
+        delete m_cutreeShrMem;
+        m_cutreeShrMem = NULL;
+    }
+
     X265_FREE(m_rce2Pass);
     X265_FREE(m_encOrder);
     for (int i = 0; i < 2; i++)
diff --git a/source/encoder/ratecontrol.h b/source/encoder/ratecontrol.h
index 204bd71e1..10dfc268d 100644
--- a/source/encoder/ratecontrol.h
+++ b/source/encoder/ratecontrol.h
@@ -28,6 +28,7 @@

 #include "common.h"
 #include "sei.h"
+#include "ringmem.h"

 namespace X265_NS {
 // encoder namespace
@@ -240,6 +241,8 @@ public:
     FILE*   m_statFileOut;
     FILE*   m_cutreeStatFileOut;
     FILE*   m_cutreeStatFileIn;
+    /// stores the cutree data in shared memory instead of a file
+    RingMem *m_cutreeShrMem;
     double  m_lastAccumPNorm;
     double  m_expectedBitsSum;   /* sum of qscale2bits after rceq,
ratefactor, and overflow, only includes finished frames */
     int64_t m_predictedBits;
@@ -274,6 +277,9 @@ public:
     int writeRateControlFrameStats(Frame* curFrame, RateControlEntry* rce);
     bool   initPass2();

+    bool initCUTreeSharedMem();
+    void skipCUTreeSharedMemRead(int32_t cnt);
+
     double forwardMasking(Frame* curFrame, double q);
     double backwardMasking(Frame* curFrame, double q);

diff --git a/source/x265.h b/source/x265.h
index 6bb893c98..bf945498f 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -747,6 +747,13 @@ typedef struct x265_vmaf_commondata

 static const x265_vmaf_commondata vcd[] = { { NULL, (char
*)"/usr/local/share/model/vmaf_v0.6.1.pkl", NULL, NULL, 0, 0, 0, 0, 0, 0,
0, NULL, 0, 1, 0 } };

+
+typedef enum
+{
+    X265_SHARE_MODE_FILE      = 0, /* share the data through the stats file */
+    X265_SHARE_MODE_SHAREDMEM = 1  /* share the data through shared memory */
+} X265_DATA_SHARE_MODES;
+
 /* x265 input parameters
  *
  * For version safety you may use x265_param_alloc/free() to manage the
@@ -1433,19 +1440,16 @@ typedef struct x265_param
         double    rfConstantMin;

         /* Multi-pass encoding */
-        /* Enable writing the stats in a multi-pass encode to the stat
output file */
+        /* Enable writing the stats in a multi-pass encode to the stat
output file/memory */
         int       bStatWrite;

-        /* Enable loading data from the stat input file in a multi pass
encode */
+        /* Enable loading data from the stat input file/memory in a multi
pass encode */
         int       bStatRead;

         /* Filename of the 2pass output/input stats file, if unspecified
the
          * encoder will default to using x265_2pass.log */
         const char* statFileName;

-        /* if only the focused frames would be re-encode or not */
-        int       bEncFocusedFramesOnly;
-
         /* temporally blur quants */
         double    qblur;

@@ -1492,6 +1496,21 @@ typedef struct x265_param
         /* internally enable if tune grain is set */
         int      bEnableConstVbv;

+        /* if only the focused frames would be re-encode or not */
+        int       bEncFocusedFramesOnly;
+
+        /* Share the data through the stats file or through shared memory.
+        Must be one of the X265_DATA_SHARE_MODES enum values.
+        Available if bStatWrite or bStatRead is true.
+        The stats file is used by default.
+        Stats file mode is used between encoders running in sequence.
+        Shared memory mode can only be used between encoders running in parallel.
+        Currently only the cutree data can be shared through shared memory. More data may be supported in the future. */
+        int       dataShareMode;
+
+        /* Unique shared memory name. Required if shared memory mode is enabled. NULL by default */
+        const char* sharedMemName;
+
     } rc;

     /*== Video Usability Information ==*/
-- 
2.22.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20211012/56212342/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-x265-pass-2-Add-support-for-cutree-data-sharing-via-shared-memory.patch
Type: application/octet-stream
Size: 56882 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20211012/56212342/attachment-0001.obj>


More information about the x265-devel mailing list