[x265] [PATCH] --pass 2: Add support for cutree data sharing via shared memory
Liwei Wang
liwei at multicorewareinc.com
Tue Oct 12 07:15:18 UTC 2021
>From dc20a412f57c9a986cb69f54f5a3e0c821a41128 Mon Sep 17 00:00:00 2001
From: lwWang <liwei at multicorewareinc.com>
Date: Tue, 12 Oct 2021 14:18:30 +0800
Subject: [PATCH] --pass 2: Add support for cutree data sharing via shared
memory
---
source/CMakeLists.txt | 2 +-
source/common/CMakeLists.txt | 3 +-
source/common/param.cpp | 7 +
source/common/ringmem.cpp | 357 ++++++++++++++++++++
source/common/ringmem.h | 90 +++++
source/common/threading.h | 137 ++++++++
source/encoder/encoder.cpp | 6 +
source/encoder/ratecontrol.cpp | 579 ++++++++++++++++++++-------------
source/encoder/ratecontrol.h | 6 +
source/x265.h | 29 +-
10 files changed, 988 insertions(+), 228 deletions(-)
create mode 100644 source/common/ringmem.cpp
create mode 100644 source/common/ringmem.h
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 62afd3610..ad46614d2 100755
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CPU" OFF)
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 202)
+set(X265_BUILD 203)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt
index 2be305cac..6583ad56e 100644
--- a/source/common/CMakeLists.txt
+++ b/source/common/CMakeLists.txt
@@ -172,4 +172,5 @@ add_library(common OBJECT
scalinglist.cpp scalinglist.h
quant.cpp quant.h contexts.h
deblock.cpp deblock.h
- scaler.cpp scaler.h)
+ scaler.cpp scaler.h
+ ringmem.cpp ringmem.h)
diff --git a/source/common/param.cpp b/source/common/param.cpp
index 2c1583d93..9f878927b 100755
--- a/source/common/param.cpp
+++ b/source/common/param.cpp
@@ -281,7 +281,9 @@ void x265_param_default(x265_param* param)
param->rc.rfConstantMin = 0;
param->rc.bStatRead = 0;
param->rc.bStatWrite = 0;
+ param->rc.dataShareMode = X265_SHARE_MODE_FILE;
param->rc.statFileName = NULL;
+ param->rc.sharedMemName = NULL;
param->rc.bEncFocusedFramesOnly = 0;
param->rc.complexityBlur = 20;
param->rc.qblur = 0.5;
@@ -1191,6 +1193,7 @@ int x265_param_parse(x265_param* p, const char* name,
const char* value)
int pass = x265_clip3(0, 3, atoi(value));
p->rc.bStatWrite = pass & 1;
p->rc.bStatRead = pass & 2;
+ p->rc.dataShareMode = X265_SHARE_MODE_FILE;
}
OPT("stats") p->rc.statFileName = strdup(value);
OPT("scaling-list") p->scalingLists = strdup(value);
@@ -1921,6 +1924,7 @@ int x265_check_params(x265_param* param)
x265_log(param, X265_LOG_WARNING, "Live VBV enabled without
VBV settings.Disabling live VBV in 2 pass\n");
}
}
+ CHECK(param->rc.dataShareMode != X265_SHARE_MODE_FILE &&
param->rc.dataShareMode != X265_SHARE_MODE_SHAREDMEM, "Invalid data share
mode. It must be one of the X265_DATA_SHARE_MODES enum values\n" );
return check_failed;
}
@@ -2561,8 +2565,11 @@ void x265_copy_params(x265_param* dst, x265_param*
src)
dst->rc.rfConstantMin = src->rc.rfConstantMin;
dst->rc.bStatWrite = src->rc.bStatWrite;
dst->rc.bStatRead = src->rc.bStatRead;
+ dst->rc.dataShareMode = src->rc.dataShareMode;
if (src->rc.statFileName)
dst->rc.statFileName=strdup(src->rc.statFileName);
else dst->rc.statFileName = NULL;
+ if (src->rc.sharedMemName) dst->rc.sharedMemName =
strdup(src->rc.sharedMemName);
+ else dst->rc.sharedMemName = NULL;
dst->rc.qblur = src->rc.qblur;
dst->rc.complexityBlur = src->rc.complexityBlur;
dst->rc.bEnableSlowFirstPass = src->rc.bEnableSlowFirstPass;
diff --git a/source/common/ringmem.cpp b/source/common/ringmem.cpp
new file mode 100644
index 000000000..a4f191c90
--- /dev/null
+++ b/source/common/ringmem.cpp
@@ -0,0 +1,357 @@
+/*****************************************************************************
+ * Copyright (C) 2013-2017 MulticoreWare, Inc
+ *
+ * Authors: liwei <liwei at multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111,
USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com
+
*****************************************************************************/
+
+#include "ringmem.h"
+
+#ifndef _WIN32
+#include <sys/mman.h>
+#endif ////< _WIN32
+
+/* Name prefixes for the OS objects backing a RingMem. The caller-supplied
+ * ring name is appended to each prefix in RingMem::init(). */
+#ifdef _WIN32
+#define X265_SHARED_MEM_NAME "Local\\_x265_shr_mem_"
+#define X265_SEMAPHORE_RINGMEM_WRITER_NAME "_x265_semW_"
+#define X265_SEMAPHORE_RINGMEM_READER_NAME "_x265_semR_"
+#else /* POSIX / pthreads */
+/* The shared region is a regular file that is open()ed and mmap()ed, so a
+ * filesystem path is the right kind of name here. */
+#define X265_SHARED_MEM_NAME "/tmp/_x265_shr_mem_"
+/* Named semaphores are not filesystem paths: sem_open() expects a single
+ * leading '/' followed by characters that are not slashes, and glibc fails
+ * with EINVAL otherwise. */
+#define X265_SEMAPHORE_RINGMEM_WRITER_NAME "/_x265_semW_"
+#define X265_SEMAPHORE_RINGMEM_READER_NAME "/_x265_semR_"
+#endif
+
+#define RINGMEM_ALLIGNMENT 64
+
+namespace X265_NS {
+ ///< Construct an empty ring: every member starts cleared and no shared
+ ///< memory or semaphore is created here; that work is done by init().
+ RingMem::RingMem()
+ : m_initialized(false)
+ , m_protectRW(false)
+ , m_itemSize(0)
+ , m_itemCnt(0)
+ , m_dataPool(NULL)
+ , m_shrMem(NULL)
+#ifdef _WIN32
+ , m_handle(NULL)
+#else //_WIN32
+ , m_filepath(NULL)
+#endif //_WIN32
+ , m_writeSem(NULL)
+ , m_readSem(NULL)
+ {
+ }
+
+
+ ///< The destructor body is empty: it does not call release().
+ ///< NOTE(review): owners presumably have to call release() themselves
+ ///< before destroying the object - confirm against the callers.
+ RingMem::~RingMem()
+ {
+ }
+
+    ///< Advance the shared read index by cnt items without copying any data.
+    ///< Returns false only when the ring has not been initialized.
+    bool RingMem::skipRead(int32_t cnt)
+    {
+        if (!m_initialized)
+            return false;
+
+        ///< with semaphore protection, consume one reader token per skipped item
+        if (m_protectRW)
+        {
+            int32_t pending = cnt;
+            while (pending-- > 0)
+                m_readSem->take();
+        }
+
+        ATOMIC_ADD(&m_shrMem->m_read, cnt);
+
+        ///< ...and hand the same number of tokens to the writer side
+        if (m_protectRW)
+            m_writeSem->give(cnt);
+
+        return true;
+    }
+
+    ///< Advance the shared write index by cnt items without storing any data.
+    ///< Returns false only when the ring has not been initialized.
+    bool RingMem::skipWrite(int32_t cnt)
+    {
+        if (!m_initialized)
+            return false;
+
+        ///< with semaphore protection, consume one writer token per skipped item
+        if (m_protectRW)
+        {
+            int32_t pending = cnt;
+            while (pending-- > 0)
+                m_writeSem->take();
+        }
+
+        ATOMIC_ADD(&m_shrMem->m_write, cnt);
+
+        ///< ...and hand the same number of tokens to the reader side
+        if (m_protectRW)
+            m_readSem->give(cnt);
+
+        return true;
+    }
+
+    ///< initialize
+    ///< Creates, or attaches to, the shared ring identified by name. The ring
+    ///< holds itemCnt slots of itemSize bytes, preceded by a ShrMemCtrl header.
+    ///< When protectRW is true a writer and a reader named semaphore are
+    ///< created as well.
+    ///< Returns true on success or if the ring is already initialized, false
+    ///< on invalid arguments or when an OS resource cannot be obtained.
+    bool RingMem::init(int32_t itemSize, int32_t itemCnt, const char *name, bool protectRW)
+    {
+        ///< check parameters
+        if (itemSize <= 0 || itemCnt <= 0 || NULL == name)
+        {
+            ///< invalid parameters
+            return false;
+        }
+
+        if (!m_initialized)
+        {
+            ///< formating names
+            char nameBuf[MAX_SHR_NAME_LEN] = { 0 };
+
+            ///< shared memory name
+            snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SHARED_MEM_NAME, name);
+
+            ///< create or open shared memory
+            bool newCreated = false;
+
+            ///< calculate the size of the shared memory, rounded up to RINGMEM_ALLIGNMENT
+            int32_t shrMemSize = (itemSize * itemCnt + sizeof(ShrMemCtrl) + RINGMEM_ALLIGNMENT - 1) & ~(RINGMEM_ALLIGNMENT - 1);
+
+#ifdef _WIN32
+            HANDLE h = OpenFileMappingA(FILE_MAP_WRITE | FILE_MAP_READ, FALSE, nameBuf);
+            if (!h)
+            {
+                h = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, shrMemSize, nameBuf);
+
+                if (!h)
+                {
+                    return false;
+                }
+
+                newCreated = true;
+            }
+
+            void *pool = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, 0);
+
+            if (!pool)
+            {
+                ///< release() does nothing for an uninitialized ring, so the
+                ///< handle has to be closed here or it would leak
+                CloseHandle(h);
+                return false;
+            }
+
+            ///< should not close the handle on success, otherwise the OpenFileMapping would fail
+            m_handle = h;
+
+#else /* POSIX / pthreads */
+            mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+            int flag = O_RDWR;
+            int shrfd = -1;
+            if ((shrfd = open(nameBuf, flag, mode)) < 0)
+            {
+                flag |= O_CREAT;
+
+                shrfd = open(nameBuf, flag, mode);
+                if (shrfd < 0)
+                {
+                    return false;
+                }
+                newCreated = true;
+
+                ///< grow the new file to shrMemSize by writing its last byte
+                lseek(shrfd, shrMemSize - 1, SEEK_SET);
+
+                if (-1 == write(shrfd, "\0", 1))
+                {
+                    close(shrfd);
+                    return false;
+                }
+
+                if (lseek(shrfd, 0, SEEK_END) < shrMemSize)
+                {
+                    close(shrfd);
+                    return false;
+                }
+            }
+
+            void *pool = mmap(0,
+                              shrMemSize,
+                              PROT_READ | PROT_WRITE,
+                              MAP_SHARED,
+                              shrfd,
+                              0);
+
+            ///< the descriptor is closed once mmap() has been attempted
+            close(shrfd);
+            if (pool == MAP_FAILED)
+            {
+                return false;
+            }
+
+            m_filepath = strdup(nameBuf);
+#endif ///< _WIN32
+
+            if (newCreated)
+            {
+                memset(pool, 0, shrMemSize);
+            }
+
+            ///< layout: ShrMemCtrl header first, item slots right behind it
+            m_shrMem = reinterpret_cast<ShrMemCtrl *>(pool);
+            m_dataPool = reinterpret_cast<uint8_t *>(pool) + sizeof(ShrMemCtrl);
+            m_itemSize = itemSize;
+            m_itemCnt = itemCnt;
+            m_initialized = true;
+
+            if (protectRW)
+            {
+                m_protectRW = true;
+                m_writeSem = new NamedSemaphore();
+                if (!m_writeSem)
+                {
+                    release();
+                    return false;
+                }
+
+                ///< writer semaphore name; the bound is the buffer size, not
+                ///< the size of a pointer to it
+                snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SEMAPHORE_RINGMEM_WRITER_NAME, name);
+                if (!m_writeSem->create(nameBuf, m_itemCnt, m_itemCnt))
+                {
+                    release();
+                    return false;
+                }
+
+                m_readSem = new NamedSemaphore();
+                if (!m_readSem)
+                {
+                    release();
+                    return false;
+                }
+
+                ///< reader semaphore name
+                snprintf(nameBuf, sizeof(nameBuf) - 1, "%s%s", X265_SEMAPHORE_RINGMEM_READER_NAME, name);
+                if (!m_readSem->create(nameBuf, 0, m_itemCnt))
+                {
+                    release();
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+    ///< finalize
+    ///< Undoes init(): unmaps the shared region, drops the backing handle or
+    ///< file, releases both semaphores and clears the bookkeeping members.
+    ///< Does nothing when the ring is not initialized.
+    void RingMem::release()
+    {
+        if (m_initialized)
+        {
+            m_initialized = false;
+
+            if (m_shrMem)
+            {
+#ifdef _WIN32
+                UnmapViewOfFile(m_shrMem);
+                CloseHandle(m_handle);
+                m_handle = NULL;
+#else /* POSIX / pthreads */
+                ///< must be the same rounded length that init() passed to mmap()
+                int32_t shrMemSize = (m_itemSize * m_itemCnt + sizeof(ShrMemCtrl) + RINGMEM_ALLIGNMENT - 1) & ~(RINGMEM_ALLIGNMENT - 1);
+                munmap(m_shrMem, shrMemSize);
+                ///< m_filepath comes from strdup() and may be NULL
+                if (m_filepath)
+                {
+                    unlink(m_filepath);
+                    free(m_filepath);
+                    m_filepath = NULL;
+                }
+#endif ///< _WIN32
+                m_shrMem = NULL;
+                m_dataPool = NULL;
+                m_itemSize = 0;
+                m_itemCnt = 0;
+            }
+
+            if (m_protectRW)
+            {
+                m_protectRW = false;
+                if (m_writeSem)
+                {
+                    m_writeSem->release();
+
+                    delete m_writeSem;
+                    m_writeSem = NULL;
+                }
+
+                if (m_readSem)
+                {
+                    m_readSem->release();
+
+                    delete m_readSem;
+                    m_readSem = NULL;
+                }
+            }
+
+        }
+    }
+
+    ///< data read
+    ///< Picks the next slot through the shared read index and lets callback
+    ///< transfer it: callback(dst, slot, m_itemSize). Returns false when the
+    ///< ring is not initialized, an argument is NULL, or the reader semaphore
+    ///< could not be taken.
+    bool RingMem::readNext(void* dst, fnRWSharedData callback)
+    {
+        const bool usable = m_initialized && callback && dst;
+        if (!usable)
+            return false;
+
+        ///< with semaphore protection, wait for a reader token first
+        if (m_protectRW && !m_readSem->take())
+            return false;
+
+        ///< bump the read index and wrap the result onto the slot range
+        int32_t slot = ATOMIC_ADD(&m_shrMem->m_read, 1) % m_itemCnt;
+        uint8_t *item = reinterpret_cast<uint8_t *>(m_dataPool) + slot * m_itemSize;
+        (*callback)(dst, item, m_itemSize);
+
+        ///< give one token back to the writer side
+        if (m_protectRW)
+            m_writeSem->give(1);
+
+        return true;
+    }
+    ///< data write
+    ///< Picks the next slot through the shared write index and lets callback
+    ///< fill it: callback(slot, data, m_itemSize). Returns false when the ring
+    ///< is not initialized, an argument is NULL, or the writer semaphore could
+    ///< not be taken.
+    bool RingMem::writeData(void *data, fnRWSharedData callback)
+    {
+        const bool usable = m_initialized && data && callback;
+        if (!usable)
+            return false;
+
+        ///< with semaphore protection, wait for a writer token first
+        if (m_protectRW && !m_writeSem->take())
+            return false;
+
+        ///< bump the write index and wrap the result onto the slot range
+        int32_t slot = ATOMIC_ADD(&m_shrMem->m_write, 1) % m_itemCnt;
+        uint8_t *item = reinterpret_cast<uint8_t *>(m_dataPool) + slot * m_itemSize;
+        (*callback)(item, data, m_itemSize);
+
+        ///< give one token to the reader side
+        if (m_protectRW)
+            m_readSem->give(1);
+
+        return true;
+    }
+}
diff --git a/source/common/ringmem.h b/source/common/ringmem.h
new file mode 100644
index 000000000..b14f7bee9
--- /dev/null
+++ b/source/common/ringmem.h
@@ -0,0 +1,90 @@
+/*****************************************************************************
+ * Copyright (C) 2013-2017 MulticoreWare, Inc
+ *
+ * Authors: liwei <liwei at multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111,
USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at license @ x265.com
+
*****************************************************************************/
+
+#ifndef X265_RINGMEM_H
+#define X265_RINGMEM_H
+
+#include "common.h"
+#include "threading.h"
+
+#if _MSC_VER
+#define snprintf _snprintf
+#define strdup _strdup
+#endif
+
+namespace X265_NS {
+
+#define MAX_SHR_NAME_LEN 256
+
+ ///< Fixed-size ring of equally sized items kept in a named shared memory
+ ///< region, with optional named semaphores guarding reads and writes.
+ class RingMem {
+ public:
+ RingMem();
+ ~RingMem();
+
+ ///< advance the read index by cnt items without copying data
+ bool skipRead(int32_t cnt);
+
+ ///< advance the write index by cnt items without copying data
+ bool skipWrite(int32_t cnt);
+
+ ///< initialize
+ ///< itemSize: size of one item in bytes, itemCnt: number of items in the ring.
+ ///< name: suffix used to build the shared memory and semaphore names.
+ ///< protectRW: whether semaphores protect the write and read operations.
+ bool init(int32_t itemSize, int32_t itemCnt, const char *name,
bool protectRW = false);
+ ///< finalize
+ void release();
+
+ ///< copy callback used by readNext() and writeData(); size is the item size
+ typedef void(*fnRWSharedData)(void *dst, void *src, int32_t size);
+
+ ///< data read
+ bool readNext(void* dst, fnRWSharedData callback);
+ ///< data write
+ bool writeData(void *data, fnRWSharedData callback);
+
+ private:
+ ///< true between a successful init() and release()
+ bool m_initialized;
+ ///< true when the semaphores are in use
+ bool m_protectRW;
+
+ int32_t m_itemSize;
+ int32_t m_itemCnt;
+ ///< data pool
+ void *m_dataPool;
+ ///< control header placed at the start of the shared region
+ typedef struct {
+ ///< index to write
+ int32_t m_write;
+ ///< index to read
+ int32_t m_read;
+
+ }ShrMemCtrl;
+
+ ShrMemCtrl *m_shrMem;
+#ifdef _WIN32
+ ///< file mapping handle, kept open until release()
+ void *m_handle;
+#else // _WIN32
+ ///< path of the backing file, unlinked in release()
+ char *m_filepath;
+#endif // _WIN32
+
+ ///< Semaphores
+ NamedSemaphore *m_writeSem;
+ NamedSemaphore *m_readSem;
+ };
+};
+
+#endif // ifndef X265_RINGMEM_H
diff --git a/source/common/threading.h b/source/common/threading.h
index 53a63beaf..dcf6081e3 100644
--- a/source/common/threading.h
+++ b/source/common/threading.h
@@ -3,6 +3,7 @@
*
* Authors: Steve Borho <steve at borho.org>
* Min Chen <chenm003 at 163.com>
+ liwei <liwei at multicorewareinc.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -253,6 +254,47 @@ protected:
int m_val;
};
+/* Named counting semaphore built on a Win32 semaphore object. */
+class NamedSemaphore
+{
+public:
+    NamedSemaphore() : m_sem(NULL)
+    {
+    }
+
+    ~NamedSemaphore()
+    {
+    }
+
+    /* Create the semaphore called name with the given initial and maximum
+     * counts. Calling it again on an object that already holds a semaphore
+     * keeps the existing one. Returns true when a semaphore is held. */
+    bool create(const char* name, const int initcnt, const int maxcnt)
+    {
+        if(!m_sem)
+        {
+            m_sem = CreateSemaphoreA(NULL, initcnt, maxcnt, name);
+        }
+        return m_sem != NULL;
+    }
+
+    /* Increase the count by cnt; returns false when the call fails. */
+    bool give(const int32_t cnt)
+    {
+        return ReleaseSemaphore(m_sem, (LONG)cnt, NULL) != FALSE;
+    }
+
+    /* Wait up to time_out milliseconds (INFINITE by default) for the
+     * semaphore; returns false on timeout or failure. */
+    bool take(const uint32_t time_out = INFINITE)
+    {
+        int32_t rt = WaitForSingleObject(m_sem, time_out);
+        return rt != WAIT_TIMEOUT && rt != WAIT_FAILED;
+    }
+
+    /* Close the handle. A no-op when nothing was created, matching the
+     * POSIX implementation, instead of passing NULL to CloseHandle(). */
+    void release()
+    {
+        if (m_sem)
+        {
+            CloseHandle(m_sem);
+            m_sem = NULL;
+        }
+    }
+
+private:
+    HANDLE m_sem;
+};
+
+
#else /* POSIX / pthreads */
typedef pthread_t ThreadHandle;
@@ -459,6 +501,101 @@ protected:
int m_val;
};
+#define TIMEOUT_INFINITE 0xFFFFFFFF
+
+/* Named counting semaphore built on POSIX sem_open(). */
+class NamedSemaphore
+{
+public:
+    NamedSemaphore()
+        : m_sem(NULL)
+        , m_name(NULL)
+    {
+    }
+
+    ~NamedSemaphore()
+    {
+    }
+
+    /* Create the semaphore called name with initial count initcnt, or open
+     * it when it already exists. maxcnt is only validated here. Calling it
+     * again on an object that already holds a semaphore keeps the existing
+     * one, as the Win32 implementation does. Returns true when a semaphore
+     * is held afterwards. */
+    bool create(const char* name, const int initcnt, const int maxcnt)
+    {
+        if (m_sem)
+        {
+            return true;
+        }
+
+        /* initcnt == maxcnt is a valid request (a semaphore that starts
+         * full); only a count outside [0, maxcnt] is rejected. */
+        if (!name || initcnt < 0 || initcnt > maxcnt)
+        {
+            return false;
+        }
+
+        sem_t *sem = sem_open(name, O_CREAT | O_EXCL, 0666, initcnt);
+        if (sem == SEM_FAILED && EEXIST == errno)
+        {
+            /* somebody else created it first: attach to the existing one */
+            sem = sem_open(name, 0);
+        }
+
+        if (sem == SEM_FAILED)
+        {
+            /* m_sem stays NULL so release() never sees SEM_FAILED */
+            return false;
+        }
+
+        m_sem = sem;
+        m_name = strdup(name);
+        return true;
+    }
+
+    /* Post the semaphore cnt times; stops at the first failure. */
+    bool give(const int32_t cnt)
+    {
+        if (!m_sem)
+        {
+            return false;
+        }
+
+        int ret = 0;
+        int32_t curCnt = cnt;
+        while (curCnt-- && !ret) {
+            ret = sem_post(m_sem);
+        }
+
+        return 0 == ret;
+    }
+
+    /* Wait for the semaphore. time_out is in milliseconds:
+     * TIMEOUT_INFINITE blocks, 0 polls, anything else is a bounded wait.
+     * Returns true when the semaphore was taken. */
+    bool take(const uint32_t time_out = TIMEOUT_INFINITE)
+    {
+        if (!m_sem)
+        {
+            return false;
+        }
+
+        if (TIMEOUT_INFINITE == time_out) {
+            return 0 == sem_wait(m_sem);
+        }
+
+        if (0 == time_out)
+        {
+            return 0 == sem_trywait(m_sem);
+        }
+
+        /* sem_timedwait() takes an absolute CLOCK_REALTIME deadline, so the
+         * relative timeout is added to the current time. */
+        struct timespec ts;
+        if (0 != clock_gettime(CLOCK_REALTIME, &ts))
+        {
+            return false;
+        }
+        ts.tv_sec += time_out / 1000;
+        ts.tv_nsec += (long)(time_out % 1000) * 1000000L;
+        if (ts.tv_nsec >= 1000000000L)
+        {
+            ts.tv_sec += 1;
+            ts.tv_nsec -= 1000000000L;
+        }
+        return 0 == sem_timedwait(m_sem, &ts);
+    }
+
+    /* Close and unlink the semaphore and free the stored name. A no-op when
+     * nothing was created. */
+    void release()
+    {
+        if (m_sem)
+        {
+            sem_close(m_sem);
+            /* m_name comes from strdup() and may be NULL */
+            if (m_name)
+            {
+                sem_unlink(m_name);
+            }
+            m_sem = NULL;
+            free(m_name);
+            m_name = NULL;
+        }
+    }
+
+private:
+    sem_t *m_sem;
+    char *m_name;
+};
+
+
#endif // ifdef _WIN32
class ScopedLock
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index 56f0e5433..607e64370 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -1006,6 +1006,7 @@ void Encoder::destroy()
/* release string arguments that were strdup'd */
free((char*)m_param->rc.lambdaFileName);
free((char*)m_param->rc.statFileName);
+ free((char*)m_param->rc.sharedMemName);
free((char*)m_param->analysisReuseFileName);
free((char*)m_param->scalingLists);
free((char*)m_param->csvfn);
@@ -4019,6 +4020,11 @@ void Encoder::configure(x265_param *p)
p->rc.bStatRead = 0;
}
+ if ((p->rc.bStatWrite || p->rc.bStatRead) && p->rc.dataShareMode !=
X265_SHARE_MODE_FILE && p->rc.dataShareMode != X265_SHARE_MODE_SHAREDMEM)
+ {
+ p->rc.dataShareMode = X265_SHARE_MODE_FILE;
+ }
+
if (!p->rc.bStatRead || p->rc.rateControlMode != X265_RC_CRF)
{
p->rc.bEncFocusedFramesOnly = 0;
diff --git a/source/encoder/ratecontrol.cpp b/source/encoder/ratecontrol.cpp
index 6c59d0c64..a1321b38d 100644
--- a/source/encoder/ratecontrol.cpp
+++ b/source/encoder/ratecontrol.cpp
@@ -41,6 +41,10 @@
#define BR_SHIFT 6
#define CPB_SHIFT 4
+#define SHARED_DATA_ALIGNMENT 4 ///< 4 bytes (32 bits)
+#define CUTREE_SHARED_MEM_NAME "cutree" ///< suffix appended to rc.sharedMemName
+#define GOP_CNT_CU_TREE 3 ///< multiplier applied to the ring's item count
+
using namespace X265_NS;
/* Amortize the partial cost of I frames over the next N frames */
@@ -104,6 +108,37 @@ inline char *strcatFilename(const char *input, const
char *suffix)
return output;
}
+/* Caller-side view of one shared cutree item, handed to the RingMem copy
+ * callbacks: a one-byte type field and a buffer of 16-bit stats. */
+typedef struct CUTreeSharedDataItem
+{
+ uint8_t *type;   /* single byte; presumably the frame's slice type - confirm */
+ uint16_t *stats; /* buffer for the remaining bytes of the item */
+}CUTreeSharedDataItem;
+
+/* RingMem read callback: unpack one shared-memory slot (src, size bytes)
+ * into the CUTreeSharedDataItem that dst points to. */
+void static ReadSharedCUTreeData(void *dst, void *src, int32_t size)
+{
+    CUTreeSharedDataItem *item = reinterpret_cast<CUTreeSharedDataItem *>(dst);
+    uint8_t *slot = reinterpret_cast<uint8_t *>(src);
+
+    /* the type byte is padded to SHARED_DATA_ALIGNMENT inside the slot, so
+     * the stats start at the next aligned offset */
+    int32_t payloadOffset = (sizeof(*item->type) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
+
+    *item->type = slot[0];
+    memcpy(item->stats, reinterpret_cast<uint16_t *>(slot + payloadOffset), size - payloadOffset);
+}
+
+/* RingMem write callback: pack the CUTreeSharedDataItem that src points to
+ * into one shared-memory slot (dst, size bytes). */
+void static WriteSharedCUTreeData(void *dst, void *src, int32_t size)
+{
+    CUTreeSharedDataItem *item = reinterpret_cast<CUTreeSharedDataItem *>(src);
+    uint8_t *slot = reinterpret_cast<uint8_t *>(dst);
+
+    /* the type byte is padded to SHARED_DATA_ALIGNMENT inside the slot, so
+     * the stats start at the next aligned offset */
+    int32_t payloadOffset = (sizeof(*item->type) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
+
+    slot[0] = *item->type;
+    memcpy(reinterpret_cast<uint16_t *>(slot + payloadOffset), item->stats, size - payloadOffset);
+}
+
+
+
inline double qScale2bits(RateControlEntry *rce, double qScale)
{
if (qScale < 0.1)
@@ -209,6 +244,7 @@ RateControl::RateControl(x265_param& p, Encoder *top)
m_lastAbrResetPoc = -1;
m_statFileOut = NULL;
m_cutreeStatFileOut = m_cutreeStatFileIn = NULL;
+ m_cutreeShrMem = NULL;
m_rce2Pass = NULL;
m_encOrder = NULL;
m_lastBsliceSatdCost = 0;
@@ -320,6 +356,42 @@ RateControl::RateControl(x265_param& p, Encoder *top)
m_cuTreeStats.qpBuffer[i] = NULL;
}
+/* Create the shared-memory ring used to exchange cutree data, unless one is
+ * already installed. Returns true when a ring is available afterwards.
+ * On failure m_cutreeShrMem is left NULL, so a later call retries instead of
+ * reporting success for a ring that was never initialized. */
+bool RateControl::initCUTreeSharedMem()
+{
+    if (m_cutreeShrMem)
+    {
+        return true;
+    }
+
+    ///< one item = the type byte padded to SHARED_DATA_ALIGNMENT, followed by
+    ///< one uint16_t per CU (four per CU when qgSize is 8)
+    int32_t itemSize = (sizeof(uint8_t) + SHARED_DATA_ALIGNMENT - 1) & ~(SHARED_DATA_ALIGNMENT - 1);
+    if (m_param->rc.qgSize == 8)
+    {
+        itemSize += sizeof(uint16_t) * m_ncu * 4;
+    }
+    else
+    {
+        itemSize += sizeof(uint16_t) * m_ncu;
+    }
+
+    ///< now cutree data from at most 3 gops would be stored in the shared memory at the same time
+    int32_t itemCnt = X265_MIN(m_param->keyframeMax, (int)(m_fps + 0.5));
+    itemCnt *= GOP_CNT_CU_TREE;
+
+    ///< sharedMemName defaults to NULL; treat that as an empty prefix rather
+    ///< than passing NULL to strcpy()
+    const char *prefix = m_param->rc.sharedMemName ? m_param->rc.sharedMemName : "";
+
+    ///< refuse names that would not fit, instead of overrunning shrname
+    char shrname[MAX_SHR_NAME_LEN] = { 0 };
+    if (strlen(prefix) + strlen(CUTREE_SHARED_MEM_NAME) >= sizeof(shrname))
+    {
+        return false;
+    }
+    strcpy(shrname, prefix);
+    strcat(shrname, CUTREE_SHARED_MEM_NAME);
+
+    RingMem *ring = new RingMem();
+    if (!ring)
+    {
+        return false;
+    }
+
+    if (!ring->init(itemSize, itemCnt, shrname))
+    {
+        delete ring;
+        return false;
+    }
+
+    m_cutreeShrMem = ring;
+    return true;
+}
+
+
bool RateControl::init(const SPS& sps)
{
if (m_isVbv && !m_initVbv)
@@ -421,244 +493,257 @@ bool RateControl::init(const SPS& sps)
/* Load stat file and init 2pass algo */
if (m_param->rc.bStatRead)
{
- m_expectedBitsSum = 0;
- char *p, *statsIn, *statsBuf;
- /* read 1st pass stats */
- statsIn = statsBuf = x265_slurp_file(fileName);
- if (!statsBuf)
- return false;
- if (m_param->rc.cuTree)
+ if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
{
- char *tmpFile = strcatFilename(fileName, ".cutree");
- if (!tmpFile)
+ m_expectedBitsSum = 0;
+ char *p, *statsIn, *statsBuf;
+ /* read 1st pass stats */
+ statsIn = statsBuf = x265_slurp_file(fileName);
+ if (!statsBuf)
return false;
- m_cutreeStatFileIn = x265_fopen(tmpFile, "rb");
- X265_FREE(tmpFile);
- if (!m_cutreeStatFileIn)
+ if (m_param->rc.cuTree)
{
- x265_log_file(m_param, X265_LOG_ERROR, "can't open
stats file %s.cutree\n", fileName);
- return false;
+ char *tmpFile = strcatFilename(fileName, ".cutree");
+ if (!tmpFile)
+ return false;
+ m_cutreeStatFileIn = x265_fopen(tmpFile, "rb");
+ X265_FREE(tmpFile);
+ if (!m_cutreeStatFileIn)
+ {
+ x265_log_file(m_param, X265_LOG_ERROR, "can't open
stats file %s.cutree\n", fileName);
+ return false;
+ }
}
- }
- /* check whether 1st pass options were compatible with current
options */
- if (strncmp(statsBuf, "#options:", 9))
- {
- x265_log(m_param, X265_LOG_ERROR,"options list in stats
file not valid\n");
- return false;
- }
- {
- int i, j, m;
- uint32_t k , l;
- bool bErr = false;
- char *opts = statsBuf;
- statsIn = strchr(statsBuf, '\n');
- if (!statsIn)
- {
- x265_log(m_param, X265_LOG_ERROR, "Malformed stats
file\n");
- return false;
- }
- *statsIn = '\0';
- statsIn++;
- if ((p = strstr(opts, " input-res=")) == 0 || sscanf(p, "
input-res=%dx%d", &i, &j) != 2)
- {
- x265_log(m_param, X265_LOG_ERROR, "Resolution
specified in stats file not valid\n");
- return false;
- }
- if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, "
fps=%u/%u", &k, &l) != 2)
- {
- x265_log(m_param, X265_LOG_ERROR, "fps specified in
stats file not valid\n");
- return false;
- }
- if (((p = strstr(opts, " vbv-maxrate=")) == 0 || sscanf(p,
" vbv-maxrate=%d", &m) != 1) && m_param->rc.rateControlMode == X265_RC_CRF)
+ /* check whether 1st pass options were compatible with
current options */
+ if (strncmp(statsBuf, "#options:", 9))
{
- x265_log(m_param, X265_LOG_ERROR, "Constant
rate-factor is incompatible with 2pass without vbv-maxrate in the previous
pass\n");
+ x265_log(m_param, X265_LOG_ERROR, "options list in
stats file not valid\n");
return false;
}
- if (k != m_param->fpsNum || l != m_param->fpsDenom)
{
- x265_log(m_param, X265_LOG_ERROR, "fps mismatch with
1st pass (%u/%u vs %u/%u)\n",
- m_param->fpsNum, m_param->fpsDenom, k, l);
- return false;
- }
- if (m_param->analysisMultiPassRefine)
- {
- p = strstr(opts, "ref=");
- sscanf(p, "ref=%d", &i);
- if (i > m_param->maxNumReferences)
+ int i, j, m;
+ uint32_t k, l;
+ bool bErr = false;
+ char *opts = statsBuf;
+ statsIn = strchr(statsBuf, '\n');
+ if (!statsIn)
{
- x265_log(m_param, X265_LOG_ERROR,
"maxNumReferences cannot be less than 1st pass (%d vs %d)\n",
- i, m_param->maxNumReferences);
+ x265_log(m_param, X265_LOG_ERROR, "Malformed stats
file\n");
return false;
}
- }
- if (m_param->analysisMultiPassRefine ||
m_param->analysisMultiPassDistortion)
- {
- p = strstr(opts, "ctu=");
- sscanf(p, "ctu=%u", &k);
- if (k != m_param->maxCUSize)
+ *statsIn = '\0';
+ statsIn++;
+ if ((p = strstr(opts, " input-res=")) == 0 ||
sscanf(p, " input-res=%dx%d", &i, &j) != 2)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "Resolution
specified in stats file not valid\n");
+ return false;
+ }
+ if ((p = strstr(opts, " fps=")) == 0 || sscanf(p, "
fps=%u/%u", &k, &l) != 2)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "fps specified
in stats file not valid\n");
+ return false;
+ }
+ if (((p = strstr(opts, " vbv-maxrate=")) == 0 ||
sscanf(p, " vbv-maxrate=%d", &m) != 1) && m_param->rc.rateControlMode ==
X265_RC_CRF)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "Constant
rate-factor is incompatible with 2pass without vbv-maxrate in the previous
pass\n");
+ return false;
+ }
+ if (k != m_param->fpsNum || l != m_param->fpsDenom)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "fps mismatch
with 1st pass (%u/%u vs %u/%u)\n",
+ m_param->fpsNum, m_param->fpsDenom, k, l);
+ return false;
+ }
+ if (m_param->analysisMultiPassRefine)
+ {
+ p = strstr(opts, "ref=");
+ sscanf(p, "ref=%d", &i);
+ if (i > m_param->maxNumReferences)
+ {
+ x265_log(m_param, X265_LOG_ERROR,
"maxNumReferences cannot be less than 1st pass (%d vs %d)\n",
+ i, m_param->maxNumReferences);
+ return false;
+ }
+ }
+ if (m_param->analysisMultiPassRefine ||
m_param->analysisMultiPassDistortion)
+ {
+ p = strstr(opts, "ctu=");
+ sscanf(p, "ctu=%u", &k);
+ if (k != m_param->maxCUSize)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "maxCUSize
mismatch with 1st pass (%u vs %u)\n",
+ k, m_param->maxCUSize);
+ return false;
+ }
+ }
+ CMP_OPT_FIRST_PASS("bitdepth",
m_param->internalBitDepth);
+ CMP_OPT_FIRST_PASS("weightp",
m_param->bEnableWeightedPred);
+ CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
+ CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
+ CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
+ CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
+ CMP_OPT_FIRST_PASS("scenecut",
m_param->scenecutThreshold);
+ CMP_OPT_FIRST_PASS("intra-refresh",
m_param->bIntraRefresh);
+ CMP_OPT_FIRST_PASS("frame-dup",
m_param->bEnableFrameDuplication);
+ if (m_param->bMultiPassOptRPS)
+ {
+ CMP_OPT_FIRST_PASS("multi-pass-opt-rps",
m_param->bMultiPassOptRPS);
+ CMP_OPT_FIRST_PASS("repeat-headers",
m_param->bRepeatHeaders);
+ CMP_OPT_FIRST_PASS("min-keyint",
m_param->keyframeMin);
+ }
+
+ if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p,
"b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
+ {
+ m_param->bFrameAdaptive = i;
+ }
+ else if (m_param->bframes)
{
- x265_log(m_param, X265_LOG_ERROR, "maxCUSize
mismatch with 1st pass (%u vs %u)\n",
- k, m_param->maxCUSize);
+ x265_log(m_param, X265_LOG_ERROR, "b-adapt method
specified in stats file not valid\n");
return false;
}
+
+ if ((p = strstr(opts, "rc-lookahead=")) != 0 &&
sscanf(p, "rc-lookahead=%d", &i))
+ m_param->lookaheadDepth = i;
}
- CMP_OPT_FIRST_PASS("bitdepth", m_param->internalBitDepth);
- CMP_OPT_FIRST_PASS("weightp",
m_param->bEnableWeightedPred);
- CMP_OPT_FIRST_PASS("bframes", m_param->bframes);
- CMP_OPT_FIRST_PASS("b-pyramid", m_param->bBPyramid);
- CMP_OPT_FIRST_PASS("open-gop", m_param->bOpenGOP);
- CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
- CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold);
- CMP_OPT_FIRST_PASS("intra-refresh",
m_param->bIntraRefresh);
- CMP_OPT_FIRST_PASS("frame-dup",
m_param->bEnableFrameDuplication);
- if (m_param->bMultiPassOptRPS)
+ /* find number of pics */
+ p = statsIn;
+ int numEntries;
+ for (numEntries = -1; p; numEntries++)
+ p = strchr(p + 1, ';');
+ if (!numEntries)
{
- CMP_OPT_FIRST_PASS("multi-pass-opt-rps",
m_param->bMultiPassOptRPS);
- CMP_OPT_FIRST_PASS("repeat-headers",
m_param->bRepeatHeaders);
- CMP_OPT_FIRST_PASS("min-keyint", m_param->keyframeMin);
+ x265_log(m_param, X265_LOG_ERROR, "empty stats
file\n");
+ return false;
}
+ m_numEntries = numEntries;
- if ((p = strstr(opts, "b-adapt=")) != 0 && sscanf(p,
"b-adapt=%d", &i) && i >= X265_B_ADAPT_NONE && i <= X265_B_ADAPT_TRELLIS)
+ if (m_param->totalFrames < m_numEntries &&
m_param->totalFrames > 0)
{
- m_param->bFrameAdaptive = i;
+ x265_log(m_param, X265_LOG_WARNING, "2nd pass has
fewer frames than 1st pass (%d vs %d)\n",
+ m_param->totalFrames, m_numEntries);
}
- else if (m_param->bframes)
+ if (m_param->totalFrames > m_numEntries &&
!m_param->bEnableFrameDuplication)
{
- x265_log(m_param, X265_LOG_ERROR, "b-adapt method
specified in stats file not valid\n");
+ x265_log(m_param, X265_LOG_ERROR, "2nd pass has more
frames than 1st pass (%d vs %d)\n",
+ m_param->totalFrames, m_numEntries);
return false;
}
- if ((p = strstr(opts, "rc-lookahead=")) != 0 && sscanf(p,
"rc-lookahead=%d", &i))
- m_param->lookaheadDepth = i;
- }
- /* find number of pics */
- p = statsIn;
- int numEntries;
- for (numEntries = -1; p; numEntries++)
- p = strchr(p + 1, ';');
- if (!numEntries)
- {
- x265_log(m_param, X265_LOG_ERROR, "empty stats file\n");
- return false;
- }
- m_numEntries = numEntries;
-
- if (m_param->totalFrames < m_numEntries &&
m_param->totalFrames > 0)
- {
- x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer
frames than 1st pass (%d vs %d)\n",
- m_param->totalFrames, m_numEntries);
- }
- if (m_param->totalFrames > m_numEntries &&
!m_param->bEnableFrameDuplication)
- {
- x265_log(m_param, X265_LOG_ERROR, "2nd pass has more
frames than 1st pass (%d vs %d)\n",
- m_param->totalFrames, m_numEntries);
- return false;
- }
-
- m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
- if (!m_rce2Pass)
- {
- x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2 pass
cannot be allocated\n");
- return false;
- }
- m_encOrder = X265_MALLOC(int, m_numEntries);
- if (!m_encOrder)
- {
- x265_log(m_param, X265_LOG_ERROR, "Encode order for 2 pass
cannot be allocated\n");
- return false;
- }
- /* init all to skipped p frames */
- for (int i = 0; i < m_numEntries; i++)
- {
- RateControlEntry *rce = &m_rce2Pass[i];
- rce->sliceType = P_SLICE;
- rce->qScale = rce->newQScale = x265_qp2qScale(20);
- rce->miscBits = m_ncu + 10;
- rce->newQp = 0;
- }
- /* read stats */
- p = statsIn;
- double totalQpAq = 0;
- for (int i = 0; i < m_numEntries; i++)
- {
- RateControlEntry *rce, *rcePocOrder;
- int frameNumber;
- int encodeOrder;
- char picType;
- int e;
- char *next;
- double qpRc, qpAq, qNoVbv, qRceq;
- next = strstr(p, ";");
- if (next)
- *next++ = 0;
- e = sscanf(p, " in:%d out:%d", &frameNumber, &encodeOrder);
- if (frameNumber < 0 || frameNumber >= m_numEntries)
+ m_rce2Pass = X265_MALLOC(RateControlEntry, m_numEntries);
+ if (!m_rce2Pass)
{
- x265_log(m_param, X265_LOG_ERROR, "bad frame number
(%d) at stats line %d\n", frameNumber, i);
+ x265_log(m_param, X265_LOG_ERROR, "Rce Entries for 2
pass cannot be allocated\n");
return false;
}
- rce = &m_rce2Pass[encodeOrder];
- rcePocOrder = &m_rce2Pass[frameNumber];
- m_encOrder[frameNumber] = encodeOrder;
- if (!m_param->bMultiPassOptRPS)
+ m_encOrder = X265_MALLOC(int, m_numEntries);
+ if (!m_encOrder)
{
- int scenecut = 0;
- e += sscanf(p, " in:%*d out:%*d type:%c q:%lf q-aq:%lf
q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf scu:%lf sc:%d",
- &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
&rce->coeffBits,
- &rce->mvBits, &rce->miscBits, &rce->iCuCount,
&rce->pCuCount,
- &rce->skipCuCount, &scenecut);
- rcePocOrder->scenecut = scenecut != 0;
+ x265_log(m_param, X265_LOG_ERROR, "Encode order for 2
pass cannot be allocated\n");
+ return false;
}
- else
+ /* init all to skipped p frames */
+ for (int i = 0; i < m_numEntries; i++)
{
- char deltaPOC[128];
- char bUsed[40];
- memset(deltaPOC, 0, sizeof(deltaPOC));
- memset(bUsed, 0, sizeof(bUsed));
- e += sscanf(p, " in:%*d out:%*d type:%c q:%lf q-aq:%lf
q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf scu:%lf nump:%d
numnegp:%d numposp:%d deltapoc:%s bused:%s",
- &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
&rce->coeffBits,
- &rce->mvBits, &rce->miscBits, &rce->iCuCount,
&rce->pCuCount,
- &rce->skipCuCount, &rce->rpsData.numberOfPictures,
&rce->rpsData.numberOfNegativePictures,
&rce->rpsData.numberOfPositivePictures, deltaPOC, bUsed);
- splitdeltaPOC(deltaPOC, rce);
- splitbUsed(bUsed, rce);
- rce->rpsIdx = -1;
- }
- rce->keptAsRef = true;
- rce->isIdr = false;
- if (picType == 'b' || picType == 'p')
- rce->keptAsRef = false;
- if (picType == 'I')
- rce->isIdr = true;
- if (picType == 'I' || picType == 'i')
- rce->sliceType = I_SLICE;
- else if (picType == 'P' || picType == 'p')
+ RateControlEntry *rce = &m_rce2Pass[i];
rce->sliceType = P_SLICE;
- else if (picType == 'B' || picType == 'b')
- rce->sliceType = B_SLICE;
- else
- e = -1;
- if (e < 10)
+ rce->qScale = rce->newQScale = x265_qp2qScale(20);
+ rce->miscBits = m_ncu + 10;
+ rce->newQp = 0;
+ }
+ /* read stats */
+ p = statsIn;
+ double totalQpAq = 0;
+ for (int i = 0; i < m_numEntries; i++)
{
- x265_log(m_param, X265_LOG_ERROR, "statistics are
damaged at line %d, parser out=%d\n", i, e);
- return false;
+ RateControlEntry *rce, *rcePocOrder;
+ int frameNumber;
+ int encodeOrder;
+ char picType;
+ int e;
+ char *next;
+ double qpRc, qpAq, qNoVbv, qRceq;
+ next = strstr(p, ";");
+ if (next)
+ *next++ = 0;
+ e = sscanf(p, " in:%d out:%d", &frameNumber,
&encodeOrder);
+ if (frameNumber < 0 || frameNumber >= m_numEntries)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "bad frame
number (%d) at stats line %d\n", frameNumber, i);
+ return false;
+ }
+ rce = &m_rce2Pass[encodeOrder];
+ rcePocOrder = &m_rce2Pass[frameNumber];
+ m_encOrder[frameNumber] = encodeOrder;
+ if (!m_param->bMultiPassOptRPS)
+ {
+ int scenecut = 0;
+ e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
scu:%lf sc:%d",
+ &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
&rce->coeffBits,
+ &rce->mvBits, &rce->miscBits, &rce->iCuCount,
&rce->pCuCount,
+ &rce->skipCuCount, &scenecut);
+ rcePocOrder->scenecut = scenecut != 0;
+ }
+ else
+ {
+ char deltaPOC[128];
+ char bUsed[40];
+ memset(deltaPOC, 0, sizeof(deltaPOC));
+ memset(bUsed, 0, sizeof(bUsed));
+ e += sscanf(p, " in:%*d out:%*d type:%c q:%lf
q-aq:%lf q-noVbv:%lf q-Rceq:%lf tex:%d mv:%d misc:%d icu:%lf pcu:%lf
scu:%lf nump:%d numnegp:%d numposp:%d deltapoc:%s bused:%s",
+ &picType, &qpRc, &qpAq, &qNoVbv, &qRceq,
&rce->coeffBits,
+ &rce->mvBits, &rce->miscBits, &rce->iCuCount,
&rce->pCuCount,
+ &rce->skipCuCount,
&rce->rpsData.numberOfPictures, &rce->rpsData.numberOfNegativePictures,
&rce->rpsData.numberOfPositivePictures, deltaPOC, bUsed);
+ splitdeltaPOC(deltaPOC, rce);
+ splitbUsed(bUsed, rce);
+ rce->rpsIdx = -1;
+ }
+ rce->keptAsRef = true;
+ rce->isIdr = false;
+ if (picType == 'b' || picType == 'p')
+ rce->keptAsRef = false;
+ if (picType == 'I')
+ rce->isIdr = true;
+ if (picType == 'I' || picType == 'i')
+ rce->sliceType = I_SLICE;
+ else if (picType == 'P' || picType == 'p')
+ rce->sliceType = P_SLICE;
+ else if (picType == 'B' || picType == 'b')
+ rce->sliceType = B_SLICE;
+ else
+ e = -1;
+ if (e < 10)
+ {
+ x265_log(m_param, X265_LOG_ERROR, "statistics are
damaged at line %d, parser out=%d\n", i, e);
+ return false;
+ }
+ rce->qScale = rce->newQScale = x265_qp2qScale(qpRc);
+ totalQpAq += qpAq;
+ rce->qpNoVbv = qNoVbv;
+ rce->qpaRc = qpRc;
+ rce->qpAq = qpAq;
+ rce->qRceq = qRceq;
+ p = next;
}
- rce->qScale = rce->newQScale = x265_qp2qScale(qpRc);
- totalQpAq += qpAq;
- rce->qpNoVbv = qNoVbv;
- rce->qpaRc = qpRc;
- rce->qpAq = qpAq;
- rce->qRceq = qRceq;
- p = next;
- }
- X265_FREE(statsBuf);
- if (m_param->rc.rateControlMode != X265_RC_CQP)
- {
- m_start = 0;
- m_isQpModified = true;
- if (!initPass2())
- return false;
- } /* else we're using constant quant, so no need to run the
bitrate allocation */
+ X265_FREE(statsBuf);
+ if (m_param->rc.rateControlMode != X265_RC_CQP)
+ {
+ m_start = 0;
+ m_isQpModified = true;
+ if (!initPass2())
+ return false;
+ } /* else we're using constant quant, so no need to run
the bitrate allocation */
+ }
+ else // X265_SHARE_MODE_SHAREDMEM == m_param->rc.dataShareMode
+ {
+ if (m_param->rc.cuTree)
+ {
+ if (!initCUTreeSharedMem())
+ {
+ return false;
+ }
+ }
+ }
}
/* Open output file */
/* If input and output files are the same, output to a temp file
@@ -682,19 +767,29 @@ bool RateControl::init(const SPS& sps)
X265_FREE(p);
if (m_param->rc.cuTree && !m_param->rc.bStatRead)
{
- statFileTmpname = strcatFilename(fileName, ".cutree.temp");
- if (!statFileTmpname)
- return false;
- m_cutreeStatFileOut = x265_fopen(statFileTmpname, "wb");
- X265_FREE(statFileTmpname);
- if (!m_cutreeStatFileOut)
+ if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
{
- x265_log_file(m_param, X265_LOG_ERROR, "can't open
mbtree stats file %s.cutree.temp\n", fileName);
- return false;
+ statFileTmpname = strcatFilename(fileName,
".cutree.temp");
+ if (!statFileTmpname)
+ return false;
+ m_cutreeStatFileOut = x265_fopen(statFileTmpname,
"wb");
+ X265_FREE(statFileTmpname);
+ if (!m_cutreeStatFileOut)
+ {
+ x265_log_file(m_param, X265_LOG_ERROR, "can't open
mbtree stats file %s.cutree.temp\n", fileName);
+ return false;
+ }
+ }
+ else // X265_SHARE_MODE_SHAREDMEM ==
m_param->rc.dataShareMode
+ {
+ if (!initCUTreeSharedMem())
+ {
+ return false;
+ }
}
}
}
- if (m_param->rc.cuTree)
+ if (m_param->rc.cuTree && !m_cuTreeStats.qpBuffer[0])
{
if (m_param->rc.qgSize == 8)
{
@@ -714,6 +809,10 @@ bool RateControl::init(const SPS& sps)
return true;
}
+void RateControl::skipCUTreeSharedMemRead(int32_t cnt)
+{
+ m_cutreeShrMem->skipRead(cnt);
+}
void RateControl::reconfigureRC()
{
if (m_isVbv)
@@ -1665,10 +1764,25 @@ bool RateControl::cuTreeReadFor2Pass(Frame* frame)
{
m_cuTreeStats.qpBufPos++;
- if (!fread(&type, 1, 1, m_cutreeStatFileIn))
- goto fail;
- if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos],
sizeof(uint16_t), ncu, m_cutreeStatFileIn) != (size_t)ncu)
- goto fail;
+ if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
+ {
+ if (!fread(&type, 1, 1, m_cutreeStatFileIn))
+ goto fail;
+ if
(fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t),
ncu, m_cutreeStatFileIn) != (size_t)ncu)
+ goto fail;
+ }
+ else // X265_SHARE_MODE_SHAREDMEM ==
m_param->rc.dataShareMode
+ {
+ if (!m_cutreeShrMem)
+ {
+ goto fail;
+ }
+
+ CUTreeSharedDataItem shrItem;
+ shrItem.type = &type;
+ shrItem.stats =
m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos];
+ m_cutreeShrMem->readNext(&shrItem,
ReadSharedCUTreeData);
+ }
if (type != sliceTypeActual && m_cuTreeStats.qpBufPos == 1)
{
@@ -3059,10 +3173,26 @@ int RateControl::writeRateControlFrameStats(Frame*
curFrame, RateControlEntry* r
{
uint8_t sliceType = (uint8_t)rce->sliceType;
primitives.fix8Pack(m_cuTreeStats.qpBuffer[0],
curFrame->m_lowres.qpCuTreeOffset, ncu);
- if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
- goto writeFailure;
- if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu,
m_cutreeStatFileOut) < (size_t)ncu)
- goto writeFailure;
+
+ if (X265_SHARE_MODE_FILE == m_param->rc.dataShareMode)
+ {
+ if (fwrite(&sliceType, 1, 1, m_cutreeStatFileOut) < 1)
+ goto writeFailure;
+ if (fwrite(m_cuTreeStats.qpBuffer[0], sizeof(uint16_t), ncu,
m_cutreeStatFileOut) < (size_t)ncu)
+ goto writeFailure;
+ }
+ else // X265_SHARE_MODE_SHAREDMEM == m_param->rc.dataShareMode
+ {
+ if (!m_cutreeShrMem)
+ {
+ goto writeFailure;
+ }
+
+ CUTreeSharedDataItem shrItem;
+ shrItem.type = &sliceType;
+ shrItem.stats = m_cuTreeStats.qpBuffer[0];
+ m_cutreeShrMem->writeData(&shrItem, WriteSharedCUTreeData);
+ }
}
return 0;
@@ -3138,6 +3268,13 @@ void RateControl::destroy()
if (m_cutreeStatFileIn)
fclose(m_cutreeStatFileIn);
+ if (m_cutreeShrMem)
+ {
+ m_cutreeShrMem->release();
+ delete m_cutreeShrMem;
+ m_cutreeShrMem = NULL;
+ }
+
X265_FREE(m_rce2Pass);
X265_FREE(m_encOrder);
for (int i = 0; i < 2; i++)
diff --git a/source/encoder/ratecontrol.h b/source/encoder/ratecontrol.h
index 204bd71e1..10dfc268d 100644
--- a/source/encoder/ratecontrol.h
+++ b/source/encoder/ratecontrol.h
@@ -28,6 +28,7 @@
#include "common.h"
#include "sei.h"
+#include "ringmem.h"
namespace X265_NS {
// encoder namespace
@@ -240,6 +241,8 @@ public:
FILE* m_statFileOut;
FILE* m_cutreeStatFileOut;
FILE* m_cutreeStatFileIn;
+ /// store the cutree data in memory instead of file
+ RingMem *m_cutreeShrMem;
double m_lastAccumPNorm;
double m_expectedBitsSum; /* sum of qscale2bits after rceq,
ratefactor, and overflow, only includes finished frames */
int64_t m_predictedBits;
@@ -274,6 +277,9 @@ public:
int writeRateControlFrameStats(Frame* curFrame, RateControlEntry* rce);
bool initPass2();
+ bool initCUTreeSharedMem();
+ void skipCUTreeSharedMemRead(int32_t cnt);
+
double forwardMasking(Frame* curFrame, double q);
double backwardMasking(Frame* curFrame, double q);
diff --git a/source/x265.h b/source/x265.h
index 6bb893c98..bf945498f 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -747,6 +747,13 @@ typedef struct x265_vmaf_commondata
static const x265_vmaf_commondata vcd[] = { { NULL, (char
*)"/usr/local/share/model/vmaf_v0.6.1.pkl", NULL, NULL, 0, 0, 0, 0, 0, 0,
0, NULL, 0, 1, 0 } };
+
+typedef enum
+{
+ X265_SHARE_MODE_FILE = 0,
+ X265_SHARE_MODE_SHAREDMEM
+}X265_DATA_SHARE_MODES;
+
/* x265 input parameters
*
* For version safety you may use x265_param_alloc/free() to manage the
@@ -1433,19 +1440,16 @@ typedef struct x265_param
double rfConstantMin;
/* Multi-pass encoding */
- /* Enable writing the stats in a multi-pass encode to the stat
output file */
+ /* Enable writing the stats in a multi-pass encode to the stat
output file/memory */
int bStatWrite;
- /* Enable loading data from the stat input file in a multi pass
encode */
+ /* Enable loading data from the stat input file/memory in a multi
pass encode */
int bStatRead;
/* Filename of the 2pass output/input stats file, if unspecified
the
* encoder will default to using x265_2pass.log */
const char* statFileName;
- /* if only the focused frames would be re-encode or not */
- int bEncFocusedFramesOnly;
-
/* temporally blur quants */
double qblur;
@@ -1492,6 +1496,21 @@ typedef struct x265_param
/* internally enable if tune grain is set */
int bEnableConstVbv;
+ /* if only the focused frames would be re-encode or not */
+ int bEncFocusedFramesOnly;
+
+ /* Share the data via the stats file or shared memory.
+ It must be one of the X265_DATA_SHARE_MODES enum values.
+ Available if bStatWrite or bStatRead is true.
+ The stats file is used by default.
+ The stats file mode is used among encoders running in
sequence.
+ The shared memory mode can only be used among encoders
running in parallel.
+ Currently only the cutree data can be shared via shared memory. More
data will be supported in the future.*/
+ int dataShareMode;
+
+ /* Unique shared memory name. Required if the shared memory mode
is enabled. NULL by default */
+ const char* sharedMemName;
+
} rc;
/*== Video Usability Information ==*/
--
2.22.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20211012/56212342/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-x265-pass-2-Add-support-for-cutree-data-sharing-via-shared-memory.patch
Type: application/octet-stream
Size: 56882 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20211012/56212342/attachment-0001.obj>
More information about the x265-devel
mailing list