[x265] [PATCH] cudata: convert copy functions into template functions to avoid unnecessary memory copies
Ximing Cheng
chengximing1989 at foxmail.com
Tue Sep 6 19:22:44 CEST 2016
# HG changeset patch
# User Ximing Cheng <ximingcheng at tencent.com>
# Date 1473182485 -28800
# Wed Sep 07 01:21:25 2016 +0800
# Node ID b24cf6bc3795f06d53cd9d614b38f021d0e55a2f
# Parent df559450949bd085b0fc5e01332aa8458af2fa43
cudata: convert copy functions into template functions to avoid unnecessary memory copies
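In short: each init/copy routine becomes a template parameterized on the slice type, so the I-slice instantiation drops all inter-prediction copies and the P-slice instantiation drops the list-1 copies at compile time, while a small table of member-function pointers indexed by the slice type selects the right instantiation at run time (this is what CUDATA_SLICETYPE_FUNCS, CUDATA_FUNC and CALL_CUDATA_FUNC below expand to). A minimal, self-contained sketch of that dispatch pattern follows; the CU struct, its copy() body and the printf markers are illustrative stand-ins, not the real CUData members from the patch:

    #include <cstdio>

    enum SliceType { B_SLICE = 0, P_SLICE = 1, I_SLICE = 2 };
    enum { MAX_SLICE_TYPES = 3 };

    struct CU
    {
        template <SliceType type>
        void copy()
        {
            std::printf("copy intra fields\n");
            if (type != I_SLICE)           // constant-folded away in the I_SLICE instantiation
            {
                std::printf("copy list-0 inter fields\n");
                if (type == B_SLICE)       // constant-folded away in the P_SLICE instantiation
                    std::printf("copy list-1 inter fields\n");
            }
        }

        typedef void (CU::*copy_t)();
        static copy_t copy_func[MAX_SLICE_TYPES];
    };

    // one instantiation per slice type, indexed by SliceType
    CU::copy_t CU::copy_func[MAX_SLICE_TYPES] =
        { &CU::copy<B_SLICE>, &CU::copy<P_SLICE>, &CU::copy<I_SLICE> };

    int main()
    {
        CU cu;
        SliceType sliceType = P_SLICE;      // would come from slice->m_sliceType
        (cu.*CU::copy_func[sliceType])();   // the indirect call CUDATA_FUNC expands to
        return 0;
    }

MSVC flags the constant if (type ...) tests with warning C4127 ("conditional expression is constant"), which is why the patch disables that warning in cudata.cpp.
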
diff -r df559450949b -r b24cf6bc3795 source/common/common.h
--- a/source/common/common.h Wed Aug 10 13:26:18 2016 +0530
+++ b/source/common/common.h Wed Sep 07 01:21:25 2016 +0800
@@ -246,6 +246,7 @@
#define X265_LOG2(x) log2(x)
#endif
+#define MAX_SLICE_TYPES 3 // maximum number of slice types
#define NUM_CU_DEPTH 4 // maximum number of CU depths
#define NUM_FULL_DEPTH 5 // maximum number of full depths
#define MIN_LOG2_CU_SIZE 3 // log2(minCUSize)
diff -r df559450949b -r b24cf6bc3795 source/common/cudata.cpp
--- a/source/common/cudata.cpp Wed Aug 10 13:26:18 2016 +0530
+++ b/source/common/cudata.cpp Wed Sep 07 01:21:25 2016 +0800
@@ -28,6 +28,9 @@
#include "picyuv.h"
#include "mv.h"
#include "cudata.h"
+#if defined(_MSC_VER)
+#pragma warning(disable: 4127) // conditional expression is constant
+#endif
using namespace X265_NS;
@@ -113,6 +116,19 @@
cubcast_t CUData::s_partSet[NUM_FULL_DEPTH] = { NULL, NULL, NULL, NULL, NULL };
uint32_t CUData::s_numPartInCUSize;
+#define CUDATA_SLICETYPE_FUNCS(funcName) \
+ CUData::funcName ## _t CUData::funcName ## _func[MAX_SLICE_TYPES] = \
+ { &CUData::funcName<B_SLICE>, &CUData::funcName<P_SLICE>, &CUData::funcName<I_SLICE> };
+
+CUDATA_SLICETYPE_FUNCS(initCTU)
+CUDATA_SLICETYPE_FUNCS(initSubCU)
+CUDATA_SLICETYPE_FUNCS(initLosslessCU)
+CUDATA_SLICETYPE_FUNCS(copyPartFrom)
+CUDATA_SLICETYPE_FUNCS(copyToPic)
+CUDATA_SLICETYPE_FUNCS(copyFromPic)
+
+#undef CUDATA_SLICETYPE_FUNCS
+
CUData::CUData()
{
memset(this, 0, sizeof(*this));
@@ -266,6 +282,7 @@
}
}
+template <SliceType type>
void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp)
{
m_encData = frame.m_encData;
@@ -282,10 +299,11 @@
m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless);
- if (m_slice->m_sliceType != I_SLICE)
+ if (type != I_SLICE)
{
m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
- m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
+ if (type == B_SLICE)
+ m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
}
X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
@@ -301,6 +319,7 @@
}
// initialize Sub partition
+template <SliceType type>
void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
{
m_absIdxInCTU = cuGeom.absPartIdx;
@@ -321,8 +340,12 @@
m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless);
- m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
- m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
+ if (type != I_SLICE)
+ {
+ m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
+ if (type == B_SLICE)
+ m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
+ }
m_partSet(m_cuDepth, (uint8_t)cuGeom.depth);
/* initialize the remaining CU data in one memset */
@@ -330,6 +353,7 @@
}
/* Copy the results of a sub-part (split) CU to the parent CU */
+template <SliceType type>
void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx)
{
X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n");
@@ -340,24 +364,37 @@
m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
- m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
- m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
+ if (type != I_SLICE)
+ {
+ m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
+ if (type == B_SLICE)
+ m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
+ }
m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
m_subPartCopy(m_predMode + offset, subCU.m_predMode);
m_subPartCopy(m_partSize + offset, subCU.m_partSize);
- m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
- m_subPartCopy(m_interDir + offset, subCU.m_interDir);
- m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
- m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
+ if (type != I_SLICE)
+ {
+ m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
+ m_subPartCopy(m_interDir + offset, subCU.m_interDir);
+ m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
+ if (type == B_SLICE)
+ m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
+ }
m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
- memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
- memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
- memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
- memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
+ if (type != I_SLICE)
+ {
+ memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
+ if (type == B_SLICE)
+ memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
+ memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
+ if (type == B_SLICE)
+ memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
+ }
uint32_t tmp = 1 << ((g_maxLog2CUSize - childGeom.depth) * 2);
uint32_t tmp2 = subPartIdx * tmp;
@@ -389,6 +426,7 @@
/* Copy all CU data from one instance to the next, except set lossless flag
* This will only get used when --cu-lossless is enabled but --lossless is not. */
+template <SliceType type>
void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom)
{
/* Start by making an exact copy */
@@ -404,10 +442,15 @@
m_absIdxInCTU = cuGeom.absPartIdx;
m_numPartitions = cuGeom.numPartitions;
memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions);
- memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV));
- memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV));
- memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
- memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
+ if (type != I_SLICE)
+ {
+ memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV));
+ if (type == B_SLICE)
+ memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV));
+ memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
+ if (type == B_SLICE)
+ memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
+ }
/* force TQBypass to true */
m_partSet(m_tqBypass, true);
@@ -429,6 +472,7 @@
}
/* Copy completed predicted CU to CTU in picture */
+template <SliceType type>
void CUData::copyToPic(uint32_t depth) const
{
CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
@@ -437,23 +481,36 @@
m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
- m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
- m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
+ if (type != I_SLICE)
+ {
+ m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
+ if (type == B_SLICE)
+ m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
+ }
m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
- m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
- m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
- m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
- m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
+ if (type != I_SLICE)
+ {
+ m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
+ m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
+ m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
+ if (type == B_SLICE)
+ m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
+ }
m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
- memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
- memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
- memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
- memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
+ if (type != I_SLICE)
+ {
+ memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
+ if (type == B_SLICE)
+ memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
+ memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
+ if (type == B_SLICE)
+ memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
+ }
uint32_t tmpY = 1 << ((g_maxLog2CUSize - depth) * 2);
uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
@@ -475,6 +532,7 @@
}
/* The reverse of copyToPic, called only by encodeResidue */
+template <SliceType type>
void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp)
{
m_encData = ctu.m_encData;
@@ -491,21 +549,34 @@
m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU);
m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU);
- m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
- m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
+ if (type != I_SLICE)
+ {
+ m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
+ if (type == B_SLICE)
+ m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
+ }
m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU);
m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU);
- m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU);
- m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU);
- m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU);
- m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU);
+ if (type != I_SLICE)
+ {
+ m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU);
+ m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU);
+ m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU);
+ if (type == B_SLICE)
+ m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU);
+ }
m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
- memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
- memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
- memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
- memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+ if (type != I_SLICE)
+ {
+ memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+ if (type == B_SLICE)
+ memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+ memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+ if (type == B_SLICE)
+ memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
+ }
/* clear residual coding flags */
m_partSet(m_tuDepth, 0);
diff -r df559450949b -r b24cf6bc3795 source/common/cudata.h
--- a/source/common/cudata.h Wed Aug 10 13:26:18 2016 +0530
+++ b/source/common/cudata.h Wed Sep 07 01:21:25 2016 +0800
@@ -154,6 +154,9 @@
{ 0x00, 0x05, 0x05, 0x05 } // SIZE_nRx2N.
};
+#define CUDATA_FUNC(caller, funcName, type, ...) \
+ ((caller)->*CUData::funcName ## _func[type])(__VA_ARGS__)
+
// Holds part data for a CU of a given size, from an 8x8 CU to a CTU
class CUData
{
@@ -214,18 +217,33 @@
void initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance);
static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
- void initCTU(const Frame& frame, uint32_t cuAddr, int qp);
- void initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp);
- void initLosslessCU(const CUData& cu, const CUGeom& cuGeom);
+#define DECLARE_SLICETYPE_TEMPLATE_FUNC(funcName, ...) \
+ typedef void (CUData::*funcName ## _t)(__VA_ARGS__); \
+ static funcName ## _t funcName ## _func[MAX_SLICE_TYPES]; \
+ template <SliceType type> \
+ void funcName(__VA_ARGS__);
- void copyPartFrom(const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx);
+#define DECLARE_SLICETYPE_TEMPLATE_CONST_FUNC(funcName, ...) \
+ typedef void (CUData::*funcName ## _t)(__VA_ARGS__) const; \
+ static funcName ## _t funcName ## _func[MAX_SLICE_TYPES]; \
+ template <SliceType type> \
+ void funcName(__VA_ARGS__) const;
+
+ DECLARE_SLICETYPE_TEMPLATE_FUNC(initCTU, const Frame& frame, uint32_t cuAddr, int qp);
+ DECLARE_SLICETYPE_TEMPLATE_FUNC(initSubCU, const CUData& ctu, const CUGeom& cuGeom, int qp);
+ DECLARE_SLICETYPE_TEMPLATE_FUNC(initLosslessCU, const CUData& cu, const CUGeom& cuGeom);
+
+ DECLARE_SLICETYPE_TEMPLATE_FUNC(copyPartFrom, const CUData& cu, const CUGeom& childGeom, uint32_t subPartIdx);
void setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx);
- void copyToPic(uint32_t depth) const;
+ DECLARE_SLICETYPE_TEMPLATE_CONST_FUNC(copyToPic, uint32_t depth);
/* RD-0 methods called only from encodeResidue */
- void copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp = true);
+ DECLARE_SLICETYPE_TEMPLATE_FUNC(copyFromPic, const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp);
void updatePic(uint32_t depth, int picCsp) const;
+#undef DECLARE_SLICETYPE_TEMPLATE_CONST_FUNC
+#undef DECLARE_SLICETYPE_TEMPLATE_FUNC
+
void setPartSizeSubParts(PartSize size) { m_partSet(m_partSize, (uint8_t)size); }
void setPredModeSubParts(PredMode mode) { m_partSet(m_predMode, (uint8_t)mode); }
void clearCbf() { m_partSet(m_cbf[0], 0); if (m_chromaFormat != X265_CSP_I400) { m_partSet(m_cbf[1], 0); m_partSet(m_cbf[2], 0);} }
diff -r df559450949b -r b24cf6bc3795 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed Aug 10 13:26:18 2016 +0530
+++ b/source/encoder/analysis.cpp Wed Sep 07 01:21:25 2016 +0800
@@ -70,6 +70,9 @@
* rd-level 5,6 does RDO for each inter mode
*/
+#define CALL_CUDATA_FUNC(caller, funcName, ...) \
+ CUDATA_FUNC(caller, funcName, m_slice->m_sliceType, __VA_ARGS__)
+
Analysis::Analysis()
{
m_reuseInterDataCTU = NULL;
@@ -213,7 +216,7 @@
else if (md.bestMode->cu.isIntra(0))
{
md.pred[PRED_LOSSLESS].initCosts();
- md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
+ CALL_CUDATA_FUNC(&md.pred[PRED_LOSSLESS].cu, initLosslessCU, md.bestMode->cu, cuGeom);
PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size);
checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
@@ -221,7 +224,7 @@
else
{
md.pred[PRED_LOSSLESS].initCosts();
- md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
+ CALL_CUDATA_FUNC(&md.pred[PRED_LOSSLESS].cu, initLosslessCU, md.bestMode->cu, cuGeom);
md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
@@ -279,7 +282,7 @@
recodeCU(parentCTU, cuGeom, bestCUQP, lambdaQP);
/* Copy best data to encData CTU and recon */
- md.bestMode->cu.copyToPic(depth);
+ CALL_CUDATA_FUNC(&md.bestMode->cu, copyToPic, depth);
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
}
@@ -301,7 +304,7 @@
{
Mode& mode = md.pred[0];
md.bestMode = &mode;
- mode.cu.initSubCU(parentCTU, cuGeom, qp);
+ mode.cu.initSubCU<I_SLICE>(parentCTU, cuGeom, qp);
memcpy(mode.cu.m_lumaIntraDir, parentCTU.m_lumaIntraDir + cuGeom.absPartIdx, cuGeom.numPartitions);
memcpy(mode.cu.m_chromaIntraDir, parentCTU.m_chromaIntraDir + cuGeom.absPartIdx, cuGeom.numPartitions);
checkIntra(mode, cuGeom, (PartSize)parentCTU.m_partSize[cuGeom.absPartIdx]);
@@ -315,13 +318,13 @@
}
else if (cuGeom.log2CUSize != MAX_LOG2_CU_SIZE && mightNotSplit)
{
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_INTRA].cu.initSubCU<I_SLICE>(parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N);
checkBestMode(md.pred[PRED_INTRA], depth);
if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
{
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_INTRA_NxN].cu.initSubCU<I_SLICE>(parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
@@ -341,7 +344,7 @@
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom, qp);
+ splitCU->initSubCU<I_SLICE>(parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -363,7 +366,7 @@
compressIntraCU(parentCTU, childGeom, nextQP);
// Save best CU and pred data for this sub CU
- splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
+ splitCU->copyPartFrom<I_SLICE>(nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
nextContext = &nd.bestMode->contexts;
@@ -395,7 +398,7 @@
}
/* Copy best data to encData CTU and recon */
- md.bestMode->cu.copyToPic(depth);
+ md.bestMode->cu.copyToPic<I_SLICE>(depth);
if (md.bestMode != &md.pred[PRED_SPLIT])
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
}
@@ -610,8 +613,8 @@
if (mightNotSplit && depth >= minDepth)
{
/* Initialize all prediction CUs based on parentCTU */
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_MERGE].cu, initSubCU, parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_SKIP].cu, initSubCU, parentCTU, cuGeom, qp);
if (m_param->rdLevel <= 4)
checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
@@ -633,7 +636,7 @@
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(splitCU, initSubCU, parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -657,7 +660,7 @@
// Save best CU and pred data for this sub CU
splitIntra |= nd.bestMode->cu.isIntra(0);
- splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
+ CALL_CUDATA_FUNC(splitCU, copyPartFrom, nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
@@ -686,24 +689,24 @@
if (bTryIntra)
{
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_INTRA].cu, initSubCU, parentCTU, cuGeom, qp);
if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3 && m_param->rdLevel >= 5)
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_INTRA_NxN].cu, initSubCU, parentCTU, cuGeom, qp);
pmode.modes[pmode.m_jobTotal++] = PRED_INTRA;
}
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2Nx2N;
- md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2Nx2N].cu, initSubCU, parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2Nx2N;
+ CALL_CUDATA_FUNC(&md.pred[PRED_BIDIR].cu, initSubCU, parentCTU, cuGeom, qp);
if (m_param->bEnableRectInter)
{
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxN;
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_Nx2N;
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxN].cu, initSubCU, parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxN;
+ CALL_CUDATA_FUNC(&md.pred[PRED_Nx2N].cu, initSubCU, parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_Nx2N;
}
if (bTryAmp)
{
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnU;
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnD;
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nLx2N;
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nRx2N;
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxnU].cu, initSubCU, parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnU;
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxnD].cu, initSubCU, parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_2NxnD;
+ CALL_CUDATA_FUNC(&md.pred[PRED_nLx2N].cu, initSubCU, parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nLx2N;
+ CALL_CUDATA_FUNC(&md.pred[PRED_nRx2N].cu, initSubCU, parentCTU, cuGeom, qp); pmode.modes[pmode.m_jobTotal++] = PRED_nRx2N;
}
m_splitRefIdx[0] = splitRefs[0]; m_splitRefIdx[1] = splitRefs[1]; m_splitRefIdx[2] = splitRefs[2]; m_splitRefIdx[3] = splitRefs[3];
@@ -870,7 +873,7 @@
}
/* Copy best data to encData CTU and recon */
- md.bestMode->cu.copyToPic(depth);
+ CALL_CUDATA_FUNC(&md.bestMode->cu, copyToPic, depth);
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
return refMask;
@@ -914,8 +917,8 @@
{
if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
{
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_MERGE].cu, initSubCU, parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_SKIP].cu, initSubCU, parentCTU, cuGeom, qp);
checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
@@ -937,8 +940,8 @@
if (mightNotSplit && depth >= minDepth && !md.bestMode) /* TODO: Re-evaluate if analysis load/save still works */
{
/* Compute Merge Cost */
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_MERGE].cu, initSubCU, parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_SKIP].cu, initSubCU, parentCTU, cuGeom, qp);
checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
if (m_param->rdLevel)
skipModes = m_param->bEnableEarlySkip && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
@@ -962,7 +965,7 @@
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(splitCU, initSubCU, parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -986,7 +989,7 @@
// Save best CU and pred data for this sub CU
splitIntra |= nd.bestMode->cu.isIntra(0);
- splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
+ CALL_CUDATA_FUNC(splitCU, copyPartFrom, nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
if (m_param->rdLevel)
@@ -1023,7 +1026,7 @@
{
uint32_t refMasks[2];
refMasks[0] = allSplitRefs;
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2Nx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
if (m_param->limitReferences & X265_REF_LIMIT_CU)
@@ -1035,7 +1038,7 @@
if (m_slice->m_sliceType == B_SLICE)
{
- md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_BIDIR].cu.initSubCU<B_SLICE>(parentCTU, cuGeom, qp);
checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
}
@@ -1065,7 +1068,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxN].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxN];
@@ -1075,7 +1078,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_Nx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_Nx2N];
@@ -1085,7 +1088,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxN].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxN];
@@ -1137,7 +1140,7 @@
{
refMasks[0] = allSplitRefs; /* 75% top */
refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxnD].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnD];
@@ -1147,7 +1150,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
refMasks[1] = allSplitRefs; /* 75% bot */
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxnU].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
if (md.pred[PRED_2NxnU].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnU];
@@ -1157,7 +1160,7 @@
{
refMasks[0] = allSplitRefs; /* 75% top */
refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxnD].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
if (md.pred[PRED_2NxnD].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_2NxnD];
@@ -1170,7 +1173,7 @@
{
refMasks[0] = allSplitRefs; /* 75% left */
refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_nRx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nRx2N];
@@ -1180,7 +1183,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */
refMasks[1] = allSplitRefs; /* 75% right */
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_nLx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
if (md.pred[PRED_nLx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nLx2N];
@@ -1190,7 +1193,7 @@
{
refMasks[0] = allSplitRefs; /* 75% left */
refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_nRx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
if (md.pred[PRED_nRx2N].sa8dCost < bestInter->sa8dCost)
bestInter = &md.pred[PRED_nRx2N];
@@ -1239,7 +1242,7 @@
if (!m_param->limitReferences || splitIntra)
{
ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_INTRA].cu, initSubCU, parentCTU, cuGeom, qp);
checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
encodeIntraInInter(md.pred[PRED_INTRA], cuGeom);
checkBestMode(md.pred[PRED_INTRA], depth);
@@ -1265,7 +1268,7 @@
if (!m_param->limitReferences || splitIntra)
{
ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_INTRA].cu, initSubCU, parentCTU, cuGeom, qp);
checkIntraInInter(md.pred[PRED_INTRA], cuGeom);
if (md.pred[PRED_INTRA].sa8dCost < md.bestMode->sa8dCost)
md.bestMode = &md.pred[PRED_INTRA];
@@ -1396,7 +1399,7 @@
}
/* Copy best data to encData CTU and recon */
- md.bestMode->cu.copyToPic(depth);
+ CALL_CUDATA_FUNC(&md.bestMode->cu, copyToPic, depth);
if (m_param->rdLevel)
md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx);
@@ -1437,12 +1440,12 @@
{
if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
{
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_SKIP].cu, initSubCU, parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_MERGE].cu, initSubCU, parentCTU, cuGeom, qp);
checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
skipModes = !!m_param->bEnableEarlySkip && md.bestMode;
refMasks[0] = allSplitRefs;
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2Nx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
@@ -1457,12 +1460,12 @@
/* Step 1. Evaluate Merge/Skip candidates for likely early-outs */
if (mightNotSplit && !md.bestMode)
{
- md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
- md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_SKIP].cu, initSubCU, parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_MERGE].cu, initSubCU, parentCTU, cuGeom, qp);
checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
skipModes = m_param->bEnableEarlySkip && md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
refMasks[0] = allSplitRefs;
- md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2Nx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
@@ -1477,7 +1480,7 @@
Mode* splitPred = &md.pred[PRED_SPLIT];
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(splitCU, initSubCU, parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -1501,7 +1504,7 @@
// Save best CU and pred data for this sub CU
splitIntra |= nd.bestMode->cu.isIntra(0);
- splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
+ CALL_CUDATA_FUNC(splitCU, copyPartFrom, nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
nextContext = &nd.bestMode->contexts;
@@ -1543,7 +1546,7 @@
if (m_slice->m_sliceType == B_SLICE)
{
- md.pred[PRED_BIDIR].cu.initSubCU(parentCTU, cuGeom, qp);
+ md.pred[PRED_BIDIR].cu.initSubCU<B_SLICE>(parentCTU, cuGeom, qp);
checkBidir2Nx2N(md.pred[PRED_2Nx2N], md.pred[PRED_BIDIR], cuGeom);
if (md.pred[PRED_BIDIR].sa8dCost < MAX_INT64)
{
@@ -1584,7 +1587,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxN].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
}
@@ -1593,7 +1596,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* left */
refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* right */
- md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_Nx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom, SIZE_Nx2N, refMasks);
checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
}
@@ -1602,7 +1605,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* top */
refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* bot */
- md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxN].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom, SIZE_2NxN, refMasks);
checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
}
@@ -1653,7 +1656,7 @@
{
refMasks[0] = allSplitRefs; /* 75% top */
refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxnD].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
}
@@ -1662,7 +1665,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[1].splitRefs; /* 25% top */
refMasks[1] = allSplitRefs; /* 75% bot */
- md.pred[PRED_2NxnU].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxnU].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom, SIZE_2NxnU, refMasks);
checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
}
@@ -1671,7 +1674,7 @@
{
refMasks[0] = allSplitRefs; /* 75% top */
refMasks[1] = splitData[2].splitRefs | splitData[3].splitRefs; /* 25% bot */
- md.pred[PRED_2NxnD].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_2NxnD].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom, SIZE_2NxnD, refMasks);
checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
}
@@ -1684,7 +1687,7 @@
{
refMasks[0] = allSplitRefs; /* 75% left */
refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_nRx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
}
@@ -1693,7 +1696,7 @@
{
refMasks[0] = splitData[0].splitRefs | splitData[2].splitRefs; /* 25% left */
refMasks[1] = allSplitRefs; /* 75% right */
- md.pred[PRED_nLx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_nLx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom, SIZE_nLx2N, refMasks);
checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
}
@@ -1702,7 +1705,7 @@
{
refMasks[0] = allSplitRefs; /* 75% left */
refMasks[1] = splitData[1].splitRefs | splitData[3].splitRefs; /* 25% right */
- md.pred[PRED_nRx2N].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_nRx2N].cu, initSubCU, parentCTU, cuGeom, qp);
checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom, SIZE_nRx2N, refMasks);
checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
}
@@ -1715,13 +1718,13 @@
if (!m_param->limitReferences || splitIntra)
{
ProfileCounter(parentCTU, totalIntraCU[cuGeom.depth]);
- md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_INTRA].cu, initSubCU, parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N);
checkBestMode(md.pred[PRED_INTRA], depth);
if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize < 3)
{
- md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&md.pred[PRED_INTRA_NxN].cu, initSubCU, parentCTU, cuGeom, qp);
checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
@@ -1786,7 +1789,7 @@
}
/* Copy best data to encData CTU and recon */
- md.bestMode->cu.copyToPic(depth);
+ CALL_CUDATA_FUNC(&md.bestMode->cu, copyToPic, depth);
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
return splitCUData;
@@ -1808,7 +1811,7 @@
Mode& mode = md.pred[0];
md.bestMode = &mode;
- mode.cu.initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(&mode.cu, initSubCU, parentCTU, cuGeom, qp);
PartSize size = (PartSize)parentCTU.m_partSize[cuGeom.absPartIdx];
if (parentCTU.isIntra(cuGeom.absPartIdx))
{
@@ -1818,7 +1821,7 @@
}
else
{
- mode.cu.copyFromPic(parentCTU, cuGeom, m_csp, false);
+ CALL_CUDATA_FUNC(&mode.cu, copyFromPic, parentCTU, cuGeom, m_csp, false);
for (int part = 0; part < (int)parentCTU.getNumPartInter(cuGeom.absPartIdx); part++)
{
PredictionUnit pu(mode.cu, cuGeom, part);
@@ -1848,7 +1851,7 @@
md.bestMode = splitPred;
splitPred->initCosts();
CUData* splitCU = &splitPred->cu;
- splitCU->initSubCU(parentCTU, cuGeom, qp);
+ CALL_CUDATA_FUNC(splitCU, initSubCU, parentCTU, cuGeom, qp);
uint32_t nextDepth = depth + 1;
ModeDepth& nd = m_modeDepth[nextDepth];
@@ -1870,7 +1873,7 @@
qprdRefine(parentCTU, childGeom, nextQP, lqp);
// Save best CU and pred data for this sub CU
- splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
+ CALL_CUDATA_FUNC(splitCU, copyPartFrom, nd.bestMode->cu, childGeom, subPartIdx);
splitPred->addSubCosts(*nd.bestMode);
nd.bestMode->reconYuv.copyToPartYuv(splitPred->reconYuv, childGeom.numPartitions * subPartIdx);
nextContext = &nd.bestMode->contexts;
@@ -1891,7 +1894,7 @@
checkDQPForSplitPred(*splitPred, cuGeom);
/* Copy best data to encData CTU and recon */
- md.bestMode->cu.copyToPic(depth);
+ CALL_CUDATA_FUNC(&md.bestMode->cu, copyToPic, depth);
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
}
}
@@ -2384,7 +2387,7 @@
Mode *bestMode = m_modeDepth[cuGeom.depth].bestMode;
CUData& cu = bestMode->cu;
- cu.copyFromPic(ctu, cuGeom, m_csp);
+ CALL_CUDATA_FUNC(&cu, copyFromPic, ctu, cuGeom, m_csp, true);
PicYuv& reconPic = *m_frame->m_reconPic;
diff -r df559450949b -r b24cf6bc3795 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Wed Aug 10 13:26:18 2016 +0530
+++ b/source/encoder/frameencoder.cpp Wed Sep 07 01:21:25 2016 +0800
@@ -953,7 +953,7 @@
const uint32_t col = curRow.completed;
const uint32_t cuAddr = lineStartCUAddr + col;
CUData* ctu = curEncData.getPicCTU(cuAddr);
- ctu->initCTU(*m_frame, cuAddr, slice->m_sliceQp);
+ CUDATA_FUNC(ctu, initCTU, slice->m_sliceType, *m_frame, cuAddr, slice->m_sliceQp);
if (bIsVbv)
{