[x265] [PATCH 1 of 2 EXPERIMENTAL] slice: prep work for per-numa recon picture copies
Steve Borho
steve at borho.org
Tue Aug 4 05:33:36 CEST 2015
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1438642558 18000
# Mon Aug 03 17:55:58 2015 -0500
# Node ID 02e84edaa14399a3a68ade8617c63422f51a305b
# Parent 1f161d9c6e35e32998d38ebf5b6dec96f1ef43e2
slice: prep work for per-numa recon picture copies
Rename slice.m_refPicList to m_refFrameList since it is an array of Frame
pointers, and make a new slice.m_refReconPicList array which points directly
to the motion reference PicYuv buffers (bypassing the Frame structure).
The reconstructed pictures are now allocated by the frame encoder worker thread,
making them socket-local. The per-node structures devolve to a single pointer
de-reference when NUMA support is not compiled in, minimizing the impact when
the feature is disabled.
The shared offset buffers were moved from the top-level encoder into the SPS
structure so the FrameData functions could use them directly (avoiding a major
layering violation). The offset buffers are computed based on SPS values, so it
seems minimally ugly to keep them there.
Later commits will make the PicYuv buffers per-NUMA node and copy reconstructed
pixels between nodes on demand.
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/cudata.cpp
--- a/source/common/cudata.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/cudata.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -1676,7 +1676,7 @@
if (tempRefIdx != -1)
{
uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
- const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
+ const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
// Scale the vector
@@ -1857,7 +1857,7 @@
bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
{
- const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
+ const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
@@ -1892,7 +1892,7 @@
// Cache the collocated MV.
bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const
{
- const Frame* colPic = m_slice->m_refPicList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
+ const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/deblock.cpp
--- a/source/common/deblock.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/deblock.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -209,8 +209,8 @@
const Slice* const sliceQ = cuQ->m_slice;
const Slice* const sliceP = cuP->m_slice;
- const Frame* refP0 = sliceP->getRefPic(0, cuP->m_refIdx[0][partP]);
- const Frame* refQ0 = sliceQ->getRefPic(0, cuQ->m_refIdx[0][partQ]);
+ const Frame* refP0 = sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]];
+ const Frame* refQ0 = sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]];
const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
@@ -221,8 +221,8 @@
}
// (sliceQ->isInterB() || sliceP->isInterB())
- const Frame* refP1 = sliceP->getRefPic(1, cuP->m_refIdx[1][partP]);
- const Frame* refQ1 = sliceQ->getRefPic(1, cuQ->m_refIdx[1][partQ]);
+ const Frame* refP1 = sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]];
+ const Frame* refQ1 = sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]];
const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
@@ -366,7 +366,7 @@
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
{
- PicYuv* reconPic = cuQ->m_encData->m_reconPic;
+ PicYuv* reconPic = cuQ->m_encData->getOutputRecon();
pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
intptr_t stride = reconPic->m_stride;
const PPS* pps = cuQ->m_slice->m_pps;
@@ -474,7 +474,7 @@
: ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
"invalid edge\n");
- PicYuv* reconPic = cuQ->m_encData->m_reconPic;
+ PicYuv* reconPic = cuQ->m_encData->getOutputRecon();
intptr_t stride = reconPic->m_strideC;
intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
bool bCheckNoFilter = pps->bTransquantBypassEnabled;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/frame.cpp
--- a/source/common/frame.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/frame.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -35,7 +35,6 @@
m_reconRowCount.set(0);
m_countRefEncoders = 0;
m_encData = NULL;
- m_reconPic = NULL;
m_next = NULL;
m_prev = NULL;
m_param = NULL;
@@ -54,26 +53,13 @@
bool Frame::allocEncodeData(x265_param *param, const SPS& sps)
{
m_encData = new FrameData;
- m_reconPic = new PicYuv;
- m_encData->m_reconPic = m_reconPic;
- bool ok = m_encData->create(param, sps) && m_reconPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp);
- if (ok)
- {
- /* initialize right border of m_reconpicYuv as SAO may read beyond the
- * end of the picture accessing uninitialized pixels */
- int maxHeight = sps.numCuInHeight * g_maxCUSize;
- memset(m_reconPic->m_picOrg[0], 0, sizeof(pixel) * m_reconPic->m_stride * maxHeight);
- memset(m_reconPic->m_picOrg[1], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
- memset(m_reconPic->m_picOrg[2], 0, sizeof(pixel) * m_reconPic->m_strideC * (maxHeight >> m_reconPic->m_vChromaShift));
- }
- return ok;
+ return m_encData->create(*param, sps);
}
/* prepare to re-use a FrameData instance to encode a new picture */
void Frame::reinit(const SPS& sps)
{
m_bChromaExtended = false;
- m_reconPic = m_encData->m_reconPic;
m_encData->reinit(sps);
}
@@ -93,12 +79,5 @@
m_fencPic = NULL;
}
- if (m_reconPic)
- {
- m_reconPic->destroy();
- delete m_reconPic;
- m_reconPic = NULL;
- }
-
m_lowres.destroy();
}
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/frame.h
--- a/source/common/frame.h Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/frame.h Mon Aug 03 17:55:58 2015 -0500
@@ -41,10 +41,9 @@
{
public:
- /* These two items will be NULL until the Frame begins to be encoded, at which point
- * it will be assigned a FrameData instance, which comes with a reconstructed image PicYuv */
+ /* will be NULL until the Frame begins to be encoded, at which point it will
+ * be assigned a FrameData instance */
FrameData* m_encData;
- PicYuv* m_reconPic;
/* Data associated with x265_picture */
PicYuv* m_fencPic;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/framedata.cpp
--- a/source/common/framedata.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/framedata.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -22,24 +22,39 @@
*****************************************************************************/
#include "framedata.h"
+#include "threadpool.h"
#include "picyuv.h"
using namespace X265_NS;
+PerNodeRecon::~PerNodeRecon()
+{
+ if (reconPic)
+ {
+ reconPic->destroy();
+ delete reconPic;
+ }
+}
+
FrameData::FrameData()
{
memset(this, 0, sizeof(*this));
}
-bool FrameData::create(x265_param *param, const SPS& sps)
+bool FrameData::create(const x265_param& param, const SPS& sps)
{
- m_param = param;
+ m_param = &param;
m_slice = new Slice;
m_picCTU = new CUData[sps.numCUsInFrame];
+#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) || HAVE_LIBNUMA
+ m_nodes = new PerNodeRecon[ThreadPool::getNumaNodeCount()];
+#else
+ m_nodes = new PerNodeRecon[1];
+#endif
- m_cuMemPool.create(0, param->internalCsp, sps.numCUsInFrame);
+ m_cuMemPool.create(0, param.internalCsp, sps.numCUsInFrame);
for (uint32_t ctuAddr = 0; ctuAddr < sps.numCUsInFrame; ctuAddr++)
- m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param->internalCsp, ctuAddr);
+ m_picCTU[ctuAddr].initialize(m_cuMemPool, 0, param.internalCsp, ctuAddr);
CHECKED_MALLOC(m_cuStat, RCStatCU, sps.numCUsInFrame);
CHECKED_MALLOC(m_rowStat, RCStatRow, sps.numCuInHeight);
@@ -54,11 +69,51 @@
{
memset(m_cuStat, 0, sps.numCUsInFrame * sizeof(*m_cuStat));
memset(m_rowStat, 0, sps.numCuInHeight * sizeof(*m_rowStat));
+#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) || HAVE_LIBNUMA
+ for (int i = 0; i < ThreadPool::getNumaNodeCount(); i++)
+ m_nodes[i].rows = 0;
+#else
+ m_nodes->rows = 0;
+#endif
+}
+
+bool FrameData::allocRecon(const SPS& sps, int node)
+{
+ if (m_nodes[node].reconPic)
+ return true;
+
+ ScopedLock s(m_nodes[node].copyLock);
+
+ if (m_nodes[node].reconPic)
+ return true;
+
+ PicYuv* reconPic = new PicYuv;
+ m_nodes[node].reconPic = reconPic;
+
+ int maxHeight = sps.numCuInHeight * g_maxCUSize;
+ if (reconPic->create(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp))
+ {
+ /* initialize right border of recon PicYuv as SAO may read beyond the end
+ * of the picture accessing uninitialized pixels */
+ memset(reconPic->m_picOrg[0], 0, sizeof(pixel) * reconPic->m_stride * maxHeight);
+ memset(reconPic->m_picOrg[1], 0, sizeof(pixel) * reconPic->m_strideC * (maxHeight >> reconPic->m_vChromaShift));
+ memset(reconPic->m_picOrg[2], 0, sizeof(pixel) * reconPic->m_strideC * (maxHeight >> reconPic->m_vChromaShift));
+
+ /* use pre-calculated cu/pu offsets cached in the SPS structure */
+ reconPic->m_cuOffsetC = sps.cuOffsetC;
+ reconPic->m_cuOffsetY = sps.cuOffsetY;
+ reconPic->m_buOffsetC = sps.buOffsetC;
+ reconPic->m_buOffsetY = sps.buOffsetY;
+ return true;
+ }
+
+ return false;
}
void FrameData::destroy()
{
delete [] m_picCTU;
+ delete [] m_nodes;
delete m_slice;
delete m_saoParam;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/framedata.h
--- a/source/common/framedata.h Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/framedata.h Mon Aug 03 17:55:58 2015 -0500
@@ -27,6 +27,7 @@
#include "common.h"
#include "slice.h"
#include "cudata.h"
+#include "threading.h"
namespace X265_NS {
// private namespace
@@ -83,6 +84,21 @@
}
};
+struct PerNodeRecon
+{
+ PicYuv* reconPic;
+ Lock copyLock;
+ int rows;
+
+ PerNodeRecon()
+ {
+ rows = 0;
+ reconPic = NULL;
+ }
+
+ ~PerNodeRecon();
+};
+
/* Per-frame data that is used during encodes and referenced while the picture
* is available for reference. A FrameData instance is attached to a Frame as it
* comes out of the lookahead. Frames which are not being encoded do not have a
@@ -93,13 +109,14 @@
class FrameData
{
public:
+ PerNodeRecon* m_nodes;
+ int m_ownerNode; /* NUMA node of writing frame encoder */
Slice* m_slice;
SAOParam* m_saoParam;
- x265_param* m_param;
+ const x265_param* m_param;
FrameData* m_freeListNext;
- PicYuv* m_reconPic;
bool m_bHasReferences; /* used during DPB/RPS updates */
int m_frameEncoderID; /* the ID of the FrameEncoder encoding this frame */
JobProvider* m_jobProvider;
@@ -142,11 +159,23 @@
FrameData();
- bool create(x265_param *param, const SPS& sps);
+ bool create(const x265_param& param, const SPS& sps);
+ bool allocRecon(const SPS& sps, int node);
void reinit(const SPS& sps);
void destroy();
- CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
+ inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
+
+#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) || HAVE_LIBNUMA
+ /* this function should only be called by worker threads which are encoding this particular
+ * frame. workers which are using this frame as a motion reference should use their slice's
+ * m_refReconPicList[][] array instead. Its pointers will always reference a buffer which
+ * was allocated on their local socket */
+ /* TODO: X265_CHECK that current node == m_ownerNode */
+ inline PicYuv* getOutputRecon() { return m_nodes[m_ownerNode].reconPic; }
+#else
+ inline PicYuv* getOutputRecon() { return m_nodes->reconPic; }
+#endif
};
}
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/predict.cpp
--- a/source/common/predict.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/predict.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -109,18 +109,18 @@
ShortYuv& shortYuv = m_predShortYuv[0];
if (bLuma)
- predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
+ predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
if (bChroma)
- predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
+ predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
}
else
{
if (bLuma)
- predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
+ predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
if (bChroma)
- predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
+ predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
}
}
else
@@ -179,13 +179,13 @@
if (bLuma)
{
- predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
- predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
+ predInterLumaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
+ predInterLumaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
}
if (bChroma)
{
- predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
- predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
+ predInterChromaShort(pu, m_predShortYuv[0], *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
+ predInterChromaShort(pu, m_predShortYuv[1], *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
}
if (pwp0 && pwp1 && (pwp0->bPresentFlag || pwp1->bPresentFlag))
@@ -203,18 +203,18 @@
ShortYuv& shortYuv = m_predShortYuv[0];
if (bLuma)
- predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
+ predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
if (bChroma)
- predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
+ predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
}
else
{
if (bLuma)
- predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
+ predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
if (bChroma)
- predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[0][refIdx0]->m_reconPic, mv0);
+ predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[0][refIdx0], mv0);
}
}
else
@@ -230,18 +230,18 @@
ShortYuv& shortYuv = m_predShortYuv[0];
if (bLuma)
- predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
+ predInterLumaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
if (bChroma)
- predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
+ predInterChromaShort(pu, shortYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
addWeightUni(pu, predYuv, shortYuv, wv0, bLuma, bChroma);
}
else
{
if (bLuma)
- predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
+ predInterLumaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
if (bChroma)
- predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refPicList[1][refIdx1]->m_reconPic, mv1);
+ predInterChromaPixel(pu, predYuv, *cu.m_slice->m_refReconPicList[1][refIdx1], mv1);
}
}
}
@@ -600,8 +600,9 @@
int tuSize = 1 << intraNeighbors.log2TrSize;
int tuSize2 = tuSize << 1;
- pixel* adiOrigin = cu.m_encData->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
- intptr_t picStride = cu.m_encData->m_reconPic->m_stride;
+ PicYuv* reconPic = cu.m_encData->getOutputRecon();
+ pixel* adiOrigin = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
+ intptr_t picStride = reconPic->m_stride;
fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
@@ -648,8 +649,9 @@
void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, uint32_t puAbsPartIdx, const IntraNeighbors& intraNeighbors, uint32_t chromaId)
{
- const pixel* adiOrigin = cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
- intptr_t picStride = cu.m_encData->m_reconPic->m_strideC;
+ PicYuv* reconPic = cu.m_encData->getOutputRecon();
+ const pixel* adiOrigin = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + puAbsPartIdx);
+ intptr_t picStride = reconPic->m_strideC;
fillReferenceSamples(adiOrigin, picStride, intraNeighbors, intraNeighbourBuf[0]);
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/slice.cpp
--- a/source/common/slice.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/slice.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -33,7 +33,9 @@
{
if (m_sliceType == I_SLICE)
{
- memset(m_refPicList, 0, sizeof(m_refPicList));
+ memset(m_refFrameList, 0, sizeof(m_refFrameList));
+ memset(m_refReconPicList, 0, sizeof(m_refReconPicList));
+ memset(m_refPOCList, 0, sizeof(m_refPOCList));
m_numRefIdx[1] = m_numRefIdx[0] = 0;
return;
}
@@ -106,13 +108,13 @@
{
cIdx = rIdx % numPocTotalCurr;
X265_CHECK(cIdx >= 0 && cIdx < numPocTotalCurr, "RPS index check fail\n");
- m_refPicList[0][rIdx] = rpsCurrList0[cIdx];
+ m_refFrameList[0][rIdx] = rpsCurrList0[cIdx];
}
if (m_sliceType != B_SLICE)
{
m_numRefIdx[1] = 0;
- memset(m_refPicList[1], 0, sizeof(m_refPicList[1]));
+ memset(m_refFrameList[1], 0, sizeof(m_refFrameList[1]));
}
else
{
@@ -120,13 +122,13 @@
{
cIdx = rIdx % numPocTotalCurr;
X265_CHECK(cIdx >= 0 && cIdx < numPocTotalCurr, "RPS index check fail\n");
- m_refPicList[1][rIdx] = rpsCurrList1[cIdx];
+ m_refFrameList[1][rIdx] = rpsCurrList1[cIdx];
}
}
for (int dir = 0; dir < 2; dir++)
for (int numRefIdx = 0; numRefIdx < m_numRefIdx[dir]; numRefIdx++)
- m_refPOCList[dir][numRefIdx] = m_refPicList[dir][numRefIdx]->m_poc;
+ m_refPOCList[dir][numRefIdx] = m_refFrameList[dir][numRefIdx]->m_poc;
}
void Slice::disableWeights()
diff -r 1f161d9c6e35 -r 02e84edaa143 source/common/slice.h
--- a/source/common/slice.h Mon Aug 03 14:56:21 2015 -0500
+++ b/source/common/slice.h Mon Aug 03 17:55:58 2015 -0500
@@ -31,6 +31,7 @@
class Frame;
class PicList;
+class PicYuv;
class MotionReference;
enum SliceType
@@ -209,6 +210,13 @@
struct SPS
{
+ /* cached PicYuv offset arrays, shared by all instances of
+ * PicYuv created by this encoder */
+ intptr_t* cuOffsetY;
+ intptr_t* cuOffsetC;
+ intptr_t* buOffsetY;
+ intptr_t* buOffsetC;
+
int chromaFormatIdc; // use param
uint32_t picWidthInLumaSamples; // use param
uint32_t picHeightInLumaSamples; // use param
@@ -242,6 +250,11 @@
Window conformanceWindow;
VUI vuiParameters;
+
+ SPS()
+ {
+ memset(this, 0, sizeof(*this));
+ }
};
struct PPS
@@ -321,7 +334,8 @@
uint32_t m_colRefIdx; // never modified
int m_numRefIdx[2];
- Frame* m_refPicList[2][MAX_NUM_REF + 1];
+ Frame* m_refFrameList[2][MAX_NUM_REF + 1];
+ PicYuv* m_refReconPicList[2][MAX_NUM_REF + 1];
int m_refPOCList[2][MAX_NUM_REF + 1];
uint32_t m_maxNumMergeCand; // use param
@@ -332,14 +346,9 @@
m_lastIDR = 0;
m_sLFaseFlag = true;
m_numRefIdx[0] = m_numRefIdx[1] = 0;
- for (int i = 0; i < MAX_NUM_REF; i++)
- {
- m_refPicList[0][i] = NULL;
- m_refPicList[1][i] = NULL;
- m_refPOCList[0][i] = 0;
- m_refPOCList[1][i] = 0;
- }
-
+ memset(m_refFrameList, 0, sizeof(m_refFrameList));
+ memset(m_refReconPicList, 0, sizeof(m_refReconPicList));
+ memset(m_refPOCList, 0, sizeof(m_refPOCList));
disableWeights();
}
@@ -347,8 +356,6 @@
void setRefPicList(PicList& picList);
- const Frame* getRefPic(int list, int refIdx) const { return refIdx >= 0 ? m_refPicList[list][refIdx] : NULL; }
-
bool getRapPicFlag() const
{
return m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/analysis.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -173,8 +173,8 @@
if (!m_param->rdLevel)
{
/* In RD Level 0/1, copy source pixels into the reconstructed block so
- * they are available for intra predictions */
- m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);
+ * they are available for intra predictions */
+ m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_encData->getOutputRecon(), ctu.m_cuAddr, 0);
compressInterCU_rd0_4(ctu, cuGeom, qp);
@@ -337,7 +337,7 @@
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
if (md.bestMode != &md.pred[PRED_SPLIT])
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_encData->getOutputRecon(), parentCTU.m_cuAddr, cuGeom.absPartIdx);
}
void Analysis::PMODE::processTasks(int workerThreadId)
@@ -747,7 +747,7 @@
/* Copy best data to encData CTU and recon */
md.bestMode->cu.copyToPic(depth);
if (md.bestMode != &md.pred[PRED_SPLIT])
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_encData->getOutputRecon(), cuAddr, cuGeom.absPartIdx);
}
uint32_t Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
@@ -757,6 +757,8 @@
ModeDepth& md = m_modeDepth[depth];
md.bestMode = NULL;
+ PicYuv& reconPic = *m_frame->m_encData->getOutputRecon();
+
bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
@@ -1051,7 +1053,7 @@
residualTransformQuantIntra(*md.bestMode, cuGeom, 0, 0, tuDepthRange);
getBestIntraModeChroma(*md.bestMode, cuGeom);
residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0);
- md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
+ md.bestMode->reconYuv.copyFromPicYuv(reconPic, cu.m_cuAddr, cuGeom.absPartIdx); // TODO:
}
}
}
@@ -1107,7 +1109,7 @@
X265_CHECK(md.bestMode->ok(), "best mode is not ok");
md.bestMode->cu.copyToPic(depth);
if (m_param->rdLevel)
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
+ md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, cuGeom.absPartIdx);
return refMask;
}
@@ -1356,7 +1358,7 @@
/* Copy best data to encData CTU and recon */
X265_CHECK(md.bestMode->ok(), "best mode is not ok");
md.bestMode->cu.copyToPic(depth);
- md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, parentCTU.m_cuAddr, cuGeom.absPartIdx);
+ md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_encData->getOutputRecon(), parentCTU.m_cuAddr, cuGeom.absPartIdx);
return refMask;
}
@@ -1851,6 +1853,8 @@
cu.copyFromPic(ctu, cuGeom);
+ PicYuv& reconPic = *m_frame->m_encData->getOutputRecon();
+
Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
if (cuGeom.depth)
m_modeDepth[0].fencYuv.copyPartToYuv(fencYuv, absPartIdx);
@@ -1906,7 +1910,6 @@
/* residualTransformQuantInter() wrote transformed residual back into
* resiYuv. Generate the recon pixels by adding it to the prediction */
- PicYuv& reconPic = *m_frame->m_reconPic;
if (cu.m_cbf[0][0])
primitives.cu[sizeIdx].add_ps(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
predY, resiYuv.m_buf[0], predYuv.m_size, resiYuv.m_size);
@@ -1969,7 +1972,7 @@
if (m_slice->m_numRefIdx[0])
{
numRefs++;
- const CUData& cu = *m_slice->m_refPicList[0][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
+ const CUData& cu = *m_slice->m_refFrameList[0][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
previousQP = cu.m_qp[0];
if (!cu.m_cuDepth[cuGeom.absPartIdx])
return 0;
@@ -1983,7 +1986,7 @@
if (m_slice->m_numRefIdx[1])
{
numRefs++;
- const CUData& cu = *m_slice->m_refPicList[1][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
+ const CUData& cu = *m_slice->m_refFrameList[1][0]->m_encData->getPicCTU(parentCTU.m_cuAddr);
if (!cu.m_cuDepth[cuGeom.absPartIdx])
return 0;
for (uint32_t i = 0; i < cuGeom.numPartitions; i += 4)
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/dpb.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -47,16 +47,12 @@
delete curFrame;
}
- while (m_picSymFreeList)
+ while (m_frameDataFreeList)
{
- FrameData* next = m_picSymFreeList->m_freeListNext;
- m_picSymFreeList->destroy();
-
- m_picSymFreeList->m_reconPic->destroy();
- delete m_picSymFreeList->m_reconPic;
-
- delete m_picSymFreeList;
- m_picSymFreeList = next;
+ FrameData* next = m_frameDataFreeList->m_freeListNext;
+ m_frameDataFreeList->destroy();
+ delete m_frameDataFreeList;
+ m_frameDataFreeList = next;
}
}
@@ -79,10 +75,9 @@
iterFrame = m_picList.first();
m_freeList.pushBack(*curFrame);
- curFrame->m_encData->m_freeListNext = m_picSymFreeList;
- m_picSymFreeList = curFrame->m_encData;
+ curFrame->m_encData->m_freeListNext = m_frameDataFreeList;
+ m_frameDataFreeList = curFrame->m_encData;
curFrame->m_encData = NULL;
- curFrame->m_reconPic = NULL;
}
}
}
@@ -171,7 +166,7 @@
{
for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
{
- Frame *refpic = slice->m_refPicList[l][ref];
+ Frame *refpic = slice->m_refFrameList[l][ref];
ATOMIC_INC(&refpic->m_countRefEncoders);
}
}
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/dpb.h
--- a/source/encoder/dpb.h Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/dpb.h Mon Aug 03 17:55:58 2015 -0500
@@ -46,14 +46,14 @@
bool m_bTemporalSublayer;
PicList m_picList;
PicList m_freeList;
- FrameData* m_picSymFreeList;
+ FrameData* m_frameDataFreeList;
DPB(x265_param *param)
{
m_lastIDR = 0;
m_pocCRA = 0;
m_bRefreshPending = false;
- m_picSymFreeList = NULL;
+ m_frameDataFreeList = NULL;
m_maxRefL0 = param->maxNumReferences;
m_maxRefL1 = param->bBPyramid ? 2 : 1;
m_bOpenGOP = param->bOpenGOP;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/encoder.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -66,10 +66,6 @@
m_outputCount = 0;
m_param = NULL;
m_latestParam = NULL;
- m_cuOffsetY = NULL;
- m_cuOffsetC = NULL;
- m_buOffsetY = NULL;
- m_buOffsetC = NULL;
m_threadPool = NULL;
m_analysisFile = NULL;
for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)
@@ -318,10 +314,10 @@
delete m_rateControl;
}
- X265_FREE(m_cuOffsetY);
- X265_FREE(m_cuOffsetC);
- X265_FREE(m_buOffsetY);
- X265_FREE(m_buOffsetC);
+ X265_FREE(m_sps.cuOffsetY);
+ X265_FREE(m_sps.cuOffsetC);
+ X265_FREE(m_sps.buOffsetY);
+ X265_FREE(m_sps.buOffsetC);
if (m_analysisFile)
fclose(m_analysisFile);
@@ -416,12 +412,12 @@
/* the first PicYuv created is asked to generate the CU and block unit offset
* arrays which are then shared with all subsequent PicYuv (orig and recon)
* allocated by this top level encoder */
- if (m_cuOffsetY)
+ if (m_sps.cuOffsetY)
{
- inFrame->m_fencPic->m_cuOffsetC = m_cuOffsetC;
- inFrame->m_fencPic->m_cuOffsetY = m_cuOffsetY;
- inFrame->m_fencPic->m_buOffsetC = m_buOffsetC;
- inFrame->m_fencPic->m_buOffsetY = m_buOffsetY;
+ inFrame->m_fencPic->m_cuOffsetC = m_sps.cuOffsetC;
+ inFrame->m_fencPic->m_cuOffsetY = m_sps.cuOffsetY;
+ inFrame->m_fencPic->m_buOffsetC = m_sps.buOffsetC;
+ inFrame->m_fencPic->m_buOffsetY = m_sps.buOffsetY;
}
else
{
@@ -435,10 +431,10 @@
}
else
{
- m_cuOffsetC = inFrame->m_fencPic->m_cuOffsetC;
- m_cuOffsetY = inFrame->m_fencPic->m_cuOffsetY;
- m_buOffsetC = inFrame->m_fencPic->m_buOffsetC;
- m_buOffsetY = inFrame->m_fencPic->m_buOffsetY;
+ m_sps.cuOffsetC = inFrame->m_fencPic->m_cuOffsetC;
+ m_sps.cuOffsetY = inFrame->m_fencPic->m_cuOffsetY;
+ m_sps.buOffsetC = inFrame->m_fencPic->m_buOffsetC;
+ m_sps.buOffsetY = inFrame->m_fencPic->m_buOffsetY;
}
}
}
@@ -538,7 +534,7 @@
if (pic_out)
{
- PicYuv *recpic = outFrame->m_reconPic;
+ PicYuv *recpic = outFrame->m_encData->getOutputRecon();
pic_out->poc = slice->m_poc;
pic_out->bitDepth = X265_DEPTH;
pic_out->userData = outFrame->m_userData;
@@ -633,10 +629,10 @@
if (frameEnc && !pass)
{
/* give this frame a FrameData instance before encoding */
- if (m_dpb->m_picSymFreeList)
+ if (m_dpb->m_frameDataFreeList)
{
- frameEnc->m_encData = m_dpb->m_picSymFreeList;
- m_dpb->m_picSymFreeList = m_dpb->m_picSymFreeList->m_freeListNext;
+ frameEnc->m_encData = m_dpb->m_frameDataFreeList;
+ m_dpb->m_frameDataFreeList = m_dpb->m_frameDataFreeList->m_freeListNext;
frameEnc->reinit(m_sps);
}
else
@@ -647,10 +643,6 @@
slice->m_pps = &m_pps;
slice->m_maxNumMergeCand = m_param->maxNumMergeCand;
slice->m_endCUAddr = slice->realEndAddress(m_sps.numCUsInFrame * NUM_4x4_PARTITIONS);
- frameEnc->m_reconPic->m_cuOffsetC = m_cuOffsetC;
- frameEnc->m_reconPic->m_cuOffsetY = m_cuOffsetY;
- frameEnc->m_reconPic->m_buOffsetC = m_buOffsetC;
- frameEnc->m_reconPic->m_buOffsetY = m_buOffsetY;
}
curEncoder->m_rce.encodeOrder = m_encodedFrameNum++;
@@ -1054,7 +1046,7 @@
void Encoder::finishFrameStats(Frame* curFrame, FrameEncoder *curEncoder, uint64_t bits, x265_frame_stats* frameStats)
{
- PicYuv* reconPic = curFrame->m_reconPic;
+ PicYuv *reconPic = curFrame->m_encData->getOutputRecon();
//===== calculate PSNR =====
int width = reconPic->m_picWidth - m_sps.conformanceWindow.rightOffset;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/encoder.h
--- a/source/encoder/encoder.h Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/encoder.h Mon Aug 03 17:55:58 2015 -0500
@@ -93,13 +93,6 @@
int m_numPools;
int m_curEncoder;
- /* cached PicYuv offset arrays, shared by all instances of
- * PicYuv created by this encoder */
- intptr_t* m_cuOffsetY;
- intptr_t* m_cuOffsetC;
- intptr_t* m_buOffsetY;
- intptr_t* m_buOffsetC;
-
/* Collect statistics globally */
EncStats m_analyzeAll;
EncStats m_analyzeI;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/frameencoder.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -329,6 +329,16 @@
if (m_frame->m_lowres.bKeyframe && m_param->bRepeatHeaders)
m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);
+#if (defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7) || HAVE_LIBNUMA
+ int numaNode = 0; // m_pool ? m_pool->m_numaNode : 0;
+#else
+ int numaNode = 0;
+#endif
+
+ /* Claim this frame as being encoded by this NUMA node */
+ m_frame->m_encData->allocRecon(m_top->m_sps, numaNode); /* TODO: bail if failure */
+ m_frame->m_encData->m_ownerNode = numaNode;
+
// Weighted Prediction parameters estimation.
bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred;
bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred;
@@ -357,7 +367,9 @@
WeightParam *w = NULL;
if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag)
w = slice->m_weightPredTable[l][ref];
- m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);
+ slice->m_refFrameList[l][ref]->m_encData->allocRecon(m_top->m_sps, numaNode); /* TODO: bail if failure */
+ slice->m_refReconPicList[l][ref] = slice->m_refFrameList[l][ref]->m_encData->m_nodes[numaNode].reconPic;
+ m_mref[l][ref].init(slice->m_refReconPicList[l][ref], w, *m_param);
}
}
@@ -477,7 +489,7 @@
/* CQP and CRF (without capped VBV) doesn't use mid-frame statistics to
* tune RateControl parameters for other frames.
* Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
- * RateControlEnd here, after the slicecontexts are initialized. For the rest - ABR
+ * RateControlEnd here, after the slice contexts are initialized. For the rest - ABR
* and VBV, unlock only after rateControlUpdateStats of this frame is called */
if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)
{
@@ -501,12 +513,14 @@
{
for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
{
- Frame *refpic = slice->m_refPicList[l][ref];
+ Frame *refpic = slice->m_refFrameList[l][ref];
uint32_t reconRowCount = refpic->m_reconRowCount.get();
while ((reconRowCount != m_numRows) && (reconRowCount < row + m_refLagRows))
reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);
+ /* TODO: if refpic->m_encData->m_ownerNode != numaNode, copy rows */
+
if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows);
}
@@ -540,12 +554,14 @@
int list = l;
for (int ref = 0; ref < slice->m_numRefIdx[list]; ref++)
{
- Frame *refpic = slice->m_refPicList[list][ref];
+ Frame *refpic = slice->m_refFrameList[list][ref];
uint32_t reconRowCount = refpic->m_reconRowCount.get();
while ((reconRowCount != m_numRows) && (reconRowCount < i + m_refLagRows))
reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);
+ /* TODO: if refpic->m_encData->m_ownerNode != numaNode, copy rows */
+
if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
m_mref[list][ref].applyWeight(i + m_refLagRows, m_numRows);
}
@@ -697,7 +713,7 @@
{
for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
{
- Frame *refpic = slice->m_refPicList[l][ref];
+ Frame *refpic = slice->m_refFrameList[l][ref];
ATOMIC_DEC(&refpic->m_countRefEncoders);
}
}
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/framefilter.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -160,7 +160,7 @@
void FrameFilter::processRowPost(int row)
{
- PicYuv *reconPic = m_frame->m_reconPic;
+ PicYuv *reconPic = m_frame->m_encData->getOutputRecon();
const uint32_t numCols = m_frame->m_encData->m_slice->m_sps->numCuInWidth;
const uint32_t lineStartCUAddr = row * numCols;
const int realH = getCUHeight(row);
@@ -233,10 +233,10 @@
}
if (m_param->bEnableSsim && m_ssimBuf)
{
- pixel *rec = m_frame->m_reconPic->m_picOrg[0];
+ pixel *rec = reconPic->m_picOrg[0];
pixel *fenc = m_frame->m_fencPic->m_picOrg[0];
- intptr_t stride1 = m_frame->m_fencPic->m_stride;
- intptr_t stride2 = m_frame->m_reconPic->m_stride;
+ intptr_t stride1 = reconPic->m_stride;
+ intptr_t stride2 = m_frame->m_fencPic->m_stride;
uint32_t bEnd = ((row + 1) == (this->m_numRows - 1));
uint32_t bStart = (row == 0);
uint32_t minPixY = row * g_maxCUSize - 4 * !bStart;
@@ -407,7 +407,7 @@
int size = cu->m_log2CUSize[absPartIdx] - 2;
uint32_t cuAddr = cu->m_cuAddr;
- PicYuv* reconPic = frame.m_reconPic;
+ PicYuv* reconPic = frame.m_encData->getOutputRecon();
PicYuv* fencPic = frame.m_fencPic;
pixel* dst = reconPic->getLumaAddr(cuAddr, absPartIdx);
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/ratecontrol.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -1345,10 +1345,10 @@
{
/* B-frames don't have independent rate control, but rather get the
* average QP of the two adjacent P-frames + an offset */
- Slice* prevRefSlice = m_curSlice->m_refPicList[0][0]->m_encData->m_slice;
- Slice* nextRefSlice = m_curSlice->m_refPicList[1][0]->m_encData->m_slice;
- double q0 = m_curSlice->m_refPicList[0][0]->m_encData->m_avgQpRc;
- double q1 = m_curSlice->m_refPicList[1][0]->m_encData->m_avgQpRc;
+ Slice* prevRefSlice = m_curSlice->m_refFrameList[0][0]->m_encData->m_slice;
+ Slice* nextRefSlice = m_curSlice->m_refFrameList[1][0]->m_encData->m_slice;
+ double q0 = m_curSlice->m_refFrameList[0][0]->m_encData->m_avgQpRc;
+ double q1 = m_curSlice->m_refFrameList[1][0]->m_encData->m_avgQpRc;
bool i0 = prevRefSlice->m_sliceType == I_SLICE;
bool i1 = nextRefSlice->m_sliceType == I_SLICE;
int dt0 = abs(m_curSlice->m_poc - prevRefSlice->m_poc);
@@ -1364,9 +1364,9 @@
q0 = q1;
}
}
- if (prevRefSlice->m_sliceType == B_SLICE && IS_REFERENCED(m_curSlice->m_refPicList[0][0]))
+ if (prevRefSlice->m_sliceType == B_SLICE && IS_REFERENCED(m_curSlice->m_refFrameList[0][0]))
q0 -= m_pbOffset / 2;
- if (nextRefSlice->m_sliceType == B_SLICE && IS_REFERENCED(m_curSlice->m_refPicList[1][0]))
+ if (nextRefSlice->m_sliceType == B_SLICE && IS_REFERENCED(m_curSlice->m_refFrameList[1][0]))
q1 -= m_pbOffset / 2;
if (i0 && i1)
q = (q0 + q1) / 2 + m_ipOffset;
@@ -1483,7 +1483,7 @@
* Then bias the quant up or down if total size so far was far from
* the target.
* Result: Depending on the value of rate_tolerance, there is a
- * tradeoff between quality and bitrate precision. But at large
+ * trade-off between quality and bitrate precision. But at large
* tolerances, the bit distribution approaches that of 2pass. */
double overflow = 1;
@@ -1832,7 +1832,7 @@
double qScale = x265_qp2qScale(qpVbv);
FrameData& curEncData = *curFrame->m_encData;
int picType = curEncData.m_slice->m_sliceType;
- Frame* refFrame = curEncData.m_slice->m_refPicList[0][0];
+ Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
uint32_t maxRows = curEncData.m_slice->m_sps->numCuInHeight;
uint32_t maxCols = curEncData.m_slice->m_sps->numCuInWidth;
@@ -1921,7 +1921,7 @@
updatePredictor(rce->rowPred[0], qScaleVbv, (double)rowSatdCost, encodedBits);
if (curEncData.m_slice->m_sliceType == P_SLICE)
{
- Frame* refFrame = curEncData.m_slice->m_refPicList[0][0];
+ Frame* refFrame = curEncData.m_slice->m_refFrameList[0][0];
if (qpVbv < refFrame->m_encData->m_rowStat[row].diagQp)
{
uint64_t intraRowSatdCost = curEncData.m_rowStat[row].diagIntraSatd;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/sao.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -243,12 +243,13 @@
void SAO::processSaoCu(int addr, int typeIdx, int plane)
{
int x, y;
- const CUData* cu = m_frame->m_encData->getPicCTU(addr);
- pixel* rec = m_frame->m_reconPic->getPlaneAddr(plane, addr);
- intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ pixel* rec = reconPic->getPlaneAddr(plane, addr);
+ intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
uint32_t picWidth = m_param->sourceWidth;
uint32_t picHeight = m_param->sourceHeight;
- int ctuWidth = g_maxCUSize;
+ const CUData* cu = m_frame->m_encData->getPicCTU(addr);
+ int ctuWidth = g_maxCUSize;
int ctuHeight = g_maxCUSize;
uint32_t lpelx = cu->m_cuPelX;
uint32_t tpely = cu->m_cuPelY;
@@ -572,7 +573,8 @@
/* Process SAO all units */
void SAO::processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane)
{
- intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
uint32_t picWidth = m_param->sourceWidth;
int ctuWidth = g_maxCUSize;
int ctuHeight = g_maxCUSize;
@@ -585,12 +587,12 @@
if (!idxY)
{
- pixel* rec = m_frame->m_reconPic->m_picOrg[plane];
+ pixel* rec = reconPic->m_picOrg[plane];
memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidth);
}
int addr = idxY * m_numCuInWidth;
- pixel* rec = plane ? m_frame->m_reconPic->getChromaAddr(plane, addr) : m_frame->m_reconPic->getLumaAddr(addr);
+ pixel* rec = plane ? reconPic->getChromaAddr(plane, addr) : reconPic->getLumaAddr(addr);
for (int i = 0; i < ctuHeight + 1; i++)
{
@@ -635,7 +637,7 @@
}
else if (idxX != (m_numCuInWidth - 1))
{
- rec = plane ? m_frame->m_reconPic->getChromaAddr(plane, addr) : m_frame->m_reconPic->getLumaAddr(addr);
+ rec = plane ? reconPic->getChromaAddr(plane, addr) : reconPic->getLumaAddr(addr);
for (int i = 0; i < ctuHeight + 1; i++)
{
@@ -671,12 +673,13 @@
/* Calculate SAO statistics for current CTU without non-crossing slice */
void SAO::calcSaoStatsCu(int addr, int plane)
{
+ const PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
const CUData* cu = m_frame->m_encData->getPicCTU(addr);
const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
- const pixel* rec0 = m_frame->m_reconPic->getPlaneAddr(plane, addr);
+ const pixel* rec0 = reconPic->getPlaneAddr(plane, addr);
const pixel* fenc;
const pixel* rec;
- intptr_t stride = plane ? m_frame->m_reconPic->m_strideC : m_frame->m_reconPic->m_stride;
+ intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride;
uint32_t picWidth = m_param->sourceWidth;
uint32_t picHeight = m_param->sourceHeight;
int ctuWidth = g_maxCUSize;
@@ -825,9 +828,10 @@
int x, y;
const CUData* cu = frame->m_encData->getPicCTU(addr);
+ const PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
const pixel* fenc;
const pixel* rec;
- intptr_t stride = m_frame->m_reconPic->m_stride;
+ intptr_t stride = reconPic->m_stride;
uint32_t picWidth = m_param->sourceWidth;
uint32_t picHeight = m_param->sourceHeight;
int ctuWidth = g_maxCUSize;
@@ -861,7 +865,7 @@
{
if (plane == 1)
{
- stride = frame->m_reconPic->m_strideC;
+ stride = reconPic->m_strideC;
picWidth >>= m_hChromaShift;
picHeight >>= m_vChromaShift;
ctuWidth >>= m_hChromaShift;
@@ -881,7 +885,7 @@
count = m_countPreDblk[addr][plane][SAO_BO];
const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
- const pixel* rec0 = m_frame->m_reconPic->getPlaneAddr(plane, addr);
+ const pixel* rec0 = reconPic->getPlaneAddr(plane, addr);
fenc = fenc0;
rec = rec0;
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/search.cpp
--- a/source/encoder/search.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/search.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -446,8 +446,9 @@
}
// set reconstruction for next intra prediction blocks if full TU prediction won
- pixel* picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
- intptr_t picStride = m_frame->m_reconPic->m_stride;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ pixel* picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
+ intptr_t picStride = reconPic->m_stride;
primitives.cu[sizeIdx].copy_pp(picReconY, picStride, reconQt, reconQtStride);
outCost.rdcost += fullCost.rdcost;
@@ -611,8 +612,9 @@
}
// set reconstruction for next intra prediction blocks
- pixel* picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
- intptr_t picStride = m_frame->m_reconPic->m_stride;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ pixel* picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
+ intptr_t picStride = reconPic->m_stride;
primitives.cu[sizeIdx].copy_pp(picReconY, picStride, reconQt, reconQtStride);
outCost.rdcost += fullCost.rdcost;
@@ -661,8 +663,9 @@
uint32_t sizeIdx = log2TrSize - 2;
primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
- pixel* picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
- intptr_t picStride = m_frame->m_reconPic->m_stride;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ pixel* picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
+ intptr_t picStride = reconPic->m_stride;
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
if (numSig)
@@ -821,8 +824,9 @@
coeff_t* coeffC = m_rqt[qtLayer].coeffRQT[chromaId] + coeffOffsetC;
pixel* reconQt = m_rqt[qtLayer].reconQtYuv.getChromaAddr(chromaId, absPartIdxC);
uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize;
- pixel* picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
- intptr_t picStride = m_frame->m_reconPic->m_strideC;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ pixel* picReconC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
+ intptr_t picStride = reconPic->m_strideC;
uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
if (chromaPredMode == DM_CHROMA_IDX)
@@ -998,8 +1002,9 @@
cu.setCbfPartRange(bCbf << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
cu.setTransformSkipPartRange(bTSkip, ttype, absPartIdxC, tuIterator.absPartIdxStep);
- pixel* reconPicC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
- intptr_t picStride = m_frame->m_reconPic->m_strideC;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ pixel* reconPicC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
+ intptr_t picStride = reconPic->m_strideC;
primitives.cu[sizeIdxC].copy_pp(reconPicC, picStride, reconQt, reconQtStride);
outDist += bDist;
@@ -1108,8 +1113,9 @@
int16_t* residual = resiYuv.getChromaAddr(chromaId, absPartIdxC);
uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (m_hChromaShift + m_vChromaShift));
coeff_t* coeffC = cu.m_trCoeff[ttype] + coeffOffsetC;
- pixel* picReconC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
- intptr_t picStride = m_frame->m_reconPic->m_strideC;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ pixel* picReconC = reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.absPartIdx + absPartIdxC);
+ intptr_t picStride = reconPic->m_strideC;
uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC];
if (chromaPredMode == DM_CHROMA_IDX)
@@ -1591,10 +1597,11 @@
* output recon picture, so it cannot proceed in parallel with anything else when doing INTRA_NXN. Also
* it is not updating m_rdContexts[depth].cur for the later PUs which I suspect is slightly wrong. I think
* that the contexts should be tracked through each PU */
- pixel* dst = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
- uint32_t dststride = m_frame->m_reconPic->m_stride;
- const pixel* src = reconYuv->getLumaAddr(absPartIdx);
- uint32_t srcstride = reconYuv->m_size;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ pixel* dst = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
+ uint32_t dststride = reconPic->m_stride;
+ const pixel* src = reconYuv->getLumaAddr(absPartIdx);
+ uint32_t srcstride = reconYuv->m_size;
primitives.cu[log2TrSize - 2].copy_pp(dst, dststride, src, srcstride);
}
}
@@ -1757,15 +1764,16 @@
if (!tuIterator.isLastSection())
{
uint32_t zorder = cuGeom.absPartIdx + absPartIdxC;
- uint32_t dststride = m_frame->m_reconPic->m_strideC;
+ PicYuv* reconPic = m_frame->m_encData->getOutputRecon();
+ uint32_t dststride = reconPic->m_strideC;
const pixel* src;
pixel* dst;
- dst = m_frame->m_reconPic->getCbAddr(cu.m_cuAddr, zorder);
+ dst = reconPic->getCbAddr(cu.m_cuAddr, zorder);
src = reconYuv.getCbAddr(absPartIdxC);
primitives.chroma[m_csp].cu[size].copy_pp(dst, dststride, src, reconYuv.m_csize);
- dst = m_frame->m_reconPic->getCrAddr(cu.m_cuAddr, zorder);
+ dst = reconPic->getCrAddr(cu.m_cuAddr, zorder);
src = reconYuv.getCrAddr(absPartIdxC);
primitives.chroma[m_csp].cu[size].copy_pp(dst, dststride, src, reconYuv.m_csize);
}
@@ -1866,7 +1874,7 @@
/* find the lowres motion vector from lookahead in middle of current PU */
MV Search::getLowresMV(const CUData& cu, const PredictionUnit& pu, int list, int ref)
{
- int diffPoc = abs(m_slice->m_poc - m_slice->m_refPicList[list][ref]->m_poc);
+ int diffPoc = abs(m_slice->m_poc - m_slice->m_refPOCList[list][ref]);
if (diffPoc > m_param->bframes + 1)
/* poc difference is out of range for lookahead */
return 0;
@@ -1906,7 +1914,7 @@
else
{
cu.clipMv(mvCand);
- predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refPicList[list][ref]->m_reconPic, mvCand);
+ predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refReconPicList[list][ref], mvCand);
costs[i] = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
}
}
@@ -2197,8 +2205,8 @@
}
else
{
- PicYuv* refPic0 = slice->m_refPicList[0][bestME[0].ref]->m_reconPic;
- PicYuv* refPic1 = slice->m_refPicList[1][bestME[1].ref]->m_reconPic;
+ PicYuv* refPic0 = slice->m_refReconPicList[0][bestME[0].ref];
+ PicYuv* refPic1 = slice->m_refReconPicList[1][bestME[1].ref];
Yuv* bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv;
/* Generate reference subpels */
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/slicetype.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -714,16 +714,16 @@
case P_SLICE:
b = p1 = poc - l0poc;
- frames[p0] = &slice->m_refPicList[0][0]->m_lowres;
+ frames[p0] = &slice->m_refFrameList[0][0]->m_lowres;
frames[b] = &curFrame->m_lowres;
break;
case B_SLICE:
b = poc - l0poc;
p1 = b + l1poc - poc;
- frames[p0] = &slice->m_refPicList[0][0]->m_lowres;
+ frames[p0] = &slice->m_refFrameList[0][0]->m_lowres;
frames[b] = &curFrame->m_lowres;
- frames[p1] = &slice->m_refPicList[1][0]->m_lowres;
+ frames[p1] = &slice->m_refFrameList[1][0]->m_lowres;
break;
default:
diff -r 1f161d9c6e35 -r 02e84edaa143 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Mon Aug 03 14:56:21 2015 -0500
+++ b/source/encoder/weightPrediction.cpp Mon Aug 03 17:55:58 2015 -0500
@@ -259,7 +259,7 @@
for (int list = 0; list < cache.numPredDir; list++)
{
WeightParam *weights = wp[list][0];
- Frame *refFrame = slice.m_refPicList[list][0];
+ Frame *refFrame = slice.m_refFrameList[list][0];
Lowres& refLowres = refFrame->m_lowres;
int diffPoc = abs(curPoc - refFrame->m_poc);
More information about the x265-devel
mailing list