[x265] [PATCH] analysis: improve Intra Information sharing using analysis-mode
gopu at multicorewareinc.com
gopu at multicorewareinc.com
Mon Feb 9 12:53:17 CET 2015
# HG changeset patch
# User Gopu Govindaswamy <gopu at multicorewareinc.com>
# Date 1423472686 -19800
# Mon Feb 09 14:34:46 2015 +0530
# Node ID 598a63f153e167912c738f65fc340965b62790a6
# Parent b6f36b277234d7f402d67986dab969e7bc655646
analysis: improve Intra Information sharing using analysis-mode
Dump and reuse the chroma modes for intra frames; this improves the performance of
--analysis-mode=load.
Fix the binary mismatch for the same CLI when using --analysis-mode=save|load.
diff -r b6f36b277234 -r 598a63f153e1 source/common/common.h
--- a/source/common/common.h Fri Feb 06 21:57:45 2015 -0600
+++ b/source/common/common.h Mon Feb 09 14:34:46 2015 +0530
@@ -384,6 +384,7 @@
uint8_t* depth;
uint8_t* modes;
char* partSizes;
+ uint8_t* chromaModes;
};
enum TextType
diff -r b6f36b277234 -r 598a63f153e1 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Feb 06 21:57:45 2015 -0600
+++ b/source/encoder/analysis.cpp Mon Feb 09 14:34:46 2015 +0530
@@ -155,6 +155,7 @@
memcpy(&m_reuseIntraDataCTU->depth[ctu.m_cuAddr * numPartition], bestCU->m_cuDepth, sizeof(uint8_t) * numPartition);
memcpy(&m_reuseIntraDataCTU->modes[ctu.m_cuAddr * numPartition], bestCU->m_lumaIntraDir, sizeof(uint8_t) * numPartition);
memcpy(&m_reuseIntraDataCTU->partSizes[ctu.m_cuAddr * numPartition], bestCU->m_partSize, sizeof(uint8_t) * numPartition);
+ memcpy(&m_reuseIntraDataCTU->chromaModes[ctu.m_cuAddr * numPartition], bestCU->m_chromaIntraDir, sizeof(uint8_t) * numPartition);
}
}
else
@@ -201,7 +202,7 @@
md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
- checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes);
+ checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
}
else
@@ -227,6 +228,7 @@
uint8_t* reuseDepth = &m_reuseIntraDataCTU->depth[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
uint8_t* reuseModes = &m_reuseIntraDataCTU->modes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
char* reusePartSizes = &m_reuseIntraDataCTU->partSizes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
+ uint8_t* reuseChromaModes = &m_reuseIntraDataCTU->chromaModes[parentCTU.m_cuAddr * parentCTU.m_numPartitions];
if (mightNotSplit && depth == reuseDepth[zOrder] && zOrder == cuGeom.encodeIdx)
{
@@ -235,7 +237,7 @@
PartSize size = (PartSize)reusePartSizes[zOrder];
Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
mode.cu.initSubCU(parentCTU, cuGeom);
- checkIntra(mode, cuGeom, size, &reuseModes[zOrder]);
+ checkIntra(mode, cuGeom, size, &reuseModes[zOrder], &reuseChromaModes[zOrder]);
checkBestMode(mode, depth);
if (m_bTryLossless)
@@ -254,13 +256,13 @@
m_quant.setQPforQuant(parentCTU);
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
+ checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA], depth);
if (depth == g_maxCUDepth)
{
md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
- checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL);
+ checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
@@ -468,9 +470,9 @@
switch (jobId)
{
case 0:
- slave->checkIntra(md.pred[PRED_INTRA], *m_curGeom, SIZE_2Nx2N, NULL);
+ slave->checkIntra(md.pred[PRED_INTRA], *m_curGeom, SIZE_2Nx2N, NULL, NULL);
if (m_curGeom->depth == g_maxCUDepth && m_curGeom->log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
- slave->checkIntra(md.pred[PRED_INTRA_NxN], *m_curGeom, SIZE_NxN, NULL);
+ slave->checkIntra(md.pred[PRED_INTRA_NxN], *m_curGeom, SIZE_NxN, NULL, NULL);
break;
case 1:
@@ -1095,13 +1097,13 @@
(!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
+ checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA], depth);
if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
{
md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
- checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder]);
+ checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, &reuseModes[zOrder], NULL);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
}
@@ -1212,13 +1214,13 @@
(!m_param->bEnableCbfFastMode || md.bestMode->cu.getQtRootCbf(0)))
{
md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom);
- checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL);
+ checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA], depth);
if (depth == g_maxCUDepth && cuGeom.log2CUSize > m_slice->m_sps->quadtreeTULog2MinSize)
{
md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom);
- checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL);
+ checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, NULL);
checkBestMode(md.pred[PRED_INTRA_NxN], depth);
}
}
diff -r b6f36b277234 -r 598a63f153e1 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Feb 06 21:57:45 2015 -0600
+++ b/source/encoder/encoder.cpp Mon Feb 09 14:34:46 2015 +0530
@@ -1744,6 +1744,7 @@
CHECKED_MALLOC(intraData->depth, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
CHECKED_MALLOC(intraData->modes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
CHECKED_MALLOC(intraData->partSizes, char, analysis->numPartitions * analysis->numCUsInFrame);
+ CHECKED_MALLOC(intraData->chromaModes, uint8_t, analysis->numPartitions * analysis->numCUsInFrame);
analysis->intraData = intraData;
}
else
@@ -1769,6 +1770,7 @@
X265_FREE(((analysis_intra_data*)analysis->intraData)->depth);
X265_FREE(((analysis_intra_data*)analysis->intraData)->modes);
X265_FREE(((analysis_intra_data*)analysis->intraData)->partSizes);
+ X265_FREE(((analysis_intra_data*)analysis->intraData)->chromaModes);
X265_FREE(analysis->intraData);
}
else
@@ -1833,6 +1835,7 @@
X265_FREAD(((analysis_intra_data *)analysis->intraData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
X265_FREAD(((analysis_intra_data *)analysis->intraData)->partSizes, sizeof(char), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FREAD(((analysis_intra_data *)analysis->intraData)->chromaModes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
analysis->sliceType = X265_TYPE_I;
consumedBytes += frameRecordSize;
}
@@ -1870,7 +1873,7 @@
analysis->frameRecordSize = sizeof(analysis->frameRecordSize) + sizeof(analysis->poc) + sizeof(analysis->sliceType) +
sizeof(analysis->numCUsInFrame) + sizeof(analysis->numPartitions);
if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)
- analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 3;
+ analysis->frameRecordSize += sizeof(uint8_t) * analysis->numCUsInFrame * analysis->numPartitions * 4;
else if (analysis->sliceType == X265_TYPE_P)
{
analysis->frameRecordSize += sizeof(int32_t) * analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU;
@@ -1893,6 +1896,7 @@
X265_FWRITE(((analysis_intra_data*)analysis->intraData)->depth, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
X265_FWRITE(((analysis_intra_data*)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
X265_FWRITE(((analysis_intra_data*)analysis->intraData)->partSizes, sizeof(char), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+ X265_FWRITE(((analysis_intra_data*)analysis->intraData)->chromaModes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
}
else if (analysis->sliceType == X265_TYPE_P)
{
diff -r b6f36b277234 -r 598a63f153e1 source/encoder/search.cpp
--- a/source/encoder/search.cpp Fri Feb 06 21:57:45 2015 -0600
+++ b/source/encoder/search.cpp Mon Feb 09 14:34:46 2015 +0530
@@ -1130,7 +1130,7 @@
}
}
-void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes)
+void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint8_t* sharedChromaModes)
{
CUData& cu = intraMode.cu;
@@ -1143,7 +1143,7 @@
intraMode.initCosts();
intraMode.distortion += estIntraPredQT(intraMode, cuGeom, tuDepthRange, sharedModes);
- intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom);
+ intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom, sharedChromaModes);
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
@@ -1368,7 +1368,7 @@
extractIntraResultQT(cu, *reconYuv, 0, 0);
intraMode.distortion = icosts.distortion;
- intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom);
+ intraMode.distortion += estIntraPredChromaQT(intraMode, cuGeom, NULL);
m_entropyCoder.resetBits();
if (m_slice->m_pps->bTransquantBypassEnabled)
@@ -1669,7 +1669,7 @@
cu.setChromIntraDirSubParts(bestMode, 0, cuGeom.depth);
}
-uint32_t Search::estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom)
+uint32_t Search::estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom, uint8_t* sharedChromaModes)
{
CUData& cu = intraMode.cu;
Yuv& reconYuv = intraMode.reconYuv;
@@ -1697,7 +1697,14 @@
uint32_t maxMode = NUM_CHROMA_MODE;
uint32_t modeList[NUM_CHROMA_MODE];
- cu.getAllowedChromaDir(absPartIdxC, modeList);
+ if (sharedChromaModes)
+ {
+ for (uint32_t l = 0; l < NUM_CHROMA_MODE; l++)
+ modeList[l] = sharedChromaModes[0];
+ maxMode = 1;
+ }
+ else
+ cu.getAllowedChromaDir(absPartIdxC, modeList);
// check chroma modes
for (uint32_t mode = minMode; mode < maxMode; mode++)
diff -r b6f36b277234 -r 598a63f153e1 source/encoder/search.h
--- a/source/encoder/search.h Fri Feb 06 21:57:45 2015 -0600
+++ b/source/encoder/search.h Mon Feb 09 14:34:46 2015 +0530
@@ -247,7 +247,7 @@
void invalidateContexts(int fromDepth);
// full RD search of intra modes. if sharedModes is not NULL, it directly uses them
- void checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes);
+ void checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize partSize, uint8_t* sharedModes, uint8_t* sharedChromaModes);
// select best intra mode using only sa8d costs, cannot measure NxN intra
void checkIntraInInter(Mode& intraMode, const CUGeom& cuGeom);
@@ -291,7 +291,7 @@
uint32_t estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2], uint8_t* sharedModes);
// RDO select best chroma mode from luma; result is fully encode chroma. chroma distortion is returned
- uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom);
+ uint32_t estIntraPredChromaQT(Mode &intraMode, const CUGeom& cuGeom, uint8_t* sharedChromaModes);
void codeSubdivCbfQTChroma(const CUData& cu, uint32_t tuDepth, uint32_t absPartIdx);
void codeInterSubdivCbfQT(CUData& cu, uint32_t absPartIdx, const uint32_t tuDepth, const uint32_t depthRange[2]);
More information about the x265-devel
mailing list