[x265] [PATCH] analysis: enable scaled save and load runs with different max CU sizes
bhavna at multicorewareinc.com
bhavna at multicorewareinc.com
Thu May 17 08:53:50 CEST 2018
# HG changeset patch
# User Bhavna Hariharan <bhavna at multicorewareinc.com>
# Date 1526539714 -19800
# Thu May 17 12:18:34 2018 +0530
# Branch stable
# Node ID 3cef29225ef431c820c8e5593b00c3c225bfffdc
# Parent e70f8897811514877bed1f1f318ed95d24658af0
analysis: enable scaled save and load runs with different max CU sizes
This patch allows either of the following:
(i) The CTU sizes of the save and load encodes are the same.
(or)
(ii) The CTU size of the load encode is double the CTU size of the save encode.
diff -r e70f88978115 -r 3cef29225ef4 doc/reST/cli.rst
--- a/doc/reST/cli.rst Thu May 17 12:11:45 2018 +0530
+++ b/doc/reST/cli.rst Thu May 17 12:18:34 2018 +0530
@@ -911,9 +911,10 @@
.. option:: --scale-factor
- Factor by which input video is scaled down for analysis save mode.
- This option should be coupled with analysis-reuse-mode option, --analysis-reuse-level 10.
- The ctu size of load should be double the size of save. Default 0.
+ Factor by which input video is scaled down for analysis save mode.
+ This option should be coupled with analysis-reuse-mode option,
+ --analysis-reuse-level 10. The ctu size of load can either be the
+ same as that of save or double the size of save. Default 0.
.. option:: --refine-intra <0..4>
diff -r e70f88978115 -r 3cef29225ef4 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu May 17 12:11:45 2018 +0530
+++ b/source/encoder/encoder.cpp Thu May 17 12:18:34 2018 +0530
@@ -97,6 +97,7 @@
m_prevTonemapPayload.payload = NULL;
m_startPoint = 0;
+ m_saveCTUSize = 0;
}
inline char *strcatFilename(const char *input, const char *suffix)
{
@@ -1078,7 +1079,23 @@
if (paramBytes == -1)
m_aborted = true;
}
- readAnalysisFile(&inFrame->m_analysisData, inFrame->m_poc, pic_in, paramBytes);
+ if (m_saveCTUSize)
+ {
+ cuLocation cuLocInFrame;
+ cuLocInFrame.init(m_param);
+ /* Set skipWidth/skipHeight flags when the out-of-bound pixels in lowRes are greater than or equal to half of maxCUSize */
+ int extendedWidth = ((m_param->sourceWidth / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
+ int extendedHeight = ((m_param->sourceHeight / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
+ uint32_t outOfBoundaryLowres = extendedWidth - m_param->sourceWidth / 2;
+ if (outOfBoundaryLowres * 2 >= m_param->maxCUSize)
+ cuLocInFrame.skipWidth = true;
+ uint32_t outOfBoundaryLowresH = extendedHeight - m_param->sourceHeight / 2;
+ if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
+ cuLocInFrame.skipHeight = true;
+ readAnalysisFile(&inFrame->m_analysisData, inFrame->m_poc, pic_in, paramBytes, cuLocInFrame);
+ }
+ else
+ readAnalysisFile(&inFrame->m_analysisData, inFrame->m_poc, pic_in, paramBytes);
inFrame->m_poc = inFrame->m_analysisData.poc;
sliceType = inFrame->m_analysisData.sliceType;
inFrame->m_lowres.bScenecut = !!inFrame->m_analysisData.bScenecut;
@@ -3320,7 +3337,265 @@
void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x265_picture* picIn, int paramBytes)
{
-
+#define X265_FREAD(val, size, readSize, fileOffset, src)\
+ if (!m_param->bUseAnalysisFile)\
+ {\
+ memcpy(val, src, (size * readSize));\
+ }\
+ else if (fread(val, size, readSize, fileOffset) != readSize)\
+ {\
+ x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data\n");\
+ freeAnalysis(analysis);\
+ m_aborted = true;\
+ return;\
+ }\
+
+ static uint64_t consumedBytes = 0;
+ static uint64_t totalConsumedBytes = 0;
+ uint32_t depthBytes = 0;
+ if (m_param->bUseAnalysisFile)
+ fseeko(m_analysisFileIn, totalConsumedBytes + paramBytes, SEEK_SET);
+ const x265_analysis_data *picData = &(picIn->analysisData);
+ analysis_intra_data *intraPic = (analysis_intra_data *)picData->intraData;
+ analysis_inter_data *interPic = (analysis_inter_data *)picData->interData;
+
+ int poc; uint32_t frameRecordSize;
+ X265_FREAD(&frameRecordSize, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->frameRecordSize));
+ X265_FREAD(&depthBytes, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->depthBytes));
+ X265_FREAD(&poc, sizeof(int), 1, m_analysisFileIn, &(picData->poc));
+
+ if (m_param->bUseAnalysisFile)
+ {
+ uint64_t currentOffset = totalConsumedBytes;
+
+ /* Seeking to the right frame Record */
+ while (poc != curPoc && !feof(m_analysisFileIn))
+ {
+ currentOffset += frameRecordSize;
+ fseeko(m_analysisFileIn, currentOffset + paramBytes, SEEK_SET);
+ X265_FREAD(&frameRecordSize, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->frameRecordSize));
+ X265_FREAD(&depthBytes, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->depthBytes));
+ X265_FREAD(&poc, sizeof(int), 1, m_analysisFileIn, &(picData->poc));
+ }
+ if (poc != curPoc || feof(m_analysisFileIn))
+ {
+ x265_log(NULL, X265_LOG_WARNING, "Error reading analysis data: Cannot find POC %d\n", curPoc);
+ freeAnalysis(analysis);
+ return;
+ }
+ }
+
+ /* Now arrived at the right frame, read the record */
+ analysis->poc = poc;
+ analysis->frameRecordSize = frameRecordSize;
+ X265_FREAD(&analysis->sliceType, sizeof(int), 1, m_analysisFileIn, &(picData->sliceType));
+ X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFileIn, &(picData->bScenecut));
+ X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFileIn, &(picData->satdCost));
+ X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame));
+ X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFileIn, &(picData->numPartitions));
+ if (m_param->bDisableLookahead)
+ {
+ X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight));
+ X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, m_analysisFileIn, &(picData->lookahead));
+ }
+ int scaledNumPartition = analysis->numPartitions;
+ int factor = 1 << m_param->scaleFactor;
+
+ if (m_param->scaleFactor)
+ analysis->numPartitions *= factor;
+ /* Memory is allocated for inter and intra analysis data based on the slicetype */
+ allocAnalysis(analysis);
+ if (m_param->bDisableLookahead && m_rateControl->m_isVbv)
+ {
+ X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost);
+ X265_FREAD(analysis->lookahead.vbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.vbvCost);
+ X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv);
+ X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv);
+ }
+ if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)
+ {
+ if (m_param->analysisReuseLevel < 2)
+ return;
+
+ uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSizes = NULL;
+
+ tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
+ depthBuf = tempBuf;
+ modeBuf = tempBuf + depthBytes;
+ partSizes = tempBuf + 2 * depthBytes;
+
+ X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->depth);
+ X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes);
+ X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->partSizes);
+
+ size_t count = 0;
+ for (uint32_t d = 0; d < depthBytes; d++)
+ {
+ int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
+ if (m_param->scaleFactor)
+ {
+ if (depthBuf[d] == 0)
+ depthBuf[d] = 1;
+ if (partSizes[d] == SIZE_NxN)
+ partSizes[d] = SIZE_2Nx2N;
+ }
+ memset(&((analysis_intra_data *)analysis->intraData)->depth[count], depthBuf[d], bytes);
+ memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], modeBuf[d], bytes);
+ memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count], partSizes[d], bytes);
+ count += bytes;
+ }
+
+ if (!m_param->scaleFactor)
+ {
+ X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
+ }
+ else
+ {
+ uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition);
+ X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes);
+ for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor)
+ memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
+ X265_FREE(tempLumaBuf);
+ }
+ X265_FREE(tempBuf);
+ consumedBytes += frameRecordSize;
+ }
+
+ else
+ {
+ uint32_t numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
+ uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 : 3;
+ X265_FREAD((WeightParam*)analysis->wt, sizeof(WeightParam), numPlanes * numDir, m_analysisFileIn, (picIn->analysisData.wt));
+ if (m_param->analysisReuseLevel < 2)
+ return;
+
+ uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL, *partSize = NULL, *mergeFlag = NULL;
+ uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
+ MV* mv[2];
+ int8_t* refIdx[2];
+
+ int numBuf = m_param->analysisReuseLevel > 4 ? 4 : 2;
+ bool bIntraInInter = false;
+ if (m_param->analysisReuseLevel == 10)
+ {
+ numBuf++;
+ bIntraInInter = (analysis->sliceType == X265_TYPE_P || m_param->bIntraInBFrames);
+ if (bIntraInInter) numBuf++;
+ }
+
+ tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
+ depthBuf = tempBuf;
+ modeBuf = tempBuf + depthBytes;
+
+ X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->depth);
+ X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->modes);
+
+ if (m_param->analysisReuseLevel > 4)
+ {
+ partSize = modeBuf + depthBytes;
+ mergeFlag = partSize + depthBytes;
+ X265_FREAD(partSize, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->partSize);
+ X265_FREAD(mergeFlag, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->mergeFlag);
+
+ if (m_param->analysisReuseLevel == 10)
+ {
+ interDir = mergeFlag + depthBytes;
+ X265_FREAD(interDir, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->interDir);
+ if (bIntraInInter)
+ {
+ chromaDir = interDir + depthBytes;
+ X265_FREAD(chromaDir, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes);
+ }
+ for (uint32_t i = 0; i < numDir; i++)
+ {
+ mvpIdx[i] = X265_MALLOC(uint8_t, depthBytes);
+ refIdx[i] = X265_MALLOC(int8_t, depthBytes);
+ mv[i] = X265_MALLOC(MV, depthBytes);
+ X265_FREAD(mvpIdx[i], sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->mvpIdx[i]);
+ X265_FREAD(refIdx[i], sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->refIdx[i]);
+ X265_FREAD(mv[i], sizeof(MV), depthBytes, m_analysisFileIn, interPic->mv[i]);
+ }
+ }
+ }
+
+ size_t count = 0;
+ for (uint32_t d = 0; d < depthBytes; d++)
+ {
+ int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
+ if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA && depthBuf[d] == 0)
+ depthBuf[d] = 1;
+ memset(&((analysis_inter_data *)analysis->interData)->depth[count], depthBuf[d], bytes);
+ memset(&((analysis_inter_data *)analysis->interData)->modes[count], modeBuf[d], bytes);
+ if (m_param->analysisReuseLevel > 4)
+ {
+ if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA && partSize[d] == SIZE_NxN)
+ partSize[d] = SIZE_2Nx2N;
+ memset(&((analysis_inter_data *)analysis->interData)->partSize[count], partSize[d], bytes);
+ int numPU = (modeBuf[d] == MODE_INTRA) ? 1 : nbPartsTable[(int)partSize[d]];
+ for (int pu = 0; pu < numPU; pu++)
+ {
+ if (pu) d++;
+ ((analysis_inter_data *)analysis->interData)->mergeFlag[count + pu] = mergeFlag[d];
+ if (m_param->analysisReuseLevel == 10)
+ {
+ ((analysis_inter_data *)analysis->interData)->interDir[count + pu] = interDir[d];
+ for (uint32_t i = 0; i < numDir; i++)
+ {
+ ((analysis_inter_data *)analysis->interData)->mvpIdx[i][count + pu] = mvpIdx[i][d];
+ ((analysis_inter_data *)analysis->interData)->refIdx[i][count + pu] = refIdx[i][d];
+ if (m_param->scaleFactor)
+ {
+ mv[i][d].x *= (int16_t)m_param->scaleFactor;
+ mv[i][d].y *= (int16_t)m_param->scaleFactor;
+ }
+ memcpy(&((analysis_inter_data *)analysis->interData)->mv[i][count + pu], &mv[i][d], sizeof(MV));
+ }
+ }
+ }
+ if (m_param->analysisReuseLevel == 10 && bIntraInInter)
+ memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], chromaDir[d], bytes);
+ }
+ count += bytes;
+ }
+
+ X265_FREE(tempBuf);
+
+ if (m_param->analysisReuseLevel == 10)
+ {
+ for (uint32_t i = 0; i < numDir; i++)
+ {
+ X265_FREE(mvpIdx[i]);
+ X265_FREE(refIdx[i]);
+ X265_FREE(mv[i]);
+ }
+ if (bIntraInInter)
+ {
+ if (!m_param->scaleFactor)
+ {
+ X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
+ }
+ else
+ {
+ uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition);
+ X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes);
+ for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor)
+ memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
+ X265_FREE(tempLumaBuf);
+ }
+ }
+ }
+ else
+ X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref);
+
+ consumedBytes += frameRecordSize;
+ if (numDir == 1)
+ totalConsumedBytes = consumedBytes;
+ }
+#undef X265_FREAD
+}
+
+void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x265_picture* picIn, int paramBytes, cuLocation cuLoc)
+{
#define X265_FREAD(val, size, readSize, fileOffset, src)\
if (!m_param->bUseAnalysisFile)\
{\
@@ -3388,25 +3663,9 @@
int numPartitions = analysis->numPartitions;
int numCUsInFrame = analysis->numCUsInFrame;
- cuLocation cuLoc;
- cuLoc.init(m_param);
-
- if (m_param->scaleFactor)
- {
- /* Allocate memory for scaled resoultion's numPartitions and numCUsInFrame*/
- analysis->numPartitions = m_param->num4x4Partitions;
- analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
-
- /* Set skipWidth/skipHeight flags when the out of bound pixels in lowRes is greater than half of maxCUSize */
- int extendedWidth = ((m_param->sourceWidth / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
- int extendedHeight = ((m_param->sourceHeight / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
- uint32_t outOfBoundaryLowres = extendedWidth - m_param->sourceWidth / 2;
- if (outOfBoundaryLowres * 2 >= m_param->maxCUSize)
- cuLoc.skipWidth = true;
- uint32_t outOfBoundaryLowresH = extendedHeight - m_param->sourceHeight / 2;
- if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
- cuLoc.skipHeight = true;
- }
+ /* Allocate memory for scaled resolution's numPartitions and numCUsInFrame */
+ analysis->numPartitions = m_param->num4x4Partitions;
+ analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
/* Memory is allocated for inter and intra analysis data based on the slicetype */
allocAnalysis(analysis);
@@ -3442,51 +3701,39 @@
{
int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
int numCTUCopied = 1;
-
- if (m_param->scaleFactor)
+ if (!depthBuf[d]) //copy data of one 64x64 to four scaled 64x64 CTUs.
{
- if (!depthBuf[d]) //copy data of one 64x64 to four scaled 64x64 CTUs.
- {
- bytes /= 4;
- numCTUCopied = 4;
- }
-
- if (partSizes[d] == SIZE_NxN)
- partSizes[d] = SIZE_2Nx2N;
- if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || (depthBuf[d] && m_param->maxCUSize != 64))
- depthBuf[d]--;
+ bytes /= 4;
+ numCTUCopied = 4;
}
+ if (partSizes[d] == SIZE_NxN)
+ partSizes[d] = SIZE_2Nx2N;
+ if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || (depthBuf[d] && m_param->maxCUSize != 64))
+ depthBuf[d]--;
+
for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
{
memset(&((analysis_intra_data *)analysis->intraData)->depth[count], depthBuf[d], bytes);
memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], modeBuf[d], bytes);
memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count], partSizes[d], bytes);
count += bytes;
- if (m_param->scaleFactor)
- d += getCUIndex(&cuLoc, &count, bytes, 1);
+ d += getCUIndex(&cuLoc, &count, bytes, 1);
}
}
- if (!m_param->scaleFactor)
- {
- X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
- }
- else
+ cuLoc.evenRowIndex = 0;
+ cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU;
+ cuLoc.switchCondition = 0;
+ uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition);
+ X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes);
+ uint32_t cnt = 0;
+ for (uint32_t ctu32Idx = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++)
{
- cuLoc.evenRowIndex = 0;
- cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU;
- cuLoc.switchCondition = 0;
- uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition);
- X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes);
- uint32_t cnt = 0;
- for (uint32_t ctu32Idx = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++)
- {
- memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
- cnt += factor;
- ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
- }
- X265_FREE(tempLumaBuf);
+ memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
+ cnt += factor;
+ ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
}
+ X265_FREE(tempLumaBuf);
X265_FREE(tempBuf);
consumedBytes += frameRecordSize;
}
@@ -3519,14 +3766,12 @@
X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->depth);
X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->modes);
-
if (m_param->analysisReuseLevel > 4)
{
partSize = modeBuf + depthBytes;
mergeFlag = partSize + depthBytes;
X265_FREAD(partSize, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->partSize);
X265_FREAD(mergeFlag, sizeof(uint8_t), depthBytes, m_analysisFileIn, interPic->mergeFlag);
-
if (m_param->analysisReuseLevel == 10)
{
interDir = mergeFlag + depthBytes;
@@ -3556,17 +3801,14 @@
bool isScaledMaxCUSize = false;
int numCTUCopied = 1;
int writeDepth = depthBuf[d];
- if (m_param->scaleFactor)
+ if (!depthBuf[d]) //copy data of one 64x64 to four scaled 64x64 CTUs.
{
- if (!depthBuf[d]) //copy data of one 64x64 to four scaled 64x64 CTUs.
- {
- isScaledMaxCUSize = true;
- bytes /= 4;
- numCTUCopied = 4;
- }
- if ((modeBuf[d] != MODE_INTRA && depthBuf[d] != 0) || (modeBuf[d] == MODE_INTRA && depthBuf[d] > 1))
- writeDepth--;
+ isScaledMaxCUSize = true;
+ bytes /= 4;
+ numCTUCopied = 4;
}
+ if ((modeBuf[d] != MODE_INTRA && depthBuf[d] != 0) || (modeBuf[d] == MODE_INTRA && depthBuf[d] > 1))
+ writeDepth--;
for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
{
@@ -3579,7 +3821,7 @@
{
puOrientation puOrient;
puOrient.init();
- if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA && partSize[d] == SIZE_NxN)
+ if (modeBuf[d] == MODE_INTRA && partSize[d] == SIZE_NxN)
partSize[d] = SIZE_2Nx2N;
int partitionSize = partSize[d];
if (isScaledMaxCUSize && partSize[d] != SIZE_2Nx2N)
@@ -3609,13 +3851,8 @@
{
((analysis_inter_data *)analysis->interData)->mvpIdx[i][count + pu] = mvpIdx[i][d];
((analysis_inter_data *)analysis->interData)->refIdx[i][count + pu] = refIdx[i][d];
- mvCopy[i].x = mv[i][d].x;
- mvCopy[i].y = mv[i][d].y;
- if (m_param->scaleFactor)
- {
- mvCopy[i].x = mv[i][d].x * (int16_t)m_param->scaleFactor;
- mvCopy[i].y = mv[i][d].y * (int16_t)m_param->scaleFactor;
- }
+ mvCopy[i].x = mv[i][d].x * (int16_t)m_param->scaleFactor;
+ mvCopy[i].y = mv[i][d].y * (int16_t)m_param->scaleFactor;
memcpy(&((analysis_inter_data *)analysis->interData)->mv[i][count + pu], &mvCopy[i], sizeof(MV));
}
}
@@ -3633,8 +3870,7 @@
}
}
count += bytes;
- if (m_param->scaleFactor)
- d += getCUIndex(&cuLoc, &count, bytes, 1);
+ d += getCUIndex(&cuLoc, &count, bytes, 1);
}
}
@@ -3650,26 +3886,19 @@
}
if (bIntraInInter)
{
- if (!m_param->scaleFactor)
- {
- X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
- }
- else
+ cuLoc.evenRowIndex = 0;
+ cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU;
+ cuLoc.switchCondition = 0;
+ uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition);
+ X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes);
+ uint32_t cnt = 0;
+ for (uint32_t ctu32Idx = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++)
{
- cuLoc.evenRowIndex = 0;
- cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU;
- cuLoc.switchCondition = 0;
- uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition);
- X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, intraPic->modes);
- uint32_t cnt = 0;
- for (uint32_t ctu32Idx = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++)
- {
- memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
- cnt += factor;
- ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
- }
- X265_FREE(tempLumaBuf);
+ memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
+ cnt += factor;
+ ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
}
+ X265_FREE(tempLumaBuf);
}
}
else
@@ -3681,11 +3910,8 @@
}
/* Restore to the current encode's numPartitions and numCUsInFrame */
- if (m_param->scaleFactor)
- {
- analysis->numPartitions = m_param->num4x4Partitions;
- analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
- }
+ analysis->numPartitions = m_param->num4x4Partitions;
+ analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
#undef X265_FREAD
}
@@ -3731,7 +3957,32 @@
X265_PARAM_VALIDATE(saveParam.openGOP, sizeof(int), 1, &m_param->bOpenGOP);
X265_PARAM_VALIDATE(saveParam.bframes, sizeof(int), 1, &m_param->bframes);
X265_PARAM_VALIDATE(saveParam.bPyramid, sizeof(int), 1, &m_param->bBPyramid);
- X265_PARAM_VALIDATE(saveParam.maxCUSize, sizeof(int), 1, &m_param->maxCUSize);
+ /* Enable m_saveCTUSize if scaleFactor is set and the save and load encodes have the same max CU size */
+ if (writeFlag)
+ {
+ X265_PARAM_VALIDATE(saveParam.maxCUSize, sizeof(int), 1, &m_param->maxCUSize);
+ }
+ else
+ {
+ fileOffset = m_analysisFileIn;
+ if (m_param->bUseAnalysisFile && fread(&readValue, sizeof(int), 1, fileOffset) != 1)
+ {
+ x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data.\n");
+ m_aborted = true;
+ }
+ else if (!m_param->bUseAnalysisFile)
+ readValue = saveParam.maxCUSize;
+
+ m_saveCTUSize = 0;
+ if (m_param->scaleFactor && g_log2Size[m_param->maxCUSize] == g_log2Size[readValue])
+ m_saveCTUSize = 1;
+ else if (readValue != (int)m_param->maxCUSize && (g_log2Size[m_param->maxCUSize] - g_log2Size[readValue]) != 1)
+ {
+ x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Mismatch in params.\n");
+ m_aborted = true;
+ }
+ count++;
+ }
X265_PARAM_VALIDATE(saveParam.minCUSize, sizeof(int), 1, &m_param->minCUSize);
X265_PARAM_VALIDATE(saveParam.radl, sizeof(int), 1, &m_param->radl);
X265_PARAM_VALIDATE(saveParam.lookaheadDepth, sizeof(int), 1, &m_param->lookaheadDepth);
diff -r e70f88978115 -r 3cef29225ef4 source/encoder/encoder.h
--- a/source/encoder/encoder.h Thu May 17 12:11:45 2018 +0530
+++ b/source/encoder/encoder.h Thu May 17 12:18:34 2018 +0530
@@ -228,6 +228,8 @@
int32_t m_startPoint;
Lock m_dynamicRefineLock;
+ bool m_saveCTUSize;
+
Encoder();
~Encoder()
{
@@ -281,6 +283,8 @@
void readAnalysisFile(x265_analysis_data* analysis, int poc, const x265_picture* picIn, int paramBytes);
+ void readAnalysisFile(x265_analysis_data* analysis, int poc, const x265_picture* picIn, int paramBytes, cuLocation cuLoc);
+
int getCUIndex(cuLocation* cuLoc, uint32_t* count, int bytes, int flag);
int getPuShape(puOrientation* puOrient, int partSize, int numCTU);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: x265-clone.patch
Type: text/x-patch
Size: 30417 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180517/632370ed/attachment-0001.bin>
More information about the x265-devel
mailing list