[x265] [PATCH] Add support for reusing cutree offsets in all reuse levels 1, 2, 5
Aruna Matheswaran
aruna at multicorewareinc.com
Wed Sep 2 15:50:48 CEST 2020
On Wed, Sep 2, 2020 at 6:53 PM Mahesh Pittala <mahesh at multicorewareinc.com>
wrote:
>
>
> On Tue, Sep 1, 2020 at 7:09 PM Aruna Matheswaran <
> aruna at multicorewareinc.com> wrote:
>
>>
>>
>> On Mon, Aug 31, 2020 at 7:38 PM Mahesh Pittala <
>> mahesh at multicorewareinc.com> wrote:
>>
>>> From 812cccaa864da47dcf9567dccb81c9fef220c626 Mon Sep 17 00:00:00 2001
>>> From: maheshpittala <mahesh at multicorewareinc.com>
>>> Date: Sat, 1 Aug 2020 14:52:48 +0530
>>> Subject: [PATCH 1/1] Add support for reusing cutree offsets in all reuse
>>> levels 1,2,5
>>>
>>> Depth information is required to load cutree offsets, so depth
>>> information is written as well,
>>> but it is not used in reuse level 1.
>>>
>> [AM] Will this restrict reuse-level 1 from sharing analysis info across
>> non-dyadic resolutions?
>>
> No
>
[AM] I don't see the logic for cutree sharing between non-dyadic
resolutions either. Am I missing anything?
> ---
>>> source/abrEncApp.cpp | 23 ++--
>>> source/encoder/analysis.cpp | 2 +-
>>> source/encoder/api.cpp | 53 +++++---
>>> source/encoder/encoder.cpp | 239 +++++++++++++++++++++---------------
>>> 4 files changed, 188 insertions(+), 129 deletions(-)
>>>
>>> diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
>>> index fa62ebf63..e26d3adfc 100644
>>> --- a/source/abrEncApp.cpp
>>> +++ b/source/abrEncApp.cpp
>>> @@ -330,15 +330,16 @@ namespace X265_NS {
>>>
>>> if (src->sliceType == X265_TYPE_IDR || src->sliceType ==
>>> X265_TYPE_I)
>>> {
>>> - if (m_param->analysisSaveReuseLevel < 2)
>>> - goto ret;
>>> x265_analysis_intra_data *intraDst, *intraSrc;
>>> intraDst =
>>> (x265_analysis_intra_data*)m_analysisInfo->intraData;
>>> intraSrc = (x265_analysis_intra_data*)src->intraData;
>>> - memcpy(intraDst->depth, intraSrc->depth, sizeof(uint8_t) *
>>> src->depthBytes);
>>> - memcpy(intraDst->modes, intraSrc->modes, sizeof(uint8_t) *
>>> src->numCUsInFrame * src->numPartitions);
>>> - memcpy(intraDst->partSizes, intraSrc->partSizes,
>>> sizeof(char) * src->depthBytes);
>>> - memcpy(intraDst->chromaModes, intraSrc->chromaModes,
>>> sizeof(uint8_t) * src->depthBytes);
>>> + memcpy(intraDst->depth, intraSrc->depth, sizeof(uint8_t)*
>>> src->depthBytes);
>>> + if (m_param->analysisSaveReuseLevel > 1)
>>> + {
>>> + memcpy(intraDst->modes, intraSrc->modes,
>>> sizeof(uint8_t)* src->numCUsInFrame * src->numPartitions);
>>> + memcpy(intraDst->partSizes, intraSrc->partSizes,
>>> sizeof(char)* src->depthBytes);
>>> + memcpy(intraDst->chromaModes, intraSrc->chromaModes,
>>> sizeof(uint8_t)* src->depthBytes);
>>> + }
>>> if (m_param->rc.cuTree)
>>> memcpy(intraDst->cuQPOff, intraSrc->cuQPOff,
>>> sizeof(int8_t) * src->depthBytes);
>>> }
>>> @@ -347,13 +348,12 @@ namespace X265_NS {
>>> bool bIntraInInter = (src->sliceType == X265_TYPE_P ||
>>> m_param->bIntraInBFrames);
>>> int numDir = src->sliceType == X265_TYPE_P ? 1 : 2;
>>> memcpy(m_analysisInfo->wt, src->wt, sizeof(WeightParam) * 3
>>> * numDir);
>>> - if (m_param->analysisSaveReuseLevel < 2)
>>> - goto ret;
>>> x265_analysis_inter_data *interDst, *interSrc;
>>> interDst =
>>> (x265_analysis_inter_data*)m_analysisInfo->interData;
>>> interSrc = (x265_analysis_inter_data*)src->interData;
>>> - memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t) *
>>> src->depthBytes);
>>> - memcpy(interDst->modes, interSrc->modes, sizeof(uint8_t) *
>>> src->depthBytes);
>>> + memcpy(interDst->depth, interSrc->depth, sizeof(uint8_t)*
>>> src->depthBytes);
>>> + if (m_param->analysisSaveReuseLevel > 1)
>>> + memcpy(interDst->modes, interSrc->modes,
>>> sizeof(uint8_t)* src->depthBytes);
>>> if (m_param->rc.cuTree)
>>> memcpy(interDst->cuQPOff, interSrc->cuQPOff,
>>> sizeof(int8_t) * src->depthBytes);
>>> if (m_param->analysisSaveReuseLevel > 4)
>>> @@ -378,11 +378,10 @@ namespace X265_NS {
>>> }
>>> }
>>> }
>>> - if (m_param->analysisSaveReuseLevel != 10)
>>> + if (m_param->analysisSaveReuseLevel != 10 &&
>>> m_param->analysisSaveReuseLevel > 1)
>>> memcpy(interDst->ref, interSrc->ref, sizeof(int32_t) *
>>> src->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir);
>>> }
>>>
>>> -ret:
>>> //increment analysis Write counter
>>> m_parent->m_analysisWriteCnt[m_id].incr();
>>> m_parent->m_analysisWrite[m_id][index].incr();
>>> diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp
>>> index 157bae8cd..351d8a3c8 100644
>>> --- a/source/encoder/analysis.cpp
>>> +++ b/source/encoder/analysis.cpp
>>> @@ -3645,7 +3645,7 @@ int Analysis::calculateQpforCuSize(const CUData&
>>> ctu, const CUGeom& cuGeom, int3
>>> qp += distortionData->offset[ctu.m_cuAddr];
>>> }
>>>
>>> - if (m_param->analysisLoadReuseLevel == 10 && m_param->rc.cuTree)
>>> + if (m_param->analysisLoadReuseLevel >= 1 && m_param->rc.cuTree)
>>>
>> [AM] Shall we replace this check with "if (m_param->analysisLoadReuseLevel
>> && m_param->rc.cuTree)"?
>>
> I will change it
>
>> {
>>> int cuIdx = (ctu.m_cuAddr * ctu.m_numPartitions) +
>>> cuGeom.absPartIdx;
>>> if (ctu.m_slice->m_sliceType == I_SLICE)
>>> diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp
>>> index a986355e0..4ef3f04bc 100644
>>> --- a/source/encoder/api.cpp
>>> +++ b/source/encoder/api.cpp
>>> @@ -813,35 +813,41 @@ void x265_alloc_analysis_data(x265_param *param,
>>> x265_analysis_data* analysis)
>>> CHECKED_MALLOC_ZERO(analysis->wt, x265_weight_param, numPlanes
>>> * numDir);
>>>
>>> //Allocate memory for intraData pointer
>>> - if ((maxReuseLevel > 1) || isMultiPassOpt)
>>> + if ((maxReuseLevel > 0) || isMultiPassOpt)
>>> {
>>> CHECKED_MALLOC_ZERO(intraData, x265_analysis_intra_data, 1);
>>> CHECKED_MALLOC(intraData->depth, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> }
>>>
>> [AM] If cutree offsets and CU depths are shared in all the reuse levels,
>> can we change the order of analysis data write/read such that CU depth and
>> cutree write/read happen in the common section? By doing this, you can
>> eliminate the "reuse-level > 0" checks all over the code.
>>
> In the code, CU depths are shared from reuse-level 2; there is no info about
> CU depths in the doc. I have enabled cutree offset sharing in all reuse levels.
>
[AM] Sorry, I don't get your point. Could you please elaborate?
>
>>> - if (maxReuseLevel > 1)
>>> + if (maxReuseLevel > 0)
>>> {
>>> - CHECKED_MALLOC_ZERO(intraData->modes, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> - CHECKED_MALLOC_ZERO(intraData->partSizes, char,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> - CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + if (maxReuseLevel > 1)
>>> + {
>>> + CHECKED_MALLOC_ZERO(intraData->modes, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + CHECKED_MALLOC_ZERO(intraData->partSizes, char,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + CHECKED_MALLOC_ZERO(intraData->chromaModes, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + }
>>> if (param->rc.cuTree)
>>> CHECKED_MALLOC_ZERO(intraData->cuQPOff, int8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> }
>>> analysis->intraData = intraData;
>>>
>>> - if ((maxReuseLevel > 1) || isMultiPassOpt)
>>> + if ((maxReuseLevel > 0) || isMultiPassOpt)
>>> {
>>> //Allocate memory for interData pointer based on ReuseLevels
>>> CHECKED_MALLOC_ZERO(interData, x265_analysis_inter_data, 1);
>>> CHECKED_MALLOC(interData->depth, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> - CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> -
>>> if (param->rc.cuTree && !isMultiPassOpt)
>>> CHECKED_MALLOC_ZERO(interData->cuQPOff, int8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> - CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> - CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> - CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> - CHECKED_MALLOC_ZERO(interData->mv[1], x265_analysis_MV,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> +
>>> + if (maxReuseLevel > 1)
>>> + {
>>> + CHECKED_MALLOC_ZERO(interData->modes, uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + CHECKED_MALLOC_ZERO(interData->mvpIdx[0], uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + CHECKED_MALLOC_ZERO(interData->mvpIdx[1], uint8_t,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + CHECKED_MALLOC_ZERO(interData->mv[0], x265_analysis_MV,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + CHECKED_MALLOC_ZERO(interData->mv[1], x265_analysis_MV,
>>> analysis->numPartitions * analysis->numCUsInFrame);
>>> + }
>>> }
>>>
>>> if (maxReuseLevel > 4)
>>> @@ -915,9 +921,12 @@ void x265_free_analysis_data(x265_param *param,
>>> x265_analysis_data* analysis)
>>> X265_FREE((analysis->intraData)->depth);
>>> if (!isMultiPassOpt)
>>> {
>>> - X265_FREE((analysis->intraData)->modes);
>>> - X265_FREE((analysis->intraData)->partSizes);
>>> - X265_FREE((analysis->intraData)->chromaModes);
>>> + if (maxReuseLevel > 1)
>>> + {
>>> + X265_FREE((analysis->intraData)->modes);
>>> + X265_FREE((analysis->intraData)->partSizes);
>>> + X265_FREE((analysis->intraData)->chromaModes);
>>> + }
>>> if (param->rc.cuTree)
>>> X265_FREE((analysis->intraData)->cuQPOff);
>>> }
>>> @@ -929,13 +938,17 @@ void x265_free_analysis_data(x265_param *param,
>>> x265_analysis_data* analysis)
>>> if (analysis->interData)
>>> {
>>> X265_FREE((analysis->interData)->depth);
>>> - X265_FREE((analysis->interData)->modes);
>>> if (!isMultiPassOpt && param->rc.cuTree)
>>> X265_FREE((analysis->interData)->cuQPOff);
>>> - X265_FREE((analysis->interData)->mvpIdx[0]);
>>> - X265_FREE((analysis->interData)->mvpIdx[1]);
>>> - X265_FREE((analysis->interData)->mv[0]);
>>> - X265_FREE((analysis->interData)->mv[1]);
>>> +
>>> + if (maxReuseLevel > 1)
>>> + {
>>> + X265_FREE((analysis->interData)->modes);
>>> + X265_FREE((analysis->interData)->mvpIdx[0]);
>>> + X265_FREE((analysis->interData)->mvpIdx[1]);
>>> + X265_FREE((analysis->interData)->mv[0]);
>>> + X265_FREE((analysis->interData)->mv[1]);
>>> + }
>>>
>>> if (maxReuseLevel > 4)
>>> {
>>> diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
>>> index cf7bfb98d..c87feb84a 100644
>>> --- a/source/encoder/encoder.cpp
>>> +++ b/source/encoder/encoder.cpp
>>> @@ -4448,8 +4448,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> {
>>> if (m_param->bAnalysisType == HEVC_INFO)
>>> return;
>>> - if (m_param->analysisLoadReuseLevel < 2)
>>> - return;
>>>
>>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>>> *partSizes = NULL;
>>> int8_t *cuQPBuf = NULL;
>>> @@ -4462,8 +4460,11 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>
>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->depth);
>>> - X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->chromaModes);
>>> - X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->partSizes);
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + {
>>> + X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->chromaModes);
>>> + X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->partSizes);
>>> + }
>>> if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>>>
>>> size_t count = 0;
>>> @@ -4478,38 +4479,40 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> partSizes[d] = SIZE_2Nx2N;
>>> }
>>> memset(&(analysis->intraData)->depth[count], depthBuf[d],
>>> bytes);
>>> - memset(&(analysis->intraData)->chromaModes[count],
>>> modeBuf[d], bytes);
>>> - memset(&(analysis->intraData)->partSizes[count],
>>> partSizes[d], bytes);
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + {
>>> + memset(&(analysis->intraData)->chromaModes[count],
>>> modeBuf[d], bytes);
>>> + memset(&(analysis->intraData)->partSizes[count],
>>> partSizes[d], bytes);
>>> + }
>>> if (m_param->rc.cuTree)
>>> memset(&(analysis->intraData)->cuQPOff[count],
>>> cuQPBuf[d], bytes);
>>> count += bytes;
>>> }
>>> -
>>> - if (!m_param->scaleFactor)
>>> - {
>>> - X265_FREAD((analysis->intraData)->modes, sizeof(uint8_t),
>>> numCUsLoad * analysis->numPartitions, m_analysisFileIn, intraPic->modes);
>>> - }
>>> - else
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> {
>>> - uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, numCUsLoad *
>>> scaledNumPartition);
>>> - X265_FREAD(tempLumaBuf, sizeof(uint8_t), numCUsLoad *
>>> scaledNumPartition, m_analysisFileIn, intraPic->modes);
>>> - for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < numCUsLoad
>>> * scaledNumPartition; ctu32Idx++, cnt += factor)
>>> - memset(&(analysis->intraData)->modes[cnt],
>>> tempLumaBuf[ctu32Idx], factor);
>>> - X265_FREE(tempLumaBuf);
>>> + if (!m_param->scaleFactor)
>>> + {
>>> + X265_FREAD((analysis->intraData)->modes,
>>> sizeof(uint8_t), numCUsLoad * analysis->numPartitions, m_analysisFileIn,
>>> intraPic->modes);
>>> + }
>>> + else
>>> + {
>>> + uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, numCUsLoad
>>> * scaledNumPartition);
>>> + X265_FREAD(tempLumaBuf, sizeof(uint8_t), numCUsLoad *
>>> scaledNumPartition, m_analysisFileIn, intraPic->modes);
>>> + for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx <
>>> numCUsLoad * scaledNumPartition; ctu32Idx++, cnt += factor)
>>> + memset(&(analysis->intraData)->modes[cnt],
>>> tempLumaBuf[ctu32Idx], factor);
>>> + X265_FREE(tempLumaBuf);
>>> + }
>>> }
>>> if (m_param->rc.cuTree)
>>> X265_FREE(cuQPBuf);
>>> X265_FREE(tempBuf);
>>> consumedBytes += frameRecordSize;
>>> }
>>> -
>>> else
>>> {
>>> uint32_t numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
>>> uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1
>>> : 3;
>>> X265_FREAD((WeightParam*)analysis->wt, sizeof(WeightParam),
>>> numPlanes * numDir, m_analysisFileIn, (picIn->analysisData.wt));
>>> - if (m_param->analysisLoadReuseLevel < 2)
>>> - return;
>>>
>>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>>> *partSize = NULL, *mergeFlag = NULL;
>>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>>> @@ -4539,9 +4542,11 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>
>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->depth);
>>> - X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->modes);
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->modes);
>>> if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf,
>>> sizeof(int8_t), depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>>>
>>> +
>>> if (m_param->analysisLoadReuseLevel > 4)
>>> {
>>> partSize = modeBuf + depthBytes;
>>> @@ -4577,9 +4582,13 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA &&
>>> depthBuf[d] == 0)
>>> depthBuf[d] = 1;
>>> memset(&(analysis->interData)->depth[count],
>>> depthBuf[d], bytes);
>>> - memset(&(analysis->interData)->modes[count],
>>> modeBuf[d], bytes);
>>> +
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + memset(&(analysis->interData)->modes[count],
>>> modeBuf[d], bytes);
>>> +
>>> if (m_param->rc.cuTree)
>>> memset(&(analysis->interData)->cuQPOff[count],
>>> cuQPBuf[d], bytes);
>>> +
>>> if (m_param->analysisLoadReuseLevel > 4)
>>> {
>>> if (m_param->scaleFactor && modeBuf[d] ==
>>> MODE_INTRA && partSize[d] == SIZE_NxN)
>>> @@ -4643,7 +4652,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> }
>>> }
>>> }
>>> - else
>>> + else if (m_param->analysisLoadReuseLevel > 1)
>>> X265_FREAD((analysis->interData)->ref, sizeof(int32_t),
>>> numCUsLoad * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn,
>>> interPic->ref);
>>>
>>> consumedBytes += frameRecordSize;
>>> @@ -4810,9 +4819,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>>
>>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
>>> X265_TYPE_I)
>>> {
>>> - if (m_param->analysisLoadReuseLevel < 2)
>>> - return;
>>> -
>>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>>> *partSizes = NULL;
>>> int8_t *cuQPBuf = NULL;
>>>
>>> @@ -4824,8 +4830,12 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>
>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->depth);
>>> - X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->chromaModes);
>>> - X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->partSizes);
>>> +
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + {
>>> + X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->chromaModes);
>>> + X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, intraPic->partSizes);
>>> + }
>>> if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>> depthBytes, m_analysisFileIn, intraPic->cuQPOff); }
>>>
>>> uint32_t count = 0;
>>> @@ -4838,36 +4848,45 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> bytes /= 4;
>>> numCTUCopied = 4;
>>> }
>>> - if (partSizes[d] == SIZE_NxN)
>>> - partSizes[d] = SIZE_2Nx2N;
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + {
>>> + if (partSizes[d] == SIZE_NxN)
>>> + partSizes[d] = SIZE_2Nx2N;
>>> + }
>>> if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) ||
>>> (depthBuf[d] && m_param->maxCUSize != 64))
>>> depthBuf[d]--;
>>>
>>> for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
>>> {
>>> memset(&(analysis->intraData)->depth[count],
>>> depthBuf[d], bytes);
>>> - memset(&(analysis->intraData)->chromaModes[count],
>>> modeBuf[d], bytes);
>>> - memset(&(analysis->intraData)->partSizes[count],
>>> partSizes[d], bytes);
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + {
>>> + memset(&(analysis->intraData)->chromaModes[count],
>>> modeBuf[d], bytes);
>>> + memset(&(analysis->intraData)->partSizes[count],
>>> partSizes[d], bytes);
>>> + }
>>> if (m_param->rc.cuTree)
>>> memset(&(analysis->intraData)->cuQPOff[count],
>>> cuQPBuf[d], bytes);
>>> +
>>> count += bytes;
>>> d += getCUIndex(&cuLoc, &count, bytes, 1);
>>> }
>>> }
>>> -
>>> - cuLoc.evenRowIndex = 0;
>>> - cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU;
>>> - cuLoc.switchCondition = 0;
>>> - uint8_t *tempLumaBuf = X265_MALLOC(uint8_t,
>>> analysis->numCUsInFrame * scaledNumPartition);
>>> - X265_FREAD(tempLumaBuf, sizeof(uint8_t),
>>> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn,
>>> intraPic->modes);
>>> - uint32_t cnt = 0;
>>> - for (uint32_t ctu32Idx = 0; ctu32Idx < analysis->numCUsInFrame
>>> * scaledNumPartition; ctu32Idx++)
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> {
>>> - memset(&(analysis->intraData)->modes[cnt],
>>> tempLumaBuf[ctu32Idx], factor);
>>> - cnt += factor;
>>> - ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
>>> + cuLoc.evenRowIndex = 0;
>>> + cuLoc.oddRowIndex = m_param->num4x4Partitions *
>>> cuLoc.widthInCU;
>>> + cuLoc.switchCondition = 0;
>>> + uint8_t *tempLumaBuf = X265_MALLOC(uint8_t,
>>> analysis->numCUsInFrame * scaledNumPartition);
>>> + X265_FREAD(tempLumaBuf, sizeof(uint8_t),
>>> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn,
>>> intraPic->modes);
>>> + uint32_t cnt = 0;
>>> + for (uint32_t ctu32Idx = 0; ctu32Idx <
>>> analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++)
>>> + {
>>> + memset(&(analysis->intraData)->modes[cnt],
>>> tempLumaBuf[ctu32Idx], factor);
>>> + cnt += factor;
>>> + ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
>>> + }
>>> + X265_FREE(tempLumaBuf);
>>> }
>>> - X265_FREE(tempLumaBuf);
>>> if (m_param->rc.cuTree)
>>> X265_FREE(cuQPBuf);
>>> X265_FREE(tempBuf);
>>> @@ -4879,8 +4898,6 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> uint32_t numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
>>> uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1
>>> : 3;
>>> X265_FREAD((WeightParam*)analysis->wt, sizeof(WeightParam),
>>> numPlanes * numDir, m_analysisFileIn, (picIn->analysisData.wt));
>>> - if (m_param->analysisLoadReuseLevel < 2)
>>> - return;
>>>
>>> uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
>>> *partSize = NULL, *mergeFlag = NULL;
>>> uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
>>> @@ -4904,8 +4921,10 @@ void
>>> Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, const x
>>> cuQPBuf = X265_MALLOC(int8_t, depthBytes);
>>>
>>> X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->depth);
>>> - X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->modes);
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
>>> m_analysisFileIn, interPic->modes);
>>> if (m_param->rc.cuTree) { X265_FREAD(cuQPBuf, sizeof(int8_t),
>>> depthBytes, m_analysisFileIn, interPic->cuQPOff); }
>>> +
>>> if (m_param->analysisLoadReuseLevel > 4)
>>> {
>>> partSize = modeBuf + depthBytes;
>>> @@ -4953,7 +4972,9 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
>>> {
>>> memset(&(analysis->interData)->depth[count],
>>> writeDepth, bytes);
>>> - memset(&(analysis->interData)->modes[count],
>>> modeBuf[d], bytes);
>>> + if (m_param->analysisLoadReuseLevel > 1)
>>> + memset(&(analysis->interData)->modes[count],
>>> modeBuf[d], bytes);
>>> +
>>> if (m_param->rc.cuTree)
>>> memset(&(analysis->interData)->cuQPOff[count],
>>> cuQPBuf[d], bytes);
>>> if (m_param->analysisLoadReuseLevel == 10 &&
>>> bIntraInInter)
>>> @@ -5045,7 +5066,7 @@ void Encoder::readAnalysisFile(x265_analysis_data*
>>> analysis, int curPoc, const x
>>> X265_FREE(tempLumaBuf);
>>> }
>>> }
>>> - else
>>> + else if (m_param->analysisLoadReuseLevel > 1)
>>> X265_FREAD((analysis->interData)->ref, sizeof(int32_t),
>>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir,
>>> m_analysisFileIn, interPic->ref);
>>>
>>> consumedBytes += frameRecordSize;
>>> @@ -5155,6 +5176,8 @@ int
>>> Encoder::validateAnalysisData(x265_analysis_validate* saveParam, int writeFl
>>> isIncompatibleReuseLevel = true;
>>> else if ((loadLevel >= 2 && loadLevel <= 4) && (saveLevel < 2
>>> || saveLevel > 6))
>>> isIncompatibleReuseLevel = true;
>>> + else if (loadLevel == 1 && saveLevel < 1)
>>> + isIncompatibleReuseLevel = true;
>>> else if (!saveLevel)
>>> isIncompatibleReuseLevel = true;
>>>
>>> @@ -5167,7 +5190,7 @@ int
>>> Encoder::validateAnalysisData(x265_analysis_validate* saveParam, int writeFl
>>>
>>> int bcutree;
>>> X265_FREAD(&bcutree, sizeof(int), 1, m_analysisFileIn,
>>> &(saveParam->cuTree));
>>> - if (loadLevel == 10 && m_param->rc.cuTree && (!bcutree ||
>>> saveLevel < 2))
>>> + if (loadLevel >= 1 && m_param->rc.cuTree && (!bcutree ||
>>> saveLevel < 1))
>>> {
>>> x265_log(NULL, X265_LOG_ERROR, "Error reading cu-tree info.
>>> Disabling cutree offsets. \n");
>>> m_param->rc.cuTree = 0;
>>> @@ -5510,7 +5533,7 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> analysis->frameRecordSize += analysis->numCUsInFrame *
>>> sizeof(sse_t);
>>> }
>>>
>>> - if (m_param->analysisSaveReuseLevel > 1)
>>> + if (m_param->analysisSaveReuseLevel > 0)
>>> {
>>>
>>> if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
>>> == X265_TYPE_I)
>>> @@ -5529,18 +5552,21 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> {
>>> depth = ctu->m_cuDepth[absPartIdx];
>>> intraDataCTU->depth[depthBytes] = depth;
>>> + if (m_param->analysisSaveReuseLevel > 1)
>>> + {
>>> + mode = ctu->m_chromaIntraDir[absPartIdx];
>>> + intraDataCTU->chromaModes[depthBytes] = mode;
>>>
>>> - mode = ctu->m_chromaIntraDir[absPartIdx];
>>> - intraDataCTU->chromaModes[depthBytes] = mode;
>>> -
>>> - partSize = ctu->m_partSize[absPartIdx];
>>> - intraDataCTU->partSizes[depthBytes] = partSize;
>>> -
>>> + partSize = ctu->m_partSize[absPartIdx];
>>> + intraDataCTU->partSizes[depthBytes] = partSize;
>>> + }
>>> if (m_param->rc.cuTree)
>>> intraDataCTU->cuQPOff[depthBytes] =
>>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>>> +
>>> absPartIdx += ctu->m_numPartitions >> (depth * 2);
>>> }
>>> - memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>>> ctu->m_numPartitions);
>>> + if (m_param->analysisSaveReuseLevel > 1)
>>> + memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>>> ctu->m_numPartitions);
>>> }
>>> }
>>> else
>>> @@ -5561,12 +5587,14 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> {
>>> depth = ctu->m_cuDepth[absPartIdx];
>>> interDataCTU->depth[depthBytes] = depth;
>>> + if (m_param->analysisSaveReuseLevel > 1)
>>> + {
>>> + predMode = ctu->m_predMode[absPartIdx];
>>> + if (m_param->analysisSaveReuseLevel != 10 &&
>>> ctu->m_refIdx[1][absPartIdx] != -1)
>>> + predMode = 4; // used as indicator if the
>>> block is coded as bidir
>>>
>>> - predMode = ctu->m_predMode[absPartIdx];
>>> - if (m_param->analysisSaveReuseLevel != 10 &&
>>> ctu->m_refIdx[1][absPartIdx] != -1)
>>> - predMode = 4; // used as indicator if the block
>>> is coded as bidir
>>> -
>>> - interDataCTU->modes[depthBytes] = predMode;
>>> + interDataCTU->modes[depthBytes] = predMode;
>>> + }
>>> if (m_param->rc.cuTree)
>>> interDataCTU->cuQPOff[depthBytes] =
>>> (int8_t)(ctu->m_qpAnalysis[absPartIdx] - baseQP);
>>>
>>> @@ -5603,17 +5631,22 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> memcpy(&intraDataCTU->modes[ctu->m_cuAddr *
>>> ctu->m_numPartitions], ctu->m_lumaIntraDir, sizeof(uint8_t)*
>>> ctu->m_numPartitions);
>>> }
>>> }
>>> -
>>> - if ((analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree)
>>> + if ((analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree &&
>>> m_param->analysisSaveReuseLevel == 1)
>>> + analysis->frameRecordSize += depthBytes + (sizeof(int8_t)*
>>> depthBytes);
>>> + else if ((analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I) && m_param->analysisSaveReuseLevel == 1)
>>> + analysis->frameRecordSize += depthBytes;
>>> + else if ((analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I) && m_param->rc.cuTree)
>>> analysis->frameRecordSize += sizeof(uint8_t)*
>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3 +
>>> (sizeof(int8_t) * depthBytes);
>>> else if (analysis->sliceType == X265_TYPE_IDR ||
>>> analysis->sliceType == X265_TYPE_I)
>>> analysis->frameRecordSize += sizeof(uint8_t)*
>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes * 3;
>>> else
>>> {
>>> /* Add sizeof depth, modes, partSize, cuQPOffset, mergeFlag
>>> */
>>> - analysis->frameRecordSize += depthBytes * 2;
>>> + analysis->frameRecordSize += depthBytes;
>>> if (m_param->rc.cuTree)
>>> - analysis->frameRecordSize += (sizeof(int8_t) * depthBytes);
>>> + analysis->frameRecordSize += (sizeof(int8_t) *
>>> depthBytes);
>>> + if (m_param->analysisSaveReuseLevel > 1)
>>> + analysis->frameRecordSize += depthBytes;
>>> if (m_param->analysisSaveReuseLevel > 4)
>>> analysis->frameRecordSize += (depthBytes * 2);
>>>
>>> @@ -5627,7 +5660,7 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> if (bIntraInInter)
>>> analysis->frameRecordSize += sizeof(uint8_t)*
>>> analysis->numCUsInFrame * analysis->numPartitions + depthBytes;
>>> }
>>> - else
>>> + else if (m_param->analysisSaveReuseLevel > 1)
>>> analysis->frameRecordSize += sizeof(int32_t)*
>>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir;
>>> }
>>> analysis->depthBytes = depthBytes;
>>> @@ -5661,44 +5694,58 @@ void
>>> Encoder::writeAnalysisFile(x265_analysis_data* analysis, FrameData &curEncD
>>> X265_FWRITE((WeightParam*)analysis->wt, sizeof(WeightParam),
>>> numPlanes * numDir, m_analysisFileOut);
>>>
>>> if (m_param->analysisSaveReuseLevel < 2)
>>> - return;
>>> -
>>> - if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
>>> X265_TYPE_I)
>>> {
>>> - X265_FWRITE((analysis->intraData)->depth, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> - X265_FWRITE((analysis->intraData)->chromaModes,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> - X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
>>> depthBytes, m_analysisFileOut);
>>> - if (m_param->rc.cuTree)
>>> - X265_FWRITE((analysis->intraData)->cuQPOff, sizeof(int8_t),
>>> depthBytes, m_analysisFileOut);
>>> - X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
>>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>>> + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
>>> == X265_TYPE_I)
>>> + {
>>> + X265_FWRITE((analysis->intraData)->depth, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> + if (m_param->rc.cuTree)
>>> + X265_FWRITE((analysis->intraData)->cuQPOff,
>>> sizeof(int8_t), depthBytes, m_analysisFileOut);
>>> + }
>>> + else
>>> + {
>>> + X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> + if (m_param->rc.cuTree)
>>> + X265_FWRITE((analysis->interData)->cuQPOff,
>>> sizeof(int8_t), depthBytes, m_analysisFileOut);
>>> + }
>>> }
>>> else
>>> {
>>> - X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> - X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> - if (m_param->rc.cuTree)
>>> - X265_FWRITE((analysis->interData)->cuQPOff, sizeof(int8_t),
>>> depthBytes, m_analysisFileOut);
>>> - if (m_param->analysisSaveReuseLevel > 4)
>>> + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType
>>> == X265_TYPE_I)
>>> {
>>> - X265_FWRITE((analysis->interData)->partSize,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> - X265_FWRITE((analysis->interData)->mergeFlag,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> - if (m_param->analysisSaveReuseLevel == 10)
>>> + X265_FWRITE((analysis->intraData)->depth, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->intraData)->chromaModes,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->intraData)->partSizes, sizeof(char),
>>> depthBytes, m_analysisFileOut);
>>> + if (m_param->rc.cuTree)
>>> + X265_FWRITE((analysis->intraData)->cuQPOff,
>>> sizeof(int8_t), depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->intraData)->modes, sizeof(uint8_t),
>>> analysis->numCUsInFrame * analysis->numPartitions, m_analysisFileOut);
>>> + }
>>> + else
>>> + {
>>> + X265_FWRITE((analysis->interData)->depth, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->interData)->modes, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> + if (m_param->rc.cuTree)
>>> + X265_FWRITE((analysis->interData)->cuQPOff,
>>> sizeof(int8_t), depthBytes, m_analysisFileOut);
>>> + if (m_param->analysisSaveReuseLevel > 4)
>>> {
>>> - X265_FWRITE((analysis->interData)->interDir,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> - if (bIntraInInter)
>>> X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> - for (uint32_t dir = 0; dir < numDir; dir++)
>>> + X265_FWRITE((analysis->interData)->partSize,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->interData)->mergeFlag,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> + if (m_param->analysisSaveReuseLevel == 10)
>>> {
>>> - X265_FWRITE((analysis->interData)->mvpIdx[dir],
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> - X265_FWRITE((analysis->interData)->refIdx[dir],
>>> sizeof(int8_t), depthBytes, m_analysisFileOut);
>>> - X265_FWRITE((analysis->interData)->mv[dir],
>>> sizeof(MV), depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->interData)->interDir,
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> + if (bIntraInInter)
>>> X265_FWRITE((analysis->intraData)->chromaModes, sizeof(uint8_t),
>>> depthBytes, m_analysisFileOut);
>>> + for (uint32_t dir = 0; dir < numDir; dir++)
>>> + {
>>> + X265_FWRITE((analysis->interData)->mvpIdx[dir],
>>> sizeof(uint8_t), depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->interData)->refIdx[dir],
>>> sizeof(int8_t), depthBytes, m_analysisFileOut);
>>> + X265_FWRITE((analysis->interData)->mv[dir],
>>> sizeof(MV), depthBytes, m_analysisFileOut);
>>> + }
>>> + if (bIntraInInter)
>>> + X265_FWRITE((analysis->intraData)->modes,
>>> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
>>> m_analysisFileOut);
>>> }
>>> - if (bIntraInInter)
>>> - X265_FWRITE((analysis->intraData)->modes,
>>> sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions,
>>> m_analysisFileOut);
>>> }
>>> + if (m_param->analysisSaveReuseLevel > 1 &&
>>> m_param->analysisSaveReuseLevel != 10)
>>> + X265_FWRITE((analysis->interData)->ref,
>>> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU *
>>> numDir, m_analysisFileOut);
>>> }
>>> - if (m_param->analysisSaveReuseLevel != 10)
>>> - X265_FWRITE((analysis->interData)->ref, sizeof(int32_t),
>>> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir,
>>> m_analysisFileOut);
>>> -
>>> }
>>> #undef X265_FWRITE
>>> }
>>> --
>>> 2.18.2
>>>
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>> --
>> Regards,
>> *Aruna Matheswaran,*
>> Video Codec Engineer,
>> Media & AI analytics BU,
>>
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Regards,
*Aruna Matheswaran,*
Video Codec Engineer,
Media & AI analytics BU,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20200902/ba59ceaa/attachment-0001.html>
More information about the x265-devel
mailing list