[x265] [PATCH] analysis: enable scaled save and load runs with different max CU sizes

Ashok Kumar Mishra ashok at multicorewareinc.com
Thu May 17 17:37:16 CEST 2018


On Thu, May 17, 2018 at 12:23 PM, <bhavna at multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bhavna at multicorewareinc.com>
> # Date 1526539714 -19800
> #      Thu May 17 12:18:34 2018 +0530
> # Branch stable
> # Node ID 3cef29225ef431c820c8e5593b00c3c225bfffdc
> # Parent  e70f8897811514877bed1f1f318ed95d24658af0
> analysis: enable scaled save and load runs with different max CU sizes
>
> This patch allows either of the following:
> (i) The CTU sizes of the save and load encodes are the same.
>                          (or)
> (ii) The CTU size of the load encode is double the CTU size of the save encode.
>
> diff -r e70f88978115 -r 3cef29225ef4 doc/reST/cli.rst
> --- a/doc/reST/cli.rst  Thu May 17 12:11:45 2018 +0530
> +++ b/doc/reST/cli.rst  Thu May 17 12:18:34 2018 +0530
> @@ -911,9 +911,10 @@
>
>  .. option:: --scale-factor
>
> -       Factor by which input video is scaled down for analysis save mode.
> -       This option should be coupled with analysis-reuse-mode option, --analysis-reuse-level 10.
> -       The ctu size of load should be double the size of save. Default 0.
> +       Factor by which input video is scaled down for analysis save mode.
> +       This option should be coupled with analysis-reuse-mode option,
> +       --analysis-reuse-level 10. The ctu size of load can either be the
> +       same as that of save or double the size of save. Default 0.
>
>  .. option:: --refine-intra <0..4>
>
> diff -r e70f88978115 -r 3cef29225ef4 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Thu May 17 12:11:45 2018 +0530
> +++ b/source/encoder/encoder.cpp        Thu May 17 12:18:34 2018 +0530
> @@ -97,6 +97,7 @@
>
>      m_prevTonemapPayload.payload = NULL;
>      m_startPoint = 0;
> +    m_saveCTUSize = 0;
>  }
>  inline char *strcatFilename(const char *input, const char *suffix)
>  {
> @@ -1078,7 +1079,23 @@
>                  if (paramBytes == -1)
>                      m_aborted = true;
>              }
> -            readAnalysisFile(&inFrame->m_analysisData, inFrame->m_poc,
> pic_in, paramBytes);
> +            if (m_saveCTUSize)
> +            {
> +                cuLocation cuLocInFrame;
> +                cuLocInFrame.init(m_param);
> +                /* Set skipWidth/skipHeight flags when the out of bound
> pixels in lowRes is greater than half of maxCUSize */
> +                int extendedWidth = ((m_param->sourceWidth / 2 +
> m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
> +                int extendedHeight = ((m_param->sourceHeight / 2 +
> m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
> +                uint32_t outOfBoundaryLowres = extendedWidth -
> m_param->sourceWidth / 2;
> +                if (outOfBoundaryLowres * 2 >= m_param->maxCUSize)
> +                    cuLocInFrame.skipWidth = true;
> +                uint32_t outOfBoundaryLowresH = extendedHeight -
> m_param->sourceHeight / 2;
> +                if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
> +                    cuLocInFrame.skipHeight = true;
> +                readAnalysisFile(&inFrame->m_analysisData,
> inFrame->m_poc, pic_in, paramBytes, cuLocInFrame);
> +            }
> +            else
> +                readAnalysisFile(&inFrame->m_analysisData,
> inFrame->m_poc, pic_in, paramBytes);
>              inFrame->m_poc = inFrame->m_analysisData.poc;
>              sliceType = inFrame->m_analysisData.sliceType;
>              inFrame->m_lowres.bScenecut = !!inFrame->m_analysisData.
> bScenecut;
> @@ -3320,7 +3337,265 @@
>
>  void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc,
> const x265_picture* picIn, int paramBytes)
>  {
> -
> +#define X265_FREAD(val, size, readSize, fileOffset, src)\
> +    if (!m_param->bUseAnalysisFile)\
> +        {\
> +        memcpy(val, src, (size * readSize));\
> +        }\
> +        else if (fread(val, size, readSize, fileOffset) != readSize)\
> +    {\
> +        x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data\n");\
> +        freeAnalysis(analysis);\
> +        m_aborted = true;\
> +        return;\
> +    }\
> +
> +    static uint64_t consumedBytes = 0;
> +    static uint64_t totalConsumedBytes = 0;
> +    uint32_t depthBytes = 0;
> +    if (m_param->bUseAnalysisFile)
> +        fseeko(m_analysisFileIn, totalConsumedBytes + paramBytes,
> SEEK_SET);
> +    const x265_analysis_data *picData = &(picIn->analysisData);
> +    analysis_intra_data *intraPic = (analysis_intra_data
> *)picData->intraData;
> +    analysis_inter_data *interPic = (analysis_inter_data
> *)picData->interData;
> +
> +    int poc; uint32_t frameRecordSize;
> +    X265_FREAD(&frameRecordSize, sizeof(uint32_t), 1, m_analysisFileIn,
> &(picData->frameRecordSize));
> +    X265_FREAD(&depthBytes, sizeof(uint32_t), 1, m_analysisFileIn,
> &(picData->depthBytes));
> +    X265_FREAD(&poc, sizeof(int), 1, m_analysisFileIn, &(picData->poc));
> +
> +    if (m_param->bUseAnalysisFile)
> +    {
> +        uint64_t currentOffset = totalConsumedBytes;
> +
> +        /* Seeking to the right frame Record */
> +        while (poc != curPoc && !feof(m_analysisFileIn))
> +        {
> +            currentOffset += frameRecordSize;
> +            fseeko(m_analysisFileIn, currentOffset + paramBytes,
> SEEK_SET);
> +            X265_FREAD(&frameRecordSize, sizeof(uint32_t), 1,
> m_analysisFileIn, &(picData->frameRecordSize));
> +            X265_FREAD(&depthBytes, sizeof(uint32_t), 1,
> m_analysisFileIn, &(picData->depthBytes));
> +            X265_FREAD(&poc, sizeof(int), 1, m_analysisFileIn,
> &(picData->poc));
> +        }
> +        if (poc != curPoc || feof(m_analysisFileIn))
> +        {
> +            x265_log(NULL, X265_LOG_WARNING, "Error reading analysis
> data: Cannot find POC %d\n", curPoc);
> +            freeAnalysis(analysis);
> +            return;
> +        }
> +    }
> +
> +    /* Now arrived at the right frame, read the record */
> +    analysis->poc = poc;
> +    analysis->frameRecordSize = frameRecordSize;
> +    X265_FREAD(&analysis->sliceType, sizeof(int), 1, m_analysisFileIn,
> &(picData->sliceType));
> +    X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFileIn,
> &(picData->bScenecut));
> +    X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1,
> m_analysisFileIn, &(picData->satdCost));
> +    X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1,
> m_analysisFileIn, &(picData->numCUsInFrame));
> +    X265_FREAD(&analysis->numPartitions, sizeof(int), 1,
> m_analysisFileIn, &(picData->numPartitions));
> +    if (m_param->bDisableLookahead)
> +    {
> +        X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1,
> m_analysisFileIn, &(picData->numCuInHeight));
> +        X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1,
> m_analysisFileIn, &(picData->lookahead));
> +    }
> +    int scaledNumPartition = analysis->numPartitions;
> +    int factor = 1 << m_param->scaleFactor;
> +
> +    if (m_param->scaleFactor)
> +        analysis->numPartitions *= factor;
> +    /* Memory is allocated for inter and intra analysis data based on the
> slicetype */
> +    allocAnalysis(analysis);
> +    if (m_param->bDisableLookahead && m_rateControl->m_isVbv)
> +    {
> +        X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t),
> analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.
> intraVbvCost);
> +        X265_FREAD(analysis->lookahead.vbvCost, sizeof(uint32_t),
> analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.vbvCost);
> +        X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t),
> analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv);
> +        X265_FREAD(analysis->lookahead.intraSatdForVbv,
> sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn,
> picData->lookahead.intraSatdForVbv);
> +    }
> +    if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType ==
> X265_TYPE_I)
> +    {
> +        if (m_param->analysisReuseLevel < 2)
> +            return;
> +
> +        uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSizes = NULL;
> +
> +        tempBuf = X265_MALLOC(uint8_t, depthBytes * 3);
> +        depthBuf = tempBuf;
> +        modeBuf = tempBuf + depthBytes;
> +        partSizes = tempBuf + 2 * depthBytes;
> +
> +        X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->depth);
> +        X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
> +        X265_FREAD(partSizes, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->partSizes);
> +
> +        size_t count = 0;
> +        for (uint32_t d = 0; d < depthBytes; d++)
> +        {
> +            int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
> +            if (m_param->scaleFactor)
> +            {
> +                if (depthBuf[d] == 0)
> +                    depthBuf[d] = 1;
> +                if (partSizes[d] == SIZE_NxN)
> +                    partSizes[d] = SIZE_2Nx2N;
> +            }
> +            memset(&((analysis_intra_data *)analysis->intraData)->depth[count],
> depthBuf[d], bytes);
> +            memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count],
> modeBuf[d], bytes);
> +            memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count],
> partSizes[d], bytes);
> +            count += bytes;
> +        }
> +
> +        if (!m_param->scaleFactor)
> +        {
> +            X265_FREAD(((analysis_intra_data
> *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame *
> analysis->numPartitions, m_analysisFileIn, intraPic->modes);
> +        }
> +        else
> +        {
> +            uint8_t *tempLumaBuf = X265_MALLOC(uint8_t,
> analysis->numCUsInFrame * scaledNumPartition);
> +            X265_FREAD(tempLumaBuf, sizeof(uint8_t),
> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn,
> intraPic->modes);
> +            for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx <
> analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor)
> +                memset(&((analysis_intra_data
> *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
> +            X265_FREE(tempLumaBuf);
> +        }
> +        X265_FREE(tempBuf);
> +        consumedBytes += frameRecordSize;
> +    }
> +
> +    else
> +    {
> +        uint32_t numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
> +        uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 :
> 3;
> +        X265_FREAD((WeightParam*)analysis->wt, sizeof(WeightParam),
> numPlanes * numDir, m_analysisFileIn, (picIn->analysisData.wt));
> +        if (m_param->analysisReuseLevel < 2)
> +            return;
> +
> +        uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSize = NULL, *mergeFlag = NULL;
> +        uint8_t *interDir = NULL, *chromaDir = NULL, *mvpIdx[2];
> +        MV* mv[2];
> +        int8_t* refIdx[2];
> +
> +        int numBuf = m_param->analysisReuseLevel > 4 ? 4 : 2;
> +        bool bIntraInInter = false;
> +        if (m_param->analysisReuseLevel == 10)
> +        {
> +            numBuf++;
> +            bIntraInInter = (analysis->sliceType == X265_TYPE_P ||
> m_param->bIntraInBFrames);
> +            if (bIntraInInter) numBuf++;
> +        }
> +
> +        tempBuf = X265_MALLOC(uint8_t, depthBytes * numBuf);
> +        depthBuf = tempBuf;
> +        modeBuf = tempBuf + depthBytes;
> +
> +        X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->depth);
> +        X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->modes);
> +
> +        if (m_param->analysisReuseLevel > 4)
> +        {
> +            partSize = modeBuf + depthBytes;
> +            mergeFlag = partSize + depthBytes;
> +            X265_FREAD(partSize, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->partSize);
> +            X265_FREAD(mergeFlag, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->mergeFlag);
> +
> +            if (m_param->analysisReuseLevel == 10)
> +            {
> +                interDir = mergeFlag + depthBytes;
> +                X265_FREAD(interDir, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->interDir);
> +                if (bIntraInInter)
> +                {
> +                    chromaDir = interDir + depthBytes;
> +                    X265_FREAD(chromaDir, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, intraPic->chromaModes);
> +                }
> +                for (uint32_t i = 0; i < numDir; i++)
> +                {
> +                    mvpIdx[i] = X265_MALLOC(uint8_t, depthBytes);
> +                    refIdx[i] = X265_MALLOC(int8_t, depthBytes);
> +                    mv[i] = X265_MALLOC(MV, depthBytes);
> +                    X265_FREAD(mvpIdx[i], sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->mvpIdx[i]);
> +                    X265_FREAD(refIdx[i], sizeof(int8_t), depthBytes,
> m_analysisFileIn, interPic->refIdx[i]);
> +                    X265_FREAD(mv[i], sizeof(MV), depthBytes,
> m_analysisFileIn, interPic->mv[i]);
> +                }
> +            }
> +        }
> +
> +        size_t count = 0;
> +        for (uint32_t d = 0; d < depthBytes; d++)
> +        {
> +            int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
> +            if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA &&
> depthBuf[d] == 0)
> +                depthBuf[d] = 1;
> +            memset(&((analysis_inter_data *)analysis->interData)->depth[count],
> depthBuf[d], bytes);
> +            memset(&((analysis_inter_data *)analysis->interData)->modes[count],
> modeBuf[d], bytes);
> +            if (m_param->analysisReuseLevel > 4)
> +            {
> +                if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA &&
> partSize[d] == SIZE_NxN)
> +                    partSize[d] = SIZE_2Nx2N;
> +                memset(&((analysis_inter_data *)analysis->interData)->partSize[count],
> partSize[d], bytes);
> +                int numPU = (modeBuf[d] == MODE_INTRA) ? 1 :
> nbPartsTable[(int)partSize[d]];
> +                for (int pu = 0; pu < numPU; pu++)
> +                {
> +                    if (pu) d++;
> +                    ((analysis_inter_data *)analysis->interData)->mergeFlag[count
> + pu] = mergeFlag[d];
> +                    if (m_param->analysisReuseLevel == 10)
> +                    {
> +                        ((analysis_inter_data *)analysis->interData)->interDir[count
> + pu] = interDir[d];
> +                        for (uint32_t i = 0; i < numDir; i++)
> +                        {
> +                            ((analysis_inter_data *)analysis->interData)->mvpIdx[i][count
> + pu] = mvpIdx[i][d];
> +                            ((analysis_inter_data *)analysis->interData)->refIdx[i][count
> + pu] = refIdx[i][d];
> +                            if (m_param->scaleFactor)
> +                            {
> +                                mv[i][d].x *=
> (int16_t)m_param->scaleFactor;
> +                                mv[i][d].y *=
> (int16_t)m_param->scaleFactor;
> +                            }
> +                            memcpy(&((analysis_inter_data
> *)analysis->interData)->mv[i][count + pu], &mv[i][d], sizeof(MV));
> +                        }
> +                    }
> +                }
> +                if (m_param->analysisReuseLevel == 10 && bIntraInInter)
> +                    memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count],
> chromaDir[d], bytes);
> +            }
> +            count += bytes;
> +        }
> +
> +        X265_FREE(tempBuf);
> +
> +        if (m_param->analysisReuseLevel == 10)
> +        {
> +            for (uint32_t i = 0; i < numDir; i++)
> +            {
> +                X265_FREE(mvpIdx[i]);
> +                X265_FREE(refIdx[i]);
> +                X265_FREE(mv[i]);
> +            }
> +            if (bIntraInInter)
> +            {
> +                if (!m_param->scaleFactor)
> +                {
> +                    X265_FREAD(((analysis_intra_data
> *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame *
> analysis->numPartitions, m_analysisFileIn, intraPic->modes);
> +                }
> +                else
> +                {
> +                    uint8_t *tempLumaBuf = X265_MALLOC(uint8_t,
> analysis->numCUsInFrame * scaledNumPartition);
> +                    X265_FREAD(tempLumaBuf, sizeof(uint8_t),
> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn,
> intraPic->modes);
> +                    for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx <
> analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor)
> +                        memset(&((analysis_intra_data
> *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
> +                    X265_FREE(tempLumaBuf);
> +                }
> +            }
> +        }
> +        else
> +            X265_FREAD(((analysis_inter_data
> *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame *
> X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFileIn, interPic->ref);
> +
> +        consumedBytes += frameRecordSize;
> +        if (numDir == 1)
> +            totalConsumedBytes = consumedBytes;
> +    }
> +#undef X265_FREAD
> +}
> +
> +void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc,
> const x265_picture* picIn, int paramBytes, cuLocation cuLoc)
> +{
>  #define X265_FREAD(val, size, readSize, fileOffset, src)\
>      if (!m_param->bUseAnalysisFile)\
>      {\
> @@ -3388,25 +3663,9 @@
>
>      int numPartitions = analysis->numPartitions;
>      int numCUsInFrame = analysis->numCUsInFrame;
> -    cuLocation cuLoc;
> -    cuLoc.init(m_param);
> -
> -    if (m_param->scaleFactor)
> -    {
> -        /* Allocate memory for scaled resoultion's numPartitions and
> numCUsInFrame*/
> -        analysis->numPartitions = m_param->num4x4Partitions;
> -        analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
> -
> -        /* Set skipWidth/skipHeight flags when the out of bound pixels in
> lowRes is greater than half of maxCUSize */
> -        int extendedWidth = ((m_param->sourceWidth / 2 +
> m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
> -        int extendedHeight = ((m_param->sourceHeight / 2 +
> m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize;
> -        uint32_t outOfBoundaryLowres = extendedWidth -
> m_param->sourceWidth / 2;
> -        if (outOfBoundaryLowres * 2 >= m_param->maxCUSize)
> -            cuLoc.skipWidth = true;
> -        uint32_t outOfBoundaryLowresH = extendedHeight -
> m_param->sourceHeight / 2;
> -        if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
> -            cuLoc.skipHeight = true;
> -    }
> +    /* Allocate memory for scaled resoultion's numPartitions and
> numCUsInFrame*/
> +    analysis->numPartitions = m_param->num4x4Partitions;
> +    analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
>
>      /* Memory is allocated for inter and intra analysis data based on the
> slicetype */
>      allocAnalysis(analysis);
> @@ -3442,51 +3701,39 @@
>          {
>              int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
>              int numCTUCopied = 1;
> -
> -            if (m_param->scaleFactor)
> +            if (!depthBuf[d]) //copy data of one 64x64 to four scaled
> 64x64 CTUs.
>              {
> -                if (!depthBuf[d]) //copy data of one 64x64 to four scaled
> 64x64 CTUs.
> -                {
> -                    bytes /= 4;
> -                    numCTUCopied = 4;
> -                }
> -
> -                if (partSizes[d] == SIZE_NxN)
> -                    partSizes[d] = SIZE_2Nx2N;
> -                if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) ||
> (depthBuf[d] && m_param->maxCUSize != 64))
> -                    depthBuf[d]--;
> +                bytes /= 4;
> +                numCTUCopied = 4;
>              }
> +            if (partSizes[d] == SIZE_NxN)
> +                partSizes[d] = SIZE_2Nx2N;
> +            if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) ||
> (depthBuf[d] && m_param->maxCUSize != 64))
> +                depthBuf[d]--;
> +
>              for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
>              {
>                  memset(&((analysis_intra_data
> *)analysis->intraData)->depth[count], depthBuf[d], bytes);
>                  memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count],
> modeBuf[d], bytes);
>                  memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count],
> partSizes[d], bytes);
>                  count += bytes;
> -                if (m_param->scaleFactor)
> -                    d += getCUIndex(&cuLoc, &count, bytes, 1);
> +                d += getCUIndex(&cuLoc, &count, bytes, 1);
>              }
>          }
>
> -        if (!m_param->scaleFactor)
> -        {
> -            X265_FREAD(((analysis_intra_data
> *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame *
> analysis->numPartitions, m_analysisFileIn, intraPic->modes);
> -        }
> -        else
> +        cuLoc.evenRowIndex = 0;
> +        cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU;
> +        cuLoc.switchCondition = 0;
> +        uint8_t *tempLumaBuf = X265_MALLOC(uint8_t,
> analysis->numCUsInFrame * scaledNumPartition);
> +        X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame
> * scaledNumPartition, m_analysisFileIn, intraPic->modes);
> +        uint32_t cnt = 0;
> +        for (uint32_t ctu32Idx = 0; ctu32Idx < analysis->numCUsInFrame *
> scaledNumPartition; ctu32Idx++)
>          {
> -            cuLoc.evenRowIndex = 0;
> -            cuLoc.oddRowIndex = m_param->num4x4Partitions *
> cuLoc.widthInCU;
> -            cuLoc.switchCondition = 0;
> -            uint8_t *tempLumaBuf = X265_MALLOC(uint8_t,
> analysis->numCUsInFrame * scaledNumPartition);
> -            X265_FREAD(tempLumaBuf, sizeof(uint8_t),
> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn,
> intraPic->modes);
> -            uint32_t cnt = 0;
> -            for (uint32_t ctu32Idx = 0; ctu32Idx <
> analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++)
> -            {
> -                memset(&((analysis_intra_data
> *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
> -                cnt += factor;
> -                ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
> -            }
> -            X265_FREE(tempLumaBuf);
> +            memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt],
> tempLumaBuf[ctu32Idx], factor);
> +            cnt += factor;
> +            ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
>          }
> +        X265_FREE(tempLumaBuf);
>          X265_FREE(tempBuf);
>          consumedBytes += frameRecordSize;
>      }
> @@ -3519,14 +3766,12 @@
>
>          X265_FREAD(depthBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->depth);
>          X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->modes);
> -
>          if (m_param->analysisReuseLevel > 4)
>          {
>              partSize = modeBuf + depthBytes;
>              mergeFlag = partSize + depthBytes;
>              X265_FREAD(partSize, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->partSize);
>              X265_FREAD(mergeFlag, sizeof(uint8_t), depthBytes,
> m_analysisFileIn, interPic->mergeFlag);
> -
>              if (m_param->analysisReuseLevel == 10)
>              {
>                  interDir = mergeFlag + depthBytes;
> @@ -3556,17 +3801,14 @@
>              bool isScaledMaxCUSize = false;
>              int numCTUCopied = 1;
>              int writeDepth = depthBuf[d];
> -            if (m_param->scaleFactor)
> +            if (!depthBuf[d]) //copy data of one 64x64 to four scaled
> 64x64 CTUs.
>              {
> -                if (!depthBuf[d]) //copy data of one 64x64 to four scaled
> 64x64 CTUs.
> -                {
> -                    isScaledMaxCUSize = true;
> -                    bytes /= 4;
> -                    numCTUCopied = 4;
> -                }
> -                if ((modeBuf[d] != MODE_INTRA && depthBuf[d] != 0) ||
> (modeBuf[d] == MODE_INTRA && depthBuf[d] > 1))
> -                    writeDepth--;
> +                isScaledMaxCUSize = true;
> +                bytes /= 4;
> +                numCTUCopied = 4;
>              }
> +            if ((modeBuf[d] != MODE_INTRA && depthBuf[d] != 0) ||
> (modeBuf[d] == MODE_INTRA && depthBuf[d] > 1))
> +                writeDepth--;
>
>              for (int numCTU = 0; numCTU < numCTUCopied; numCTU++)
>              {
> @@ -3579,7 +3821,7 @@
>                  {
>                      puOrientation puOrient;
>                      puOrient.init();
> -                    if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA
> && partSize[d] == SIZE_NxN)
> +                    if (modeBuf[d] == MODE_INTRA && partSize[d] ==
> SIZE_NxN)
>                          partSize[d] = SIZE_2Nx2N;
>                      int partitionSize = partSize[d];
>                      if (isScaledMaxCUSize && partSize[d] != SIZE_2Nx2N)
> @@ -3609,13 +3851,8 @@
>                              {
>                                  ((analysis_inter_data
> *)analysis->interData)->mvpIdx[i][count + pu] = mvpIdx[i][d];
>                                  ((analysis_inter_data
> *)analysis->interData)->refIdx[i][count + pu] = refIdx[i][d];
> -                                mvCopy[i].x = mv[i][d].x;
> -                                mvCopy[i].y = mv[i][d].y;
> -                                if (m_param->scaleFactor)
> -                                {
> -                                    mvCopy[i].x = mv[i][d].x *
> (int16_t)m_param->scaleFactor;
> -                                    mvCopy[i].y = mv[i][d].y *
> (int16_t)m_param->scaleFactor;
> -                                }
> +                                mvCopy[i].x = mv[i][d].x *
> (int16_t)m_param->scaleFactor;
> +                                mvCopy[i].y = mv[i][d].y *
> (int16_t)m_param->scaleFactor;
>                                  memcpy(&((analysis_inter_data
> *)analysis->interData)->mv[i][count + pu], &mvCopy[i], sizeof(MV));
>                              }
>                          }
> @@ -3633,8 +3870,7 @@
>                      }
>                  }
>                  count += bytes;
> -                if (m_param->scaleFactor)
> -                    d += getCUIndex(&cuLoc, &count, bytes, 1);
> +                d += getCUIndex(&cuLoc, &count, bytes, 1);
>              }
>          }
>
> @@ -3650,26 +3886,19 @@
>              }
>              if (bIntraInInter)
>              {
> -                if (!m_param->scaleFactor)
> -                {
> -                    X265_FREAD(((analysis_intra_data
> *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame *
> analysis->numPartitions, m_analysisFileIn, intraPic->modes);
> -                }
> -                else
> +                cuLoc.evenRowIndex = 0;
> +                cuLoc.oddRowIndex = m_param->num4x4Partitions *
> cuLoc.widthInCU;
> +                cuLoc.switchCondition = 0;
> +                uint8_t *tempLumaBuf = X265_MALLOC(uint8_t,
> analysis->numCUsInFrame * scaledNumPartition);
> +                X265_FREAD(tempLumaBuf, sizeof(uint8_t),
> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn,
> intraPic->modes);
> +                uint32_t cnt = 0;
> +                for (uint32_t ctu32Idx = 0; ctu32Idx <
> analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++)
>                  {
> -                    cuLoc.evenRowIndex = 0;
> -                    cuLoc.oddRowIndex = m_param->num4x4Partitions *
> cuLoc.widthInCU;
> -                    cuLoc.switchCondition = 0;
> -                    uint8_t *tempLumaBuf = X265_MALLOC(uint8_t,
> analysis->numCUsInFrame * scaledNumPartition);
> -                    X265_FREAD(tempLumaBuf, sizeof(uint8_t),
> analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn,
> intraPic->modes);
> -                    uint32_t cnt = 0;
> -                    for (uint32_t ctu32Idx = 0; ctu32Idx <
> analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++)
> -                    {
> -                        memset(&((analysis_intra_data
> *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
> -                        cnt += factor;
> -                        ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
> -                    }
> -                    X265_FREE(tempLumaBuf);
> +                    memset(&((analysis_intra_data
> *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
> +                    cnt += factor;
> +                    ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0);
>                  }
> +                X265_FREE(tempLumaBuf);
>              }
>          }
>          else
> @@ -3681,11 +3910,8 @@
>      }
>
>      /* Restore to the current encode's numPartitions and numCUsInFrame */
> -    if (m_param->scaleFactor)
> -    {
> -        analysis->numPartitions = m_param->num4x4Partitions;
> -        analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
> -    }
> +    analysis->numPartitions = m_param->num4x4Partitions;
> +    analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU;
>  #undef X265_FREAD
>  }
>
> @@ -3731,7 +3957,32 @@
>      X265_PARAM_VALIDATE(saveParam.openGOP, sizeof(int), 1,
> &m_param->bOpenGOP);
>      X265_PARAM_VALIDATE(saveParam.bframes, sizeof(int), 1,
> &m_param->bframes);
>      X265_PARAM_VALIDATE(saveParam.bPyramid, sizeof(int), 1,
> &m_param->bBPyramid);
> -    X265_PARAM_VALIDATE(saveParam.maxCUSize, sizeof(int), 1,
> &m_param->maxCUSize);
> +    /* Enable m_saveCTUSize if the save and load encodes have the same
> maxCU size */
> +    if (writeFlag)
> +    {
> +        X265_PARAM_VALIDATE(saveParam.maxCUSize, sizeof(int), 1,
> &m_param->maxCUSize);
> +    }
> +    else
> +    {
> +        fileOffset = m_analysisFileIn;
> +        if (m_param->bUseAnalysisFile && fread(&readValue, sizeof(int),
> 1, fileOffset) != 1)
> +        {
> +            x265_log(NULL, X265_LOG_ERROR, "Error reading analysis
> data.\n");
> +            m_aborted = true;
> +        }
> +        else if (!m_param->bUseAnalysisFile)
> +            readValue = saveParam.maxCUSize;
> +
> +        m_saveCTUSize = 0;
> +        if (m_param->scaleFactor && g_log2Size[m_param->maxCUSize] ==
> g_log2Size[readValue])
> +            m_saveCTUSize = 1;
> +        else if (readValue != (int)m_param->maxCUSize &&
> (g_log2Size[m_param->maxCUSize] - g_log2Size[readValue]) != 1)
> +        {
> +            x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data.
> Mismatch in params.\n");
> +            m_aborted = true;
> +        }
> +        count++;
> +    }
>      X265_PARAM_VALIDATE(saveParam.minCUSize, sizeof(int), 1,
> &m_param->minCUSize);
>      X265_PARAM_VALIDATE(saveParam.radl, sizeof(int), 1, &m_param->radl);
>      X265_PARAM_VALIDATE(saveParam.lookaheadDepth, sizeof(int), 1,
> &m_param->lookaheadDepth);
> diff -r e70f88978115 -r 3cef29225ef4 source/encoder/encoder.h
> --- a/source/encoder/encoder.h  Thu May 17 12:11:45 2018 +0530
> +++ b/source/encoder/encoder.h  Thu May 17 12:18:34 2018 +0530
> @@ -228,6 +228,8 @@
>      int32_t                 m_startPoint;
>      Lock                    m_dynamicRefineLock;
>
> +    bool                    m_saveCTUSize;
> +
>      Encoder();
>      ~Encoder()
>      {
> @@ -281,6 +283,8 @@
>
>      void readAnalysisFile(x265_analysis_data* analysis, int poc, const
> x265_picture* picIn, int paramBytes);
>
> +    void readAnalysisFile(x265_analysis_data* analysis, int poc, const
> x265_picture* picIn, int paramBytes, cuLocation cuLoc);
> +
>      int getCUIndex(cuLocation* cuLoc, uint32_t* count, int bytes, int
> flag);
>
>      int getPuShape(puOrientation* puOrient, int partSize, int numCTU);
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>

Pushed.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20180517/ccb3bb35/attachment-0001.html>


More information about the x265-devel mailing list