[x265] [PATCH] analysis: skip rect/amp in analysis load mode
Deepthi Nandakumar
deepthi at multicorewareinc.com
Fri Apr 8 16:51:48 CEST 2016
Can you please regenerate this patch at the current tip?
On Mon, Mar 28, 2016 at 9:21 AM, <sagar at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Sagar Kotecha <sagar at multicorewareinc.com>
> # Date 1458817615 -19800
> # Thu Mar 24 16:36:55 2016 +0530
> # Node ID 5bccf2596d8a1d66a6a9d460e65b1b9b93c2d112
> # Parent 2de6cb99313a03c3577934ac5e2e116f7ba6cd10
> analysis: skip rect/amp in analysis load mode
>
> Avoid doing rect/amp analysis in load mode if the save mode has not chosen
> a rect/amp partition as the best partition.
>
> diff -r 2de6cb99313a -r 5bccf2596d8a source/common/framedata.h
> --- a/source/common/framedata.h Mon Mar 21 13:50:14 2016 +0530
> +++ b/source/common/framedata.h Thu Mar 24 16:36:55 2016 +0530
> @@ -172,6 +172,8 @@
> int32_t* ref;
> uint8_t* depth;
> uint8_t* modes;
> + uint8_t* partSize;
> + uint8_t* mergeFlag;
> };
> }
> #endif // ifndef X265_FRAMEDATA_H
> diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Mon Mar 21 13:50:14 2016 +0530
> +++ b/source/encoder/analysis.cpp Thu Mar 24 16:36:55 2016 +0530
> @@ -149,6 +149,8 @@
> m_reuseRef = &m_reuseInterDataCTU->ref[ctu.m_cuAddr *
> X265_MAX_PRED_MODE_PER_CTU * numPredDir];
> m_reuseDepth = &m_reuseInterDataCTU->depth[ctu.m_cuAddr *
> ctu.m_numPartitions];
> m_reuseModes = &m_reuseInterDataCTU->modes[ctu.m_cuAddr *
> ctu.m_numPartitions];
> + m_reusePartSize = &m_reuseInterDataCTU->partSize[ctu.m_cuAddr *
> ctu.m_numPartitions];
> + m_reuseMergeFlag = &m_reuseInterDataCTU->mergeFlag[ctu.m_cuAddr *
> ctu.m_numPartitions];
> if (m_param->analysisMode == X265_ANALYSIS_SAVE)
> for (int i = 0; i < X265_MAX_PRED_MODE_PER_CTU * numPredDir;
> i++)
> m_reuseRef[i] = -1;
> @@ -885,6 +887,8 @@
> uint32_t minDepth = topSkipMinDepth(parentCTU, cuGeom);
> bool earlyskip = false;
> bool splitIntra = true;
> + bool skipRectAmp = false;
> + bool chooseMerge = false;
>
> SplitData splitData[4];
> splitData[0].initSplitCUData();
> @@ -903,15 +907,26 @@
> bool foundSkip = false;
> if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> {
> - if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx] &&
> m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
> + if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
> {
> - md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> - md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> - checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
> + if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
> + {
> + md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> + md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> + checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
>
> - foundSkip = true;
> - if (m_param->rdLevel)
> - earlyskip = md.bestMode && m_param->bEnableEarlySkip;
> + foundSkip = true;
> + if (m_param->rdLevel)
> + earlyskip = md.bestMode && m_param->bEnableEarlySkip;
> + }
> + if (m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
> + {
> + if (m_reuseModes[cuGeom.absPartIdx] != MODE_INTRA &&
> m_reuseModes[cuGeom.absPartIdx] != 4)
> + {
> + skipRectAmp = true && !!md.bestMode;
> + chooseMerge = !!m_reuseMergeFlag[cuGeom.absPartIdx]
> && !!md.bestMode;
> + }
> + }
> }
> }
>
> @@ -1017,158 +1032,161 @@
> }
>
> Mode *bestInter = &md.pred[PRED_2Nx2N];
> - if (m_param->bEnableRectInter)
> + if (!skipRectAmp)
> {
> - uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> - uint32_t threshold_2NxN, threshold_Nx2N;
> + if (m_param->bEnableRectInter)
> + {
> + uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> + uint32_t threshold_2NxN, threshold_Nx2N;
>
> - if (m_slice->m_sliceType == P_SLICE)
> - {
> - threshold_2NxN = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> - threshold_Nx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> - }
> - else
> - {
> - threshold_2NxN = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> - + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> - threshold_Nx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> - + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> + if (m_slice->m_sliceType == P_SLICE)
> + {
> + threshold_2NxN = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> + threshold_Nx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> + }
> + else
> + {
> + threshold_2NxN = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> + + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> + threshold_Nx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> + + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> + }
> +
> + int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
> + if (try_2NxN_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
> + {
> + refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> + refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> + md.pred[PRED_2NxN].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> + if (md.pred[PRED_2NxN].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_2NxN];
> + }
> +
> + if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_Nx2N)
> + {
> + refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* left */
> + refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* right */
> + md.pred[PRED_Nx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom,
> SIZE_Nx2N, refMasks);
> + if (md.pred[PRED_Nx2N].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_Nx2N];
> + }
> +
> + if (!try_2NxN_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
> + {
> + refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> + refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> + md.pred[PRED_2NxN].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> + if (md.pred[PRED_2NxN].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_2NxN];
> + }
> }
>
> - int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
> - if (try_2NxN_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
> + if (m_slice->m_sps->maxAMPDepth > depth)
> {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> - refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> - md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom,
> qp);
> - checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> - if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_2NxN];
> - }
> + uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> + uint32_t threshold_2NxnU, threshold_2NxnD,
> threshold_nLx2N, threshold_nRx2N;
>
> - if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_Nx2N)
> - {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* left */
> - refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* right */
> - md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom,
> qp);
> - checkInter_rd0_4(md.pred[PRED_Nx2N], cuGeom,
> SIZE_Nx2N, refMasks);
> - if (md.pred[PRED_Nx2N].sa8dCost < bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_Nx2N];
> - }
> + if (m_slice->m_sliceType == P_SLICE)
> + {
> + threshold_2NxnU = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> + threshold_2NxnD = splitData[2].mvCost[0] +
> splitData[3].mvCost[0];
>
> - if (!try_2NxN_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxN)
> - {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> - refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> - md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom,
> qp);
> - checkInter_rd0_4(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> - if (md.pred[PRED_2NxN].sa8dCost < bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_2NxN];
> - }
> - }
> + threshold_nLx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> + threshold_nRx2N = splitData[1].mvCost[0] +
> splitData[3].mvCost[0];
> + }
> + else
> + {
> + threshold_2NxnU = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> + + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> + threshold_2NxnD = (splitData[2].mvCost[0] +
> splitData[3].mvCost[0]
> + + splitData[2].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
>
> - if (m_slice->m_sps->maxAMPDepth > depth)
> - {
> - uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> - uint32_t threshold_2NxnU, threshold_2NxnD,
> threshold_nLx2N, threshold_nRx2N;
> -
> - if (m_slice->m_sliceType == P_SLICE)
> - {
> - threshold_2NxnU = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> - threshold_2NxnD = splitData[2].mvCost[0] +
> splitData[3].mvCost[0];
> -
> - threshold_nLx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> - threshold_nRx2N = splitData[1].mvCost[0] +
> splitData[3].mvCost[0];
> - }
> - else
> - {
> - threshold_2NxnU = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> - + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> - threshold_2NxnD = (splitData[2].mvCost[0] +
> splitData[3].mvCost[0]
> - + splitData[2].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> -
> - threshold_nLx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> - + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> - threshold_nRx2N = (splitData[1].mvCost[0] +
> splitData[3].mvCost[0]
> - + splitData[1].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> - }
> -
> - bool bHor = false, bVer = false;
> - if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
> - bHor = true;
> - else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
> - bVer = true;
> - else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
> - md.bestMode && md.bestMode->cu.getQtRootCbf(0))
> - {
> - bHor = true;
> - bVer = true;
> - }
> -
> - if (bHor)
> - {
> - int try_2NxnD_first = threshold_2NxnD <
> threshold_2NxnU;
> - if (try_2NxnD_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
> - {
> - refMasks[0] = allSplitRefs;
> /* 75% top */
> - refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> - md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> - if (md.pred[PRED_2NxnD].sa8dCost <
> bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_2NxnD];
> + threshold_nLx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> + + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> + threshold_nRx2N = (splitData[1].mvCost[0] +
> splitData[3].mvCost[0]
> + + splitData[1].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> }
>
> - if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_2NxnU)
> + bool bHor = false, bVer = false;
> + if (bestInter->cu.m_partSize[0] == SIZE_2NxN)
> + bHor = true;
> + else if (bestInter->cu.m_partSize[0] == SIZE_Nx2N)
> + bVer = true;
> + else if (bestInter->cu.m_partSize[0] == SIZE_2Nx2N &&
> + md.bestMode && md.bestMode->cu.getQtRootCbf(0))
> {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* 25% top */
> - refMasks[1] = allSplitRefs;
> /* 75% bot */
> - md.pred[PRED_2NxnU].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom,
> SIZE_2NxnU, refMasks);
> - if (md.pred[PRED_2NxnU].sa8dCost <
> bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_2NxnU];
> + bHor = true;
> + bVer = true;
> }
>
> - if (!try_2NxnD_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
> + if (bHor)
> {
> - refMasks[0] = allSplitRefs;
> /* 75% top */
> - refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> - md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> - if (md.pred[PRED_2NxnD].sa8dCost <
> bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_2NxnD];
> + int try_2NxnD_first = threshold_2NxnD <
> threshold_2NxnU;
> + if (try_2NxnD_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
> + {
> + refMasks[0] = allSplitRefs;
> /* 75% top */
> + refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> + md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> + if (md.pred[PRED_2NxnD].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_2NxnD];
> + }
> +
> + if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_2NxnU)
> + {
> + refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* 25% top */
> + refMasks[1] = allSplitRefs;
> /* 75% bot */
> + md.pred[PRED_2NxnU].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_2NxnU], cuGeom,
> SIZE_2NxnU, refMasks);
> + if (md.pred[PRED_2NxnU].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_2NxnU];
> + }
> +
> + if (!try_2NxnD_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_2NxnD)
> + {
> + refMasks[0] = allSplitRefs;
> /* 75% top */
> + refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> + md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> + if (md.pred[PRED_2NxnD].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_2NxnD];
> + }
> }
> - }
> - if (bVer)
> - {
> - int try_nRx2N_first = threshold_nRx2N <
> threshold_nLx2N;
> - if (try_nRx2N_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
> + if (bVer)
> {
> - refMasks[0] = allSplitRefs;
> /* 75% left */
> - refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> - md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> - if (md.pred[PRED_nRx2N].sa8dCost <
> bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_nRx2N];
> - }
> + int try_nRx2N_first = threshold_nRx2N <
> threshold_nLx2N;
> + if (try_nRx2N_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
> + {
> + refMasks[0] = allSplitRefs;
> /* 75% left */
> + refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> + md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> + if (md.pred[PRED_nRx2N].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_nRx2N];
> + }
>
> - if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_nLx2N)
> - {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* 25% left */
> - refMasks[1] = allSplitRefs;
> /* 75% right */
> - md.pred[PRED_nLx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom,
> SIZE_nLx2N, refMasks);
> - if (md.pred[PRED_nLx2N].sa8dCost <
> bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_nLx2N];
> - }
> + if (splitCost < md.pred[PRED_2Nx2N].sa8dCost +
> threshold_nLx2N)
> + {
> + refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* 25% left */
> + refMasks[1] = allSplitRefs;
> /* 75% right */
> + md.pred[PRED_nLx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_nLx2N], cuGeom,
> SIZE_nLx2N, refMasks);
> + if (md.pred[PRED_nLx2N].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_nLx2N];
> + }
>
> - if (!try_nRx2N_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
> - {
> - refMasks[0] = allSplitRefs;
> /* 75% left */
> - refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> - md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> - if (md.pred[PRED_nRx2N].sa8dCost <
> bestInter->sa8dCost)
> - bestInter = &md.pred[PRED_nRx2N];
> + if (!try_nRx2N_first && splitCost <
> md.pred[PRED_2Nx2N].sa8dCost + threshold_nRx2N)
> + {
> + refMasks[0] = allSplitRefs;
> /* 75% left */
> + refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> + md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd0_4(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> + if (md.pred[PRED_nRx2N].sa8dCost <
> bestInter->sa8dCost)
> + bestInter = &md.pred[PRED_nRx2N];
> + }
> }
> }
> }
> @@ -1185,15 +1203,19 @@
> motionCompensation(bestInter->cu, pu,
> bestInter->predYuv, false, true);
> }
> }
> - encodeResAndCalcRdInterCU(*bestInter, cuGeom);
> - checkBestMode(*bestInter, depth);
>
> - /* If BIDIR is available and within 17/16 of best inter
> option, choose by RDO */
> - if (m_slice->m_sliceType == B_SLICE &&
> md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
> - md.pred[PRED_BIDIR].sa8dCost * 16 <=
> bestInter->sa8dCost * 17)
> + if (!chooseMerge)
> {
> - encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR],
> cuGeom);
> - checkBestMode(md.pred[PRED_BIDIR], depth);
> + encodeResAndCalcRdInterCU(*bestInter, cuGeom);
> + checkBestMode(*bestInter, depth);
> +
> + /* If BIDIR is available and within 17/16 of best
> inter option, choose by RDO */
> + if (m_slice->m_sliceType == B_SLICE &&
> md.pred[PRED_BIDIR].sa8dCost != MAX_INT64 &&
> + md.pred[PRED_BIDIR].sa8dCost * 16 <=
> bestInter->sa8dCost * 17)
> + {
> + encodeResAndCalcRdInterCU(md.pred[PRED_BIDIR],
> cuGeom);
> + checkBestMode(md.pred[PRED_BIDIR], depth);
> + }
> }
>
> if ((bTryIntra && md.bestMode->cu.getQtRootCbf(0)) ||
> @@ -1378,6 +1400,7 @@
> bool foundSkip = false;
> bool earlyskip = false;
> bool splitIntra = true;
> + bool skipRectAmp = false;
>
> // avoid uninitialize value in below reference
> if (m_param->limitModes)
> @@ -1389,14 +1412,19 @@
>
> if (m_param->analysisMode == X265_ANALYSIS_LOAD)
> {
> - if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx] &&
> m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
> + if (mightNotSplit && depth == m_reuseDepth[cuGeom.absPartIdx])
> {
> - md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> - md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> - checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
> + if (m_reuseModes[cuGeom.absPartIdx] == MODE_SKIP)
> + {
> + md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
> + md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
> + checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
>
> - foundSkip = true;
> - earlyskip = !!m_param->bEnableEarlySkip;
> + foundSkip = true;
> + earlyskip = !!m_param->bEnableEarlySkip;
> + }
> + if (m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
> + skipRectAmp = true && !!md.bestMode;
> }
> }
>
> @@ -1502,150 +1530,153 @@
> }
> }
>
> - if (m_param->bEnableRectInter)
> + if (!skipRectAmp)
> {
> - uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> - uint32_t threshold_2NxN, threshold_Nx2N;
> + if (m_param->bEnableRectInter)
> + {
> + uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> + uint32_t threshold_2NxN, threshold_Nx2N;
>
> - if (m_slice->m_sliceType == P_SLICE)
> - {
> - threshold_2NxN = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> - threshold_Nx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> - }
> - else
> - {
> - threshold_2NxN = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> - + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> - threshold_Nx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> - + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> - }
> -
> - int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
> - if (try_2NxN_first && splitCost < md.bestMode->rdCost +
> threshold_2NxN)
> - {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> - refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> - md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom,
> qp);
> - checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> - checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
> - }
> -
> - if (splitCost < md.bestMode->rdCost + threshold_Nx2N)
> - {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* left */
> - refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* right */
> - md.pred[PRED_Nx2N].cu.initSubCU(parentCTU, cuGeom,
> qp);
> - checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom,
> SIZE_Nx2N, refMasks);
> - checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
> - }
> -
> - if (!try_2NxN_first && splitCost < md.bestMode->rdCost +
> threshold_2NxN)
> - {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> - refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> - md.pred[PRED_2NxN].cu.initSubCU(parentCTU, cuGeom,
> qp);
> - checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> - checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
> - }
> - }
> -
> - // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
> - if (m_slice->m_sps->maxAMPDepth > depth)
> - {
> - uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> - uint32_t threshold_2NxnU, threshold_2NxnD,
> threshold_nLx2N, threshold_nRx2N;
> -
> - if (m_slice->m_sliceType == P_SLICE)
> - {
> - threshold_2NxnU = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> - threshold_2NxnD = splitData[2].mvCost[0] +
> splitData[3].mvCost[0];
> -
> - threshold_nLx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> - threshold_nRx2N = splitData[1].mvCost[0] +
> splitData[3].mvCost[0];
> - }
> - else
> - {
> - threshold_2NxnU = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> + if (m_slice->m_sliceType == P_SLICE)
> + {
> + threshold_2NxN = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> + threshold_Nx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> + }
> + else
> + {
> + threshold_2NxN = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> - threshold_2NxnD = (splitData[2].mvCost[0] +
> splitData[3].mvCost[0]
> - + splitData[2].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> -
> - threshold_nLx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> + threshold_Nx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> - threshold_nRx2N = (splitData[1].mvCost[0] +
> splitData[3].mvCost[0]
> - + splitData[1].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> - }
> -
> - bool bHor = false, bVer = false;
> - if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
> - bHor = true;
> - else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
> - bVer = true;
> - else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N &&
> !md.bestMode->cu.m_mergeFlag[0])
> - {
> - bHor = true;
> - bVer = true;
> - }
> -
> - if (bHor)
> - {
> - int try_2NxnD_first = threshold_2NxnD <
> threshold_2NxnU;
> - if (try_2NxnD_first && splitCost <
> md.bestMode->rdCost + threshold_2NxnD)
> - {
> - refMasks[0] = allSplitRefs;
> /* 75% top */
> - refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> - md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> - checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
> }
>
> - if (splitCost < md.bestMode->rdCost + threshold_2NxnU)
> + int try_2NxN_first = threshold_2NxN < threshold_Nx2N;
> + if (try_2NxN_first && splitCost < md.bestMode->rdCost
> + threshold_2NxN)
> {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* 25% top */
> - refMasks[1] = allSplitRefs;
> /* 75% bot */
> - md.pred[PRED_2NxnU].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom,
> SIZE_2NxnU, refMasks);
> - checkBestMode(md.pred[PRED_2NxnU], cuGeom.depth);
> + refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> + refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> + md.pred[PRED_2NxN].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> + checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
> }
>
> - if (!try_2NxnD_first && splitCost <
> md.bestMode->rdCost + threshold_2NxnD)
> + if (splitCost < md.bestMode->rdCost + threshold_Nx2N)
> {
> - refMasks[0] = allSplitRefs;
> /* 75% top */
> - refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> - md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> - checkBestMode(md.pred[PRED_2NxnD], cuGeom.depth);
> + refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* left */
> + refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* right */
> + md.pred[PRED_Nx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_Nx2N], cuGeom,
> SIZE_Nx2N, refMasks);
> + checkBestMode(md.pred[PRED_Nx2N], cuGeom.depth);
> + }
> +
> + if (!try_2NxN_first && splitCost <
> md.bestMode->rdCost + threshold_2NxN)
> + {
> + refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* top */
> + refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* bot */
> + md.pred[PRED_2NxN].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_2NxN], cuGeom,
> SIZE_2NxN, refMasks);
> + checkBestMode(md.pred[PRED_2NxN], cuGeom.depth);
> }
> }
>
> - if (bVer)
> + // Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N,
> SIZE_nRx2N)
> + if (m_slice->m_sps->maxAMPDepth > depth)
> {
> - int try_nRx2N_first = threshold_nRx2N <
> threshold_nLx2N;
> - if (try_nRx2N_first && splitCost <
> md.bestMode->rdCost + threshold_nRx2N)
> + uint64_t splitCost = splitData[0].sa8dCost +
> splitData[1].sa8dCost + splitData[2].sa8dCost + splitData[3].sa8dCost;
> + uint32_t threshold_2NxnU, threshold_2NxnD,
> threshold_nLx2N, threshold_nRx2N;
> +
> + if (m_slice->m_sliceType == P_SLICE)
> {
> - refMasks[0] = allSplitRefs;
> /* 75% left */
> - refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> - md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> - checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
> + threshold_2NxnU = splitData[0].mvCost[0] +
> splitData[1].mvCost[0];
> + threshold_2NxnD = splitData[2].mvCost[0] +
> splitData[3].mvCost[0];
> +
> + threshold_nLx2N = splitData[0].mvCost[0] +
> splitData[2].mvCost[0];
> + threshold_nRx2N = splitData[1].mvCost[0] +
> splitData[3].mvCost[0];
> + }
> + else
> + {
> + threshold_2NxnU = (splitData[0].mvCost[0] +
> splitData[1].mvCost[0]
> + + splitData[0].mvCost[1] +
> splitData[1].mvCost[1] + 1) >> 1;
> + threshold_2NxnD = (splitData[2].mvCost[0] +
> splitData[3].mvCost[0]
> + + splitData[2].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> +
> + threshold_nLx2N = (splitData[0].mvCost[0] +
> splitData[2].mvCost[0]
> + + splitData[0].mvCost[1] +
> splitData[2].mvCost[1] + 1) >> 1;
> + threshold_nRx2N = (splitData[1].mvCost[0] +
> splitData[3].mvCost[0]
> + + splitData[1].mvCost[1] +
> splitData[3].mvCost[1] + 1) >> 1;
> }
>
> - if (splitCost < md.bestMode->rdCost + threshold_nLx2N)
> + bool bHor = false, bVer = false;
> + if (md.bestMode->cu.m_partSize[0] == SIZE_2NxN)
> + bHor = true;
> + else if (md.bestMode->cu.m_partSize[0] == SIZE_Nx2N)
> + bVer = true;
> + else if (md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N
> && !md.bestMode->cu.m_mergeFlag[0])
> {
> - refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* 25% left */
> - refMasks[1] = allSplitRefs;
> /* 75% right */
> - md.pred[PRED_nLx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom,
> SIZE_nLx2N, refMasks);
> - checkBestMode(md.pred[PRED_nLx2N], cuGeom.depth);
> + bHor = true;
> + bVer = true;
> }
>
> - if (!try_nRx2N_first && splitCost <
> md.bestMode->rdCost + threshold_nRx2N)
> + if (bHor)
> {
> - refMasks[0] = allSplitRefs;
> /* 75% left */
> - refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> - md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> - checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> - checkBestMode(md.pred[PRED_nRx2N], cuGeom.depth);
> + int try_2NxnD_first = threshold_2NxnD <
> threshold_2NxnU;
> + if (try_2NxnD_first && splitCost <
> md.bestMode->rdCost + threshold_2NxnD)
> + {
> + refMasks[0] = allSplitRefs;
> /* 75% top */
> + refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> + md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> + checkBestMode(md.pred[PRED_2NxnD],
> cuGeom.depth);
> + }
> +
> + if (splitCost < md.bestMode->rdCost +
> threshold_2NxnU)
> + {
> + refMasks[0] = splitData[0].splitRefs |
> splitData[1].splitRefs; /* 25% top */
> + refMasks[1] = allSplitRefs;
> /* 75% bot */
> + md.pred[PRED_2NxnU].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_2NxnU], cuGeom,
> SIZE_2NxnU, refMasks);
> + checkBestMode(md.pred[PRED_2NxnU],
> cuGeom.depth);
> + }
> +
> + if (!try_2NxnD_first && splitCost <
> md.bestMode->rdCost + threshold_2NxnD)
> + {
> + refMasks[0] = allSplitRefs;
> /* 75% top */
> + refMasks[1] = splitData[2].splitRefs |
> splitData[3].splitRefs; /* 25% bot */
> + md.pred[PRED_2NxnD].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_2NxnD], cuGeom,
> SIZE_2NxnD, refMasks);
> + checkBestMode(md.pred[PRED_2NxnD],
> cuGeom.depth);
> + }
> + }
> +
> + if (bVer)
> + {
> + int try_nRx2N_first = threshold_nRx2N <
> threshold_nLx2N;
> + if (try_nRx2N_first && splitCost <
> md.bestMode->rdCost + threshold_nRx2N)
> + {
> + refMasks[0] = allSplitRefs;
> /* 75% left */
> + refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> + md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> + checkBestMode(md.pred[PRED_nRx2N],
> cuGeom.depth);
> + }
> +
> + if (splitCost < md.bestMode->rdCost +
> threshold_nLx2N)
> + {
> + refMasks[0] = splitData[0].splitRefs |
> splitData[2].splitRefs; /* 25% left */
> + refMasks[1] = allSplitRefs;
> /* 75% right */
> + md.pred[PRED_nLx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_nLx2N], cuGeom,
> SIZE_nLx2N, refMasks);
> + checkBestMode(md.pred[PRED_nLx2N],
> cuGeom.depth);
> + }
> +
> + if (!try_nRx2N_first && splitCost <
> md.bestMode->rdCost + threshold_nRx2N)
> + {
> + refMasks[0] = allSplitRefs;
> /* 75% left */
> + refMasks[1] = splitData[1].splitRefs |
> splitData[3].splitRefs; /* 25% right */
> + md.pred[PRED_nRx2N].cu.initSubCU(parentCTU,
> cuGeom, qp);
> + checkInter_rd5_6(md.pred[PRED_nRx2N], cuGeom,
> SIZE_nRx2N, refMasks);
> + checkBestMode(md.pred[PRED_nRx2N],
> cuGeom.depth);
> + }
> }
> }
> }
> diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/analysis.h
> --- a/source/encoder/analysis.h Mon Mar 21 13:50:14 2016 +0530
> +++ b/source/encoder/analysis.h Thu Mar 24 16:36:55 2016 +0530
> @@ -122,6 +122,8 @@
> int32_t* m_reuseRef;
> uint8_t* m_reuseDepth;
> uint8_t* m_reuseModes;
> + uint8_t* m_reusePartSize;
> + uint8_t* m_reuseMergeFlag;
>
> uint32_t m_splitRefIdx[4];
> uint64_t* cacheCost;
> diff -r 2de6cb99313a -r 5bccf2596d8a source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Mon Mar 21 13:50:14 2016 +0530
> +++ b/source/encoder/encoder.cpp Thu Mar 24 16:36:55 2016 +0530
> @@ -1918,6 +1918,8 @@
> CHECKED_MALLOC_ZERO(interData->ref, int32_t,
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir);
> CHECKED_MALLOC(interData->depth, uint8_t, analysis->numPartitions
> * analysis->numCUsInFrame);
> CHECKED_MALLOC(interData->modes, uint8_t, analysis->numPartitions
> * analysis->numCUsInFrame);
> + CHECKED_MALLOC(interData->partSize, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> + CHECKED_MALLOC(interData->mergeFlag, uint8_t,
> analysis->numPartitions * analysis->numCUsInFrame);
> CHECKED_MALLOC_ZERO(interData->wt, WeightParam, 3 * numDir);
> analysis->interData = interData;
> }
> @@ -1943,6 +1945,8 @@
> X265_FREE(((analysis_inter_data*)analysis->interData)->ref);
> X265_FREE(((analysis_inter_data*)analysis->interData)->depth);
> X265_FREE(((analysis_inter_data*)analysis->interData)->modes);
> + X265_FREE(((analysis_inter_data*)analysis->interData)->mergeFlag);
> + X265_FREE(((analysis_inter_data*)analysis->interData)->partSize);
> X265_FREE(((analysis_inter_data*)analysis->interData)->wt);
> X265_FREE(analysis->interData);
> }
> @@ -2029,13 +2033,15 @@
>
> else
> {
> - uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL;
> + uint8_t *tempBuf = NULL, *depthBuf = NULL, *modeBuf = NULL,
> *partSize = NULL, *mergeFlag = NULL;
>
> - tempBuf = X265_MALLOC(uint8_t, depthBytes * 2);
> - X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes * 2,
> m_analysisFile);
> + tempBuf = X265_MALLOC(uint8_t, depthBytes * 4);
> + X265_FREAD(tempBuf, sizeof(uint8_t), depthBytes * 4,
> m_analysisFile);
>
> depthBuf = tempBuf;
> - modeBuf = tempBuf + depthBytes;
> + modeBuf = tempBuf + depthBytes;
> + partSize = modeBuf + depthBytes;
> + mergeFlag = partSize + depthBytes;
>
> size_t count = 0;
> for (uint32_t d = 0; d < depthBytes; d++)
> @@ -2043,13 +2049,15 @@
> int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
> memset(&((analysis_inter_data
> *)analysis->interData)->depth[count], depthBuf[d], bytes);
> memset(&((analysis_inter_data
> *)analysis->interData)->modes[count], modeBuf[d], bytes);
> + memset(&((analysis_inter_data
> *)analysis->interData)->partSize[count], partSize[d], bytes);
> + memset(&((analysis_inter_data
> *)analysis->interData)->mergeFlag[count], mergeFlag[d], bytes);
> count += bytes;
> }
> -
> +
> X265_FREE(tempBuf);
> -
> +
> int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
> - X265_FREAD(((analysis_inter_data *)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU *
> numDir, m_analysisFile);
> + X265_FREAD(((analysis_inter_data *)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU *
> numDir, m_analysisFile);
> uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 :
> 3;
> X265_FREAD(((analysis_inter_data *)analysis->interData)->wt,
> sizeof(WeightParam), numPlanes * numDir, m_analysisFile);
> consumedBytes += frameRecordSize;
> @@ -2105,6 +2113,8 @@
> {
> uint8_t depth = 0;
> uint8_t predMode = 0;
> + uint8_t partSize = 0;
> + uint8_t mergeFlag = 0;
>
> CUData* ctu = curEncData.getPicCTU(cuAddr);
> analysis_inter_data* interDataCTU =
> (analysis_inter_data*)analysis->interData;
> @@ -2115,8 +2125,17 @@
> interDataCTU->depth[depthBytes] = depth;
>
> predMode = ctu->m_predMode[absPartIdx];
> + if (ctu->m_refIdx[1][absPartIdx] != -1)
> + predMode = 4; // used as indicator if the block is
> coded as bidir
> +
> interDataCTU->modes[depthBytes] = predMode;
>
> + partSize = ctu->m_partSize[absPartIdx];
> + interDataCTU->partSize[depthBytes] = partSize;
> +
> + mergeFlag = ctu->m_mergeFlag[absPartIdx];
> + interDataCTU->mergeFlag[depthBytes] = mergeFlag;
> +
> absPartIdx += ctu->m_numPartitions >> (depth * 2);
> }
> }
> @@ -2130,9 +2149,9 @@
> else
> {
> int numDir = (analysis->sliceType == X265_TYPE_P) ? 1 : 2;
> - analysis->frameRecordSize += depthBytes * 2;
> - analysis->frameRecordSize += sizeof(MV) * analysis->numCUsInFrame
> * X265_MAX_PRED_MODE_PER_CTU * numDir;
> - analysis->frameRecordSize += sizeof(WeightParam) * 3 * numDir;
> + analysis->frameRecordSize += depthBytes * 4;
> + analysis->frameRecordSize += sizeof(int32_t)*
> analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir;
> + analysis->frameRecordSize += sizeof(WeightParam)* 3 * numDir;
> }
> X265_FWRITE(&analysis->frameRecordSize, sizeof(uint32_t), 1,
> m_analysisFile);
> X265_FWRITE(&depthBytes, sizeof(uint32_t), 1, m_analysisFile);
> @@ -2155,6 +2174,8 @@
> int numDir = analysis->sliceType == X265_TYPE_P ? 1 : 2;
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->depth,
> sizeof(uint8_t), depthBytes, m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->modes,
> sizeof(uint8_t), depthBytes, m_analysisFile);
> +
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->partSize,
> sizeof(uint8_t), depthBytes, m_analysisFile);
> +
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->mergeFlag,
> sizeof(uint8_t), depthBytes, m_analysisFile);
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->ref,
> sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU *
> numDir, m_analysisFile);
> uint32_t numPlanes = m_param->internalCsp == X265_CSP_I400 ? 1 :
> 3;
> X265_FWRITE(((analysis_inter_data*)analysis->interData)->wt,
> sizeof(WeightParam), numPlanes * numDir, m_analysisFile);
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Deepthi Nandakumar
Engineering Manager, x265
Multicoreware, Inc
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20160408/baa69bd7/attachment-0001.html>
More information about the x265-devel
mailing list