[x265] [PATCH] Added fast intra search option to Analysis::checkIntraInInter_rd0_4
dave
dtyx265 at gmail.com
Thu Aug 14 18:10:06 CEST 2014
Ignore this patch. It needs a couple of tweaks, and I will be resubmitting
it soon.
On 08/14/2014 07:27 AM, dtyx265 at gmail.com wrote:
> # HG changeset patch
> # User David T Yuen <dtyx265 at gmail.com>
> # Date 1408026426 25200
> # Node ID 81766e60e622f28c12766f277b087cfeccff9cc3
> # Parent 6b741cce14acb610a2a17a08f51898ea18b16a35
> Added fast intra search option to Analysis::checkIntraInInter_rd0_4
>
> diff -r 6b741cce14ac -r 81766e60e622 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Thu Aug 14 12:53:52 2014 +0530
> +++ b/source/encoder/analysis.cpp Thu Aug 14 07:27:06 2014 -0700
> @@ -26,6 +26,7 @@
> #include "common.h"
> #include "rdcost.h"
> #include "encoder.h"
> +#include "predict.h"
> #include "PPA/ppa.h"
>
> using namespace x265;
> @@ -1655,6 +1656,7 @@
> }
>
> pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
> + int predsize = scaleTuSize * scaleTuSize;
>
> uint32_t preds[3];
> cu->getIntraDirLumaPredictor(partOffset, preds);
> @@ -1685,23 +1687,79 @@
> bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
> cost = m_rdCost.calcRdSADCost(sad, bits);
> COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
> -
> +
> // Transpose NxN
> primitives.transpose[sizeIdx](buf_trans, fenc, scaleStride);
> -
> primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
>
> - for (mode = 2; mode < 35; mode++)
> + bool modeHor;
> + pixel *cmp;
> + intptr_t srcStride;
> + if (m_param->bEnableFastIntra)
> {
> - bool modeHor = (mode < 18);
> - pixel *cmp = (modeHor ? buf_trans : fenc);
> - intptr_t srcStride = (modeHor ? scaleTuSize : scaleStride);
> - sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
> + int lowsad, highsad, asad = 0;
> + uint32_t lowbits, highbits, amode, lowmode, highmode, abits = 0;
> + uint64_t lowcost, highcost = MAX_INT64, acost = MAX_INT64;
> +
> + for (mode = 4;mode < 35; mode += 5)
> + {
> + modeHor = (mode < 18);
> + cmp = (modeHor ? buf_trans : fenc);
> + srcStride = (modeHor ? scaleTuSize : scaleStride);
> + sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
> + bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
> + cost = m_rdCost.calcRdSADCost(sad, bits);
> + COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
> + }
> + lowmode = amode - 2;
> + modeHor = (lowmode < 18);
> + cmp = (modeHor ? buf_trans : fenc);
> + srcStride = (modeHor ? scaleTuSize : scaleStride);
> + lowsad = sa8d(cmp, srcStride, &tmp[(lowmode - 2) * predsize], scaleTuSize) << costShift;
> + lowbits = !(mpms & ((uint64_t)1 << lowmode)) ? rbits : xModeBitsIntra(cu, lowmode, partOffset, depth);
> + lowcost = m_rdCost.calcRdSADCost(lowsad, lowbits);
> + if (bmode < 34)
> + {
> + highmode = amode + 2;
> + modeHor = (highmode < 18);
> + cmp = (modeHor ? buf_trans : fenc);
> + srcStride = (modeHor ? scaleTuSize : scaleStride);
> + highsad = sa8d(cmp, srcStride, &tmp[(highmode - 2) * predsize], scaleTuSize) << costShift;
> + highbits = !(mpms & ((uint64_t)1 << highmode)) ? rbits : xModeBitsIntra(cu, highmode, partOffset, depth);
> + highcost = m_rdCost.calcRdSADCost(highsad, highbits);
> + }
> + if (lowcost <= highcost)
> + {
> + mode = amode - 1;
> + COPY4_IF_LT(acost, lowcost, amode, lowmode, asad, lowsad, abits, lowbits);
> + }
> + else
> + {
> + mode = amode + 1;
> + COPY4_IF_LT(acost, highcost, amode, highmode, asad, highsad, abits, highbits);
> + }
> + modeHor = (mode < 18);
> + cmp = (modeHor ? buf_trans : fenc);
> + srcStride = (modeHor ? scaleTuSize : scaleStride);
> + sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
> bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
> cost = m_rdCost.calcRdSADCost(sad, bits);
> - COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
> + COPY4_IF_LT(acost, cost, amode, mode, asad, sad, abits, bits);
> + COPY4_IF_LT(bcost, acost, bmode, amode, bsad, asad, bbits, abits);
> }
> -
> + else // calculate and search all intra prediction angles for lowest cost
> + {
> + for (mode = 2; mode < 35; mode++)
> + {
> + modeHor = (mode < 18);
> + cmp = (modeHor ? buf_trans : fenc);
> + srcStride = (modeHor ? scaleTuSize : scaleStride);
> + sad = sa8d(cmp, srcStride, &tmp[(mode - 2) * predsize], scaleTuSize) << costShift;
> + bits = !(mpms & ((uint64_t)1 << mode)) ? rbits : xModeBitsIntra(cu, mode, partOffset, depth);
> + cost = m_rdCost.calcRdSADCost(sad, bits);
> + COPY4_IF_LT(bcost, cost, bmode, mode, bsad, sad, bbits, bits);
> + }
> + }
> cu->m_totalBits = bbits;
> cu->m_totalDistortion = bsad;
> cu->m_sa8dCost = bcost;
More information about the x265-devel
mailing list