[x265] [PATCH] slicetype : intra cost estimates
gopu at multicorewareinc.com
gopu at multicorewareinc.com
Mon Aug 12 12:56:45 CEST 2013
# HG changeset patch
# User ggopu
# Date 1376304994 -19800
# Node ID 666b6206628994d42c826afa9b01e7499b0d150c
# Parent 8438cad92049281833caa951cc48f6d90c7434eb
slicetype : intra cost estimates
diff -r 8438cad92049 -r 666b62066289 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Sun Aug 11 21:41:12 2013 -0700
+++ b/source/encoder/slicetype.cpp Mon Aug 12 16:26:34 2013 +0530
@@ -36,9 +36,9 @@
// taking any of the threading changes because we will eventually use the x265
// thread pool and wavefront processing.
-#define QP_BD_OFFSET (6*(X265_DEPTH-8))
+#define QP_BD_OFFSET (6 * (X265_DEPTH - 8))
// arbitrary, but low because SATD scores are 1/4 normal
-#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
+#define X264_LOOKAHEAD_QP (12 + QP_BD_OFFSET)
// Under Construction
#if defined(_MSC_VER)
@@ -47,7 +47,6 @@
#endif
namespace x265 {
-
struct Lookahead
{
MotionEstimate me;
@@ -58,7 +57,7 @@
TComList<TComPic*> inputQueue; // input pictures in order received
TComList<TComPic*> outputQueue; // pictures to be encoded, in encode order
-
+
Lookahead(int _frameQueueSize)
{
me.setQP(X264_LOOKAHEAD_QP, 1.0);
@@ -66,6 +65,7 @@
frameQueueSize = _frameQueueSize;
frames = new LookaheadFrame*[frameQueueSize];
}
+
~Lookahead()
{
if (frames)
@@ -76,20 +76,21 @@
int estimateCUCost(int cux, int cuy, int p0, int p1, int b, int do_search[2]);
};
-static inline int16_t x265_median( int16_t a, int16_t b, int16_t c )
+static inline int16_t x265_median(int16_t a, int16_t b, int16_t c)
{
- int16_t t = (a-b)&((a-b)>>31);
+ int16_t t = (a - b) & ((a - b) >> 31);
+
a -= t;
b += t;
- b -= (b-c)&((b-c)>>31);
- b += (a-b)&((a-b)>>31);
+ b -= (b - c) & ((b - c) >> 31);
+ b += (a - b) & ((a - b) >> 31);
return b;
}
-static inline void x265_median_mv( MV &dst, MV a, MV b, MV c )
+static inline void x265_median_mv(MV &dst, MV a, MV b, MV c)
{
- dst.x = x265_median( a.x, b.x, c.x );
- dst.y = x265_median( a.y, b.y, c.y );
+ dst.x = x265_median(a.x, b.x, c.x);
+ dst.y = x265_median(a.y, b.y, c.y);
}
int Lookahead::estimateFrameCost(int p0, int p1, int b, int bIntraPenalty)
@@ -163,17 +164,18 @@
const int cu_size = g_maxCUWidth / 2;
const int pel_offset = cu_size * cux + cu_size * cuy * stride;
const int merange = 16;
+
me.setSourcePU(pel_offset, cu_size, cu_size);
- MV (*fenc_mvs[2]) = { &fenc->lowresMvs[0][b - p0 - 1][cu_xy], &fenc->lowresMvs[1][p1 - b - 1][cu_xy] };
- int (*fenc_costs[2]) = { &fenc->lowresMvCosts[0][b - p0 - 1][cu_xy], &fenc->lowresMvCosts[1][p1 - b - 1][cu_xy] };
+ MV(*fenc_mvs[2]) = { &fenc->lowresMvs[0][b - p0 - 1][cu_xy], &fenc->lowresMvs[1][p1 - b - 1][cu_xy] };
+ int(*fenc_costs[2]) = { &fenc->lowresMvCosts[0][b - p0 - 1][cu_xy], &fenc->lowresMvCosts[1][p1 - b - 1][cu_xy] };
MV mvmin, mvmax;
// TODO: calculate search extents
for (int i = 0; i < 2; i++)
{
- if (!do_search[i])
+ if (!do_search[i])
continue;
int numc = 0;
@@ -199,7 +201,7 @@
mvp = mvc[0];
else
{
- x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]);
+ x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]);
}
*fenc_costs[i] = me.motionEstimate(i ? fref1 : fref0, mvmin, mvmax, mvp, numc, mvc, merange, *fenc_mvs[i]);
@@ -209,7 +211,69 @@
// TODO: add bidir
}
- // TODO: copy intra SATD cost analysis here (DC + planar + all-angs)
+ UInt width = fenc->cuWidth;
+ UInt numModesAvailable = 35; //total number of Intra modes
+ Int nLog2SizeMinus2 = g_convertToBit[width];
+ x265::pixelcmp_t sa8d = x265::primitives.sa8d[nLog2SizeMinus2];
+ UInt64 CandCostList[FAST_UDI_MAX_RDMODE_NUM];
+ UInt numModesForFullRD = 5; // Currently set as 5 at default
+ UInt CandNum;
+ UInt partOffset = 0;
+
+ assert(numModesForFullRD < numModesAvailable);
+
+ for (UInt i = 0; i < numModesForFullRD; i++)
+ {
+ CandCostList[i] = MAX_UINT;
+ }
+
+ ALIGN_VAR_32(pixel, buffer[64 * 64]); // current cu buffer
+ primitives.blockcpy_pp(fenc->cuWidth, fenc->cuHeight, buffer, FENC_STRIDE, fenc->m_lumaPlane[0][0] + pel_offset, fenc->m_lumaStride);
+
+ pixel *pAbove0 = fenc->m_lumaPlane[0][0] + pel_offset - fenc->m_lumaStride;
+ pixel *pAbove1 = fenc->m_lumaPlane[0][0] + pel_offset - fenc->cuHeight;
+ pixel *pLeft0 = fenc->m_lumaPlane[0][0] + pel_offset + fenc->m_lumaStride;
+ pixel *pLeft1 = fenc->m_lumaPlane[0][0] + pel_offset + fenc->cuWidth;
+
+ CandNum = 0;
+ UInt modeCosts[35];
+
+ // Scratch buffers: all 33 angular modes are predicted in a single call
+ ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
+ ALIGN_VAR_32(pixel, tmp[33 * 32 * 32]);
+
+ if (fenc->cuWidth <= 32)
+ {
+ // Mode 1: DC prediction
+ primitives.intra_pred_dc(pAbove0 + 1, pLeft0 + 1, buffer, stride, width, (width <= 16));
+ modeCosts[DC_IDX] = sa8d(fenc->m_lumaPlane[0][0], fenc->m_lumaStride, buffer, fenc->stride);
+
+ // Mode 0: planar prediction
+ pixel *above = pAbove0;
+ pixel *left = pLeft0;
+ if (width >= 8 && width <= 32)
+ {
+ above = pAbove1;
+ left = pLeft1;
+ }
+ primitives.intra_pred_planar((pixel*)above + 1, (pixel*)left + 1, buffer, fenc->stride, width);
+ modeCosts[PLANAR_IDX] = sa8d(fenc->m_lumaPlane[0][0], fenc->m_lumaStride, buffer, fenc->stride);
+
+ // Transpose NxN
+ x265::primitives.transpose[nLog2SizeMinus2](buf_trans, (pixel*)fenc, stride);
+
+ x265::primitives.intra_pred_allangs[nLog2SizeMinus2](tmp, pAbove0, pLeft0, pAbove1, pLeft1, (width <= 16));
+
+ // TODO: We need SATD_x4 here
+ for (UInt mode = 2; mode < numModesAvailable; mode++)
+ {
+ bool modeHor = (mode < 18);
+ pixel *cmp = (modeHor ? buf_trans : fenc->m_lumaPlane[0][0]);
+ intptr_t srcStride = (modeHor ? width : stride);
+ modeCosts[mode] = sa8d(cmp, srcStride, &tmp[(mode - 2) * (width * width)], width);
+ }
+ }
+
return 0;
}
@@ -622,6 +686,7 @@
(dst)[2] = &(src)[2][i_pel_offset]; \
(dst)[3] = &(src)[3][i_pel_offset]; \
}
+
#define LOAD_WPELS_LUMA(dst, src) \
(dst) = &(src)[i_pel_offset];
More information about the x265-devel mailing list