[x265] [PATCH] SEA Motion Search Implementation
Vignesh V Menon
vignesh at multicorewareinc.com
Tue Nov 29 14:02:55 CET 2016
Please ignore the previous patch.
PFA
Thanks
Vignesh
On Tue, Nov 29, 2016 at 3:12 PM, Vignesh V Menon <
vignesh at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Vignesh Vijayakumar <vignesh at multicorewareinc.com>
> # Date 1480313149 -19800
> # Mon Nov 28 11:35:49 2016 +0530
> # Node ID f8d523976ed61cada53c579d8145a815d21d08ed
> # Parent 5d95fbd53ca31747498c4bd661fa24f6ffd5a070
> SEA motion search implementation
>
> diff -r 5d95fbd53ca3 -r f8d523976ed6 doc/reST/cli.rst
> --- a/doc/reST/cli.rst Fri Nov 25 12:57:52 2016 +0530
> +++ b/doc/reST/cli.rst Mon Nov 28 11:35:49 2016 +0530
> @@ -964,13 +964,17 @@
> encoder: a star-pattern search followed by an optional radix scan
> followed by an optional star-search refinement. Full is an
> exhaustive search; an order of magnitude slower than all other
> - searches but not much better than umh or star.
> + searches but not much better than umh or star. SEA is similar to
> + FULL search; a three-step motion search adopted from x264: DC
> + calculation followed by ADS calculation followed by SAD of the
> + passed motion vector candidates, hence faster than Full search.
>
> 0. dia
> 1. hex **(default)**
> 2. umh
> 3. star
> - 4. full
> + 4. sea
> + 5. full
>
> .. option:: --subme, -m <0..7>
>
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/common.h
> --- a/source/common/common.h Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/common/common.h Mon Nov 28 11:35:49 2016 +0530
> @@ -328,6 +328,8 @@
>
> #define PIXEL_MAX ((1 << X265_DEPTH) - 1)
>
> +#define INTEGRAL_PLANE_NUM 12 // 12 integral planes for 32x32,
> 32x24, 32x8, 24x32, 16x16, 16x12, 16x4, 12x16, 8x32, 8x8, 4x16 and 4x4.
> +
> namespace X265_NS {
>
> enum { SAO_NUM_OFFSET = 4 };
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/framedata.cpp
> --- a/source/common/framedata.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/common/framedata.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -48,6 +48,12 @@
> CHECKED_MALLOC_ZERO(m_cuStat, RCStatCU, sps.numCUsInFrame);
> CHECKED_MALLOC(m_rowStat, RCStatRow, sps.numCuInHeight);
> reinit(sps);
> +
> + for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
> + {
> + m_meBuffer[i] = NULL;
> + m_meIntegral[i] = NULL;
> + }
> return true;
>
> fail:
> @@ -70,4 +76,16 @@
>
> X265_FREE(m_cuStat);
> X265_FREE(m_rowStat);
> +
> + if (m_meBuffer)
> + {
> + for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
> + {
> + if (m_meBuffer[i] != NULL)
> + {
> + X265_FREE(m_meBuffer[i]);
> + m_meBuffer[i] = NULL;
> + }
> + }
> + }
> }
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/framedata.h
> --- a/source/common/framedata.h Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/common/framedata.h Mon Nov 28 11:35:49 2016 +0530
> @@ -151,6 +151,9 @@
> double m_rateFactor; /* calculated based on the Frame QP */
> int m_picCsp;
>
> + uint32_t* m_meIntegral[INTEGRAL_PLANE_NUM]; // 12
> integral planes for 32x32, 32x24, 32x8, 24x32, 16x16, 16x12, 16x4, 12x16,
> 8x32, 8x8, 4x16 and 4x4.
> + uint32_t* m_meBuffer[INTEGRAL_PLANE_NUM];
> +
> FrameData();
>
> bool create(const x265_param& param, const SPS& sps, int csp);
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/param.cpp
> --- a/source/common/param.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/common/param.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -1092,8 +1092,8 @@
> "Frame rate numerator and denominator must be specified");
> CHECK(param->interlaceMode < 0 || param->interlaceMode > 2,
> "Interlace mode must be 0 (progressive) 1 (top-field first) or
> 2 (bottom field first)");
> - CHECK(param->searchMethod<0 || param->searchMethod> X265_FULL_SEARCH,
> - "Search method is not supported value (0:DIA 1:HEX 2:UMH 3:HM
> 5:FULL)");
> + CHECK(param->searchMethod < 0 || param->searchMethod >
> X265_FULL_SEARCH,
> + "Search method is not supported value (0:DIA 1:HEX 2:UMH 3:HM
> 4:SEA 5:FULL)");
> CHECK(param->searchRange < 0,
> "Search Range must be more than 0");
> CHECK(param->searchRange >= 32768,
> @@ -1256,6 +1256,10 @@
> "qpmin exceeds supported range (0 to 69)");
> CHECK(param->log2MaxPocLsb < 4 || param->log2MaxPocLsb > 16,
> "Supported range for log2MaxPocLsb is 4 to 16");
> +#if !X86_64
> + CHECK(param->searchMethod == X265_SEA && (param->sourceWidth > 840 ||
> param->sourceHeight > 480),
> + "SEA motion search does not support resolutions greater than 480p
> in 32 bit build");
> +#endif
> return check_failed;
> }
>
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/common/pixel.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -117,6 +117,52 @@
> }
> }
>
> +template<int lx, int ly>
> +int ads_x4(int encDC[4], uint32_t *sums, int delta, uint16_t *costMvX,
> int16_t *mvs, int width, int thresh)
> +{
> + int nmv = 0;
> + for (int16_t i = 0; i < width; i++, sums++)
> + {
> + int ads = abs(encDC[0] - long(sums[0]))
> + + abs(encDC[1] - long(sums[lx >> 1]))
> + + abs(encDC[2] - long(sums[delta]))
> + + abs(encDC[3] - long(sums[delta + (lx >> 1)]))
> + + costMvX[i];
> + if (ads < thresh)
> + mvs[nmv++] = i;
> + }
> + return nmv;
> +}
> +
> +template<int lx, int ly>
> +int ads_x2(int encDC[2], uint32_t *sums, int delta, uint16_t *costMvX,
> int16_t *mvs, int width, int thresh)
> +{
> + int nmv = 0;
> + for (int16_t i = 0; i < width; i++, sums++)
> + {
> + int ads = abs(encDC[0] - long(sums[0]))
> + + abs(encDC[1] - long(sums[delta]))
> + + costMvX[i];
> + if (ads < thresh)
> + mvs[nmv++] = i;
> + }
> + return nmv;
> +}
> +
> +template<int lx, int ly>
> +int ads_x1(int encDC[1], uint32_t *sums, int, uint16_t *costMvX, int16_t
> *mvs, int width, int thresh)
> +{
> + int nmv = 0;
> + for (int16_t i = 0; i < width; i++, sums++)
> + {
> + int ads = abs(encDC[0] - long(sums[0]))
> + + costMvX[i];
> + if (ads < thresh)
> + mvs[nmv++] = i;
> + }
> + return nmv;
> +}
> +
> template<int lx, int ly, class T1, class T2>
> sse_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t
> stride_pix2)
> {
> @@ -991,6 +1037,32 @@
> LUMA_PU(64, 16);
> LUMA_PU(16, 64);
>
> + p.pu[LUMA_4x4].ads = ads_x1<4, 4>;
> + p.pu[LUMA_8x8].ads = ads_x1<8, 8>;
> + p.pu[LUMA_8x4].ads = ads_x2<8, 4>;
> + p.pu[LUMA_4x8].ads = ads_x2<4, 8>;
> + p.pu[LUMA_16x16].ads = ads_x4<16, 16>;
> + p.pu[LUMA_16x8].ads = ads_x2<16, 8>;
> + p.pu[LUMA_8x16].ads = ads_x2<8, 16>;
> + p.pu[LUMA_16x12].ads = ads_x1<16, 12>;
> + p.pu[LUMA_12x16].ads = ads_x1<12, 16>;
> + p.pu[LUMA_16x4].ads = ads_x1<16, 4>;
> + p.pu[LUMA_4x16].ads = ads_x1<4, 16>;
> + p.pu[LUMA_32x32].ads = ads_x4<32, 32>;
> + p.pu[LUMA_32x16].ads = ads_x2<32, 16>;
> + p.pu[LUMA_16x32].ads = ads_x2<16, 32>;
> + p.pu[LUMA_32x24].ads = ads_x4<32, 24>;
> + p.pu[LUMA_24x32].ads = ads_x4<24, 32>;
> + p.pu[LUMA_32x8].ads = ads_x4<32, 8>;
> + p.pu[LUMA_8x32].ads = ads_x4<8, 32>;
> + p.pu[LUMA_64x64].ads = ads_x4<64, 64>;
> + p.pu[LUMA_64x32].ads = ads_x2<64, 32>;
> + p.pu[LUMA_32x64].ads = ads_x2<32, 64>;
> + p.pu[LUMA_64x48].ads = ads_x4<64, 48>;
> + p.pu[LUMA_48x64].ads = ads_x4<48, 64>;
> + p.pu[LUMA_64x16].ads = ads_x4<64, 16>;
> + p.pu[LUMA_16x64].ads = ads_x4<16, 64>;
> +
> p.pu[LUMA_4x4].satd = satd_4x4;
> p.pu[LUMA_8x8].satd = satd8<8, 8>;
> p.pu[LUMA_8x4].satd = satd_8x4;
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/primitives.h
> --- a/source/common/primitives.h Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/common/primitives.h Mon Nov 28 11:35:49 2016 +0530
> @@ -115,6 +115,7 @@
> typedef sse_t (*pixel_sse_t)(const pixel* fenc, intptr_t fencstride,
> const pixel* fref, intptr_t frefstride); // fenc is aligned
> typedef sse_t (*pixel_sse_ss_t)(const int16_t* fenc, intptr_t fencstride,
> const int16_t* fref, intptr_t frefstride);
> typedef sse_t (*pixel_ssd_s_t)(const int16_t* fenc, intptr_t fencstride);
> +typedef int(*pixelcmp_ads_t)(int encDC[], uint32_t *sums, int delta,
> uint16_t *costMvX, int16_t *mvs, int width, int thresh);
> typedef void (*pixelcmp_x4_t)(const pixel* fenc, const pixel* fref0,
> const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t
> frefstride, int32_t* res);
> typedef void (*pixelcmp_x3_t)(const pixel* fenc, const pixel* fref0,
> const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
> typedef void (*blockfill_s_t)(int16_t* dst, intptr_t dstride, int16_t
> val);
> @@ -217,6 +218,7 @@
> pixelcmp_t sad; // Sum of Absolute Differences
> pixelcmp_x3_t sad_x3; // Sum of Absolute Differences, 3 mv
> offsets at once
> pixelcmp_x4_t sad_x4; // Sum of Absolute Differences, 4 mv
> offsets at once
> + pixelcmp_ads_t ads; // Absolute Differences sum
> pixelcmp_t satd; // Sum of Absolute Transformed
> Differences (4x4 Hadamard)
>
> filter_pp_t luma_hpp; // 8-tap luma motion compensation
> interpolation filters
> @@ -402,6 +404,22 @@
> return part;
> }
>
> +/* Computes the size of the LumaPU for a given LumaPU enum */
> +inline void sizesFromPartition(int part, int *width, int *height)
> +{
> + X265_CHECK(part >= 0 && part <= 24, "Invalid part %d \n", part);
> + extern const uint8_t lumaPartitionMapTable[];
> + int index = 0;
> + for (int i = 0; i < 256;i++)
> + if (part == lumaPartitionMapTable[i])
> + {
> + index = i;
> + break;
> + }
> + *width = 4 * ((index >> 4) + 1);
> + *height = 4 * ((index % 16) + 1);
> +}
> +
> inline int partitionFromLog2Size(int log2Size)
> {
> X265_CHECK(2 <= log2Size && log2Size <= 6, "Invalid block size\n");
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/yuv.cpp
> --- a/source/common/yuv.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/common/yuv.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -47,6 +47,11 @@
> m_size = size;
> m_part = partitionFromSizes(size, size);
>
> + for (int i = 0; i < 2; i++)
> + for (int j = 0; j < MAX_NUM_REF; j++)
> + for (int k = 0; k < INTEGRAL_PLANE_NUM; k++)
> + m_integral[i][j][k] = NULL;
> +
> if (csp == X265_CSP_I400)
> {
> CHECKED_MALLOC(m_buf[0], pixel, size * size + 8);
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/yuv.h
> --- a/source/common/yuv.h Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/common/yuv.h Mon Nov 28 11:35:49 2016 +0530
> @@ -48,6 +48,7 @@
> int m_csp;
> int m_hChromaShift;
> int m_vChromaShift;
> + uint32_t *m_integral[2][MAX_NUM_REF][INTEGRAL_PLANE_NUM];
>
> Yuv();
>
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/analysis.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -943,6 +943,16 @@
> ModeDepth& md = m_modeDepth[depth];
> md.bestMode = NULL;
>
> + if (m_param->searchMethod == X265_SEA)
> + {
> + int numPredDir = m_slice->isInterP() ? 1 : 2;
> + int offset = (int)(m_frame->m_reconPic->m_cuOffsetY[parentCTU.m_cuAddr]
> + m_frame->m_reconPic->m_buOffsetY[cuGeom.absPartIdx]);
> + for (int list = 0; list < numPredDir; list++)
> + for (int i = 0; i < m_frame->m_encData->m_slice->m_numRefIdx[list];
> i++)
> + for (int planes = 0; planes < INTEGRAL_PLANE_NUM;
> planes++)
> + m_modeDepth[depth].fencYuv.m_integral[list][i][planes]
> = m_frame->m_encData->m_slice->m_refFrameList[list][i]->m_encData->m_meIntegral[planes]
> + offset;
> + }
> +
> PicYuv& reconPic = *m_frame->m_reconPic;
>
> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> @@ -1484,6 +1494,16 @@
> ModeDepth& md = m_modeDepth[depth];
> md.bestMode = NULL;
>
> + if (m_param->searchMethod == X265_SEA)
> + {
> + int numPredDir = m_slice->isInterP() ? 1 : 2;
> + int offset = (int)(m_frame->m_reconPic->m_cuOffsetY[parentCTU.m_cuAddr]
> + m_frame->m_reconPic->m_buOffsetY[cuGeom.absPartIdx]);
> + for (int list = 0; list < numPredDir; list++)
> + for (int i = 0; i < m_frame->m_encData->m_slice->m_numRefIdx[list];
> i++)
> + for (int planes = 0; planes < INTEGRAL_PLANE_NUM;
> planes++)
> + m_modeDepth[depth].fencYuv.m_integral[list][i][planes]
> = m_frame->m_encData->m_slice->m_refFrameList[list][i]->m_encData->m_meIntegral[planes]
> + offset;
> + }
> +
> bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
> bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
> bool skipRecursion = false;
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/bitcost.cpp
> --- a/source/encoder/bitcost.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/bitcost.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -54,7 +54,22 @@
> s_costs[qp][i] = s_costs[qp][-i] =
> (uint16_t)X265_MIN(s_bitsizes[i] * lambda + 0.5f, (1 << 15) - 1);
> }
> }
> + for (int j = 0; j < 4; j++)
> + {
> + if (!s_fpelMvCosts[qp][j])
> + {
> + s_fpelMvCosts[qp][j] = X265_MALLOC(uint16_t, BC_MAX_MV + 1) +
> (BC_MAX_MV >> 1);
> + }
> + }
>
> + for (int j = 0; j < 4; j++)
> + {
> + for (int i = -(BC_MAX_MV >> 1); i < (BC_MAX_MV >> 1); i++)
> + {
> + s_fpelMvCosts[qp][j][i] = s_costs[qp][i * 4 + j];
> + }
> + m_fpelMvCosts[j] = s_fpelMvCosts[qp][j];
> + }
> m_cost = s_costs[qp];
> }
>
> @@ -64,6 +79,8 @@
>
> uint16_t *BitCost::s_costs[BC_MAX_QP];
>
> +uint16_t* BitCost::s_fpelMvCosts[BC_MAX_QP][4];
> +
> float *BitCost::s_bitsizes;
>
> Lock BitCost::s_costCalcLock;
> @@ -97,6 +114,17 @@
> }
> }
>
> + for (int i = 0; i < BC_MAX_QP; i++)
> + {
> + if (s_fpelMvCosts[i][0])
> + {
> + for (int j = 0; j < 4; j++)
> + {
> + X265_FREE(s_fpelMvCosts[i][j] - (BC_MAX_MV >> 1));
> + }
> + }
> + }
> +
> if (s_bitsizes)
> {
> X265_FREE(s_bitsizes - 2 * BC_MAX_MV);
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/bitcost.h
> --- a/source/encoder/bitcost.h Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/bitcost.h Mon Nov 28 11:35:49 2016 +0530
> @@ -67,6 +67,8 @@
>
> uint16_t *m_cost;
>
> + uint16_t *m_fpelMvCosts[4];
> +
> MV m_mvp;
>
> BitCost& operator =(const BitCost&);
> @@ -84,6 +86,8 @@
>
> static uint16_t *s_costs[BC_MAX_QP];
>
> + static uint16_t *s_fpelMvCosts[BC_MAX_QP][4];
> +
> static Lock s_costCalcLock;
>
> static void CalculateLogs();
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/dpb.cpp
> --- a/source/encoder/dpb.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/dpb.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -92,6 +92,19 @@
> m_freeList.pushBack(*curFrame);
> curFrame->m_encData->m_freeListNext = m_frameDataFreeList;
> m_frameDataFreeList = curFrame->m_encData;
> +
> + if (curFrame->m_encData->m_meBuffer)
> + {
> + for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
> + {
> + if (curFrame->m_encData->m_meBuffer[i] != NULL)
> + {
> + X265_FREE(curFrame->m_encData->m_meBuffer[i]);
> + curFrame->m_encData->m_meBuffer[i] = NULL;
> + }
> + }
> + }
> +
> curFrame->m_encData = NULL;
> curFrame->m_reconPic = NULL;
> }
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/encoder.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -869,6 +869,25 @@
> slice->m_endCUAddr = slice->realEndAddress(m_sps.numCUsInFrame
> * NUM_4x4_PARTITIONS);
> }
>
> + if (m_param->searchMethod == X265_SEA &&
> frameEnc->m_lowres.sliceType != X265_TYPE_B)
> + {
> + int padX = g_maxCUSize + 32;
> + int padY = g_maxCUSize + 16;
> + uint32_t numCuInHeight = (frameEnc->m_encData->m_reconPic->m_picHeight
> + g_maxCUSize - 1) / g_maxCUSize;
> + int maxHeight = numCuInHeight * g_maxCUSize;
> + for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
> + {
> + frameEnc->m_encData->m_meBuffer[i] =
> X265_MALLOC(uint32_t, frameEnc->m_reconPic->m_stride * (maxHeight + (2 *
> padY)));
> + if (frameEnc->m_encData->m_meBuffer[i])
> + {
> + memset(frameEnc->m_encData->m_meBuffer[i], 0,
> sizeof(uint32_t)* frameEnc->m_reconPic->m_stride * (maxHeight + (2 *
> padY)));
> + frameEnc->m_encData->m_meIntegral[i] =
> frameEnc->m_encData->m_meBuffer[i] + frameEnc->m_encData->m_reconPic->m_stride
> * padY + padX;
> + }
> + else
> + x265_log(m_param, X265_LOG_ERROR, "SEA motion
> search: POC %d Integral buffer[%d] unallocated\n", frameEnc->m_poc, i);
> + }
> + }
> +
> if (m_param->bOptQpPPS && frameEnc->m_lowres.bKeyframe &&
> m_param->bRepeatHeaders)
> {
> ScopedLock qpLock(m_sliceQpLock);
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/framefilter.cpp
> --- a/source/encoder/framefilter.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/framefilter.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -35,6 +35,109 @@
> static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride,
> uint32_t width, uint32_t height);
> static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2,
> intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t&
> cnt);
>
> +static void integral_init4h(uint32_t *sum, pixel *pix, intptr_t stride)
> +{
> + int32_t v = pix[0] + pix[1] + pix[2] + pix[3];
> + for (int16_t x = 0; x < stride - 4; x++)
> + {
> + sum[x] = v + sum[x - stride];
> + v += pix[x + 4] - pix[x];
> + }
> +}
> +
> +static void integral_init8h(uint32_t *sum, pixel *pix, intptr_t stride)
> +{
> + int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] +
> pix[6] + pix[7];
> + for (int16_t x = 0; x < stride - 8; x++)
> + {
> + sum[x] = v + sum[x - stride];
> + v += pix[x + 8] - pix[x];
> + }
> +}
> +
> +static void integral_init12h(uint32_t *sum, pixel *pix, intptr_t stride)
> +{
> + int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] +
> pix[6] + pix[7] +
> + pix[8] + pix[9] + pix[10] + pix[11];
> + for (int16_t x = 0; x < stride - 12; x++)
> + {
> + sum[x] = v + sum[x - stride];
> + v += pix[x + 12] - pix[x];
> + }
> +}
> +
> +static void integral_init16h(uint32_t *sum, pixel *pix, intptr_t stride)
> +{
> + int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] +
> pix[6] + pix[7] +
> + pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14]
> + pix[15];
> + for (int16_t x = 0; x < stride - 16; x++)
> + {
> + sum[x] = v + sum[x - stride];
> + v += pix[x + 16] - pix[x];
> + }
> +}
> +
> +static void integral_init24h(uint32_t *sum, pixel *pix, intptr_t stride)
> +{
> + int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] +
> pix[6] + pix[7] +
> + pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14]
> + pix[15] +
> + pix[16] + pix[17] + pix[18] + pix[19] + pix[20] + pix[21] +
> pix[22] + pix[23];
> + for (int16_t x = 0; x < stride - 24; x++)
> + {
> + sum[x] = v + sum[x - stride];
> + v += pix[x + 24] - pix[x];
> + }
> +}
> +
> +static void integral_init32h(uint32_t *sum, pixel *pix, intptr_t stride)
> +{
> + int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] +
> pix[6] + pix[7] +
> + pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14]
> + pix[15] +
> + pix[16] + pix[17] + pix[18] + pix[19] + pix[20] + pix[21] +
> pix[22] + pix[23] +
> + pix[24] + pix[25] + pix[26] + pix[27] + pix[28] + pix[29] +
> pix[30] + pix[31];
> + for (int16_t x = 0; x < stride - 32; x++)
> + {
> + sum[x] = v + sum[x - stride];
> + v += pix[x + 32] - pix[x];
> + }
> +}
> +
> +static void integral_init4v(uint32_t *sum4, intptr_t stride)
> +{
> + for (int x = 0; x < stride; x++)
> + sum4[x] = sum4[x + 4 * stride] - sum4[x];
> +}
> +
> +static void integral_init8v(uint32_t *sum8, intptr_t stride)
> +{
> + for (int x = 0; x < stride; x++)
> + sum8[x] = sum8[x + 8 * stride] - sum8[x];
> +}
> +
> +static void integral_init12v(uint32_t *sum12, intptr_t stride)
> +{
> + for (int x = 0; x < stride; x++)
> + sum12[x] = sum12[x + 12 * stride] - sum12[x];
> +}
> +
> +static void integral_init16v(uint32_t *sum16, intptr_t stride)
> +{
> + for (int x = 0; x < stride; x++)
> + sum16[x] = sum16[x + 16 * stride] - sum16[x];
> +}
> +
> +static void integral_init24v(uint32_t *sum24, intptr_t stride)
> +{
> + for (int x = 0; x < stride; x++)
> + sum24[x] = sum24[x + 24 * stride] - sum24[x];
> +}
> +
> +static void integral_init32v(uint32_t *sum32, intptr_t stride)
> +{
> + for (int x = 0; x < stride; x++)
> + sum32[x] = sum32[x + 32 * stride] - sum32[x];
> +}
> +
> void FrameFilter::destroy()
> {
> X265_FREE(m_ssimBuf);
> @@ -65,6 +168,7 @@
> m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
> m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ?
> (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;
> m_lastWidth = (m_param->sourceWidth % g_maxCUSize) ?
> (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
> + integralCompleted.set(0);
>
> if (m_param->bEnableSsim)
> m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
> @@ -664,6 +768,107 @@
> }
> } // end of (m_param->maxSlices == 1)
>
> + int lastRow = row == (int)m_frame->m_encData->m_slice->m_sps->numCuInHeight
> - 1;
> +
> + /* generate integral planes for SEA motion search */
> + if (m_param->searchMethod == X265_SEA && m_frame->m_encData->m_meIntegral
> && m_frame->m_lowres.sliceType != X265_TYPE_B)
> + {
> + /* If WPP, other than first row, integral calculation for current
> row needs to wait till the
> + * integral for the previous row is computed */
> + if (m_param->bEnableWavefront && row)
> + {
> + while (m_parallelFilter[row - 1].m_frameFilter->integralCompleted.get()
> == 0)
> + {
> + m_parallelFilter[row - 1].m_frameFilter->
> integralCompleted.waitForChange(0);
> + }
> + }
> +
> + int stride = (int)m_frame->m_reconPic->m_stride;
> + int padX = g_maxCUSize + 32;
> + int padY = g_maxCUSize + 16;
> + int numCuInHeight = m_frame->m_encData->m_slice->
> m_sps->numCuInHeight;
> + int maxHeight = numCuInHeight * g_maxCUSize;
> + int start = 0;
> +
> + if (m_param->interlaceMode)
> + start = (row * g_maxCUSize >> 1);
> + else
> + start = row * g_maxCUSize;
> +
> + int height = lastRow ? (maxHeight + g_maxCUSize *
> m_param->interlaceMode) : (((row + m_param->interlaceMode) * g_maxCUSize) +
> g_maxCUSize);
> +
> + if (!row)
> + {
> + for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
> + memset(m_frame->m_encData->m_meIntegral[i] - padY *
> stride - padX, 0, stride * sizeof(uint32_t));
> + start = -padY;
> + }
> +
> + if (lastRow)
> + height += padY - 1;
> +
> + for (int y = start; y < height; y++)
> + {
> + pixel *pix = m_frame->m_reconPic->m_picOrg[0] + y *
> stride - padX;
> + uint32_t *sum32x32 = m_frame->m_encData->m_meIntegral[0] +
> (y + 1) * stride - padX;
> + uint32_t *sum32x24 = m_frame->m_encData->m_meIntegral[1] +
> (y + 1) * stride - padX;
> + uint32_t *sum32x8 = m_frame->m_encData->m_meIntegral[2] + (y
> + 1) * stride - padX;
> + uint32_t *sum24x32 = m_frame->m_encData->m_meIntegral[3] +
> (y + 1) * stride - padX;
> + uint32_t *sum16x16 = m_frame->m_encData->m_meIntegral[4] +
> (y + 1) * stride - padX;
> + uint32_t *sum16x12 = m_frame->m_encData->m_meIntegral[5] +
> (y + 1) * stride - padX;
> + uint32_t *sum16x4 = m_frame->m_encData->m_meIntegral[6] + (y
> + 1) * stride - padX;
> + uint32_t *sum12x16 = m_frame->m_encData->m_meIntegral[7] +
> (y + 1) * stride - padX;
> + uint32_t *sum8x32 = m_frame->m_encData->m_meIntegral[8] + (y
> + 1) * stride - padX;
> + uint32_t *sum8x8 = m_frame->m_encData->m_meIntegral[9] + (y
> + 1) * stride - padX;
> + uint32_t *sum4x16 = m_frame->m_encData->m_meIntegral[10] +
> (y + 1) * stride - padX;
> + uint32_t *sum4x4 = m_frame->m_encData->m_meIntegral[11] + (y
> + 1) * stride - padX;
> +
> + /*For width = 32 */
> + integral_init32h(sum32x32, pix, stride);
> + if (y >= 32 - padY)
> + integral_init32v(sum32x32 - 32 * stride, stride);
> + integral_init32h(sum32x24, pix, stride);
> + if (y >= 24 - padY)
> + integral_init24v(sum32x24 - 24 * stride, stride);
> + integral_init32h(sum32x8, pix, stride);
> + if (y >= 8 - padY)
> + integral_init8v(sum32x8 - 8 * stride, stride);
> + /*For width = 24 */
> + integral_init24h(sum24x32, pix, stride);
> + if (y >= 32 - padY)
> + integral_init32v(sum24x32 - 32 * stride, stride);
> + /*For width = 16 */
> + integral_init16h(sum16x16, pix, stride);
> + if (y >= 16 - padY)
> + integral_init16v(sum16x16 - 16 * stride, stride);
> + integral_init16h(sum16x12, pix, stride);
> + if (y >= 12 - padY)
> + integral_init12v(sum16x12 - 12 * stride, stride);
> + integral_init16h(sum16x4, pix, stride);
> + if (y >= 4 - padY)
> + integral_init4v(sum16x4 - 4 * stride, stride);
> + /*For width = 12 */
> + integral_init12h(sum12x16, pix, stride);
> + if (y >= 16 - padY)
> + integral_init16v(sum12x16 - 16 * stride, stride);
> + /*For width = 8 */
> + integral_init8h(sum8x32, pix, stride);
> + if (y >= 32 - padY)
> + integral_init32v(sum8x32 - 32 * stride, stride);
> + integral_init8h(sum8x8, pix, stride);
> + if (y >= 8 - padY)
> + integral_init8v(sum8x8 - 8 * stride, stride);
> + /*For width = 4 */
> + integral_init4h(sum4x16, pix, stride);
> + if (y >= 16 - padY)
> + integral_init16v(sum4x16 - 16 * stride, stride);
> + integral_init4h(sum4x4, pix, stride);
> + if (y >= 4 - padY)
> + integral_init4v(sum4x4 - 4 * stride, stride);
> + }
> + m_parallelFilter[row].m_frameFilter->integralCompleted.set(1);
> + }
> +
> if (ATOMIC_INC(&m_frameEncoder->m_completionCount) == 2 *
> (int)m_frameEncoder->m_numRows)
> {
> m_frameEncoder->m_completionEvent.trigger();
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/framefilter.h
> --- a/source/encoder/framefilter.h Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/framefilter.h Mon Nov 28 11:35:49 2016 +0530
> @@ -57,6 +57,8 @@
> int m_lastHeight;
> int m_lastWidth;
>
> + ThreadSafeInteger integralCompleted; /* check if integral
> calculation is completed in this row */
> +
> void* m_ssimBuf; /* Temp storage for ssim computation
> */
>
> #define MAX_PFILTER_CUS (4) /* maximum CUs for every thread */
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/motion.cpp
> --- a/source/encoder/motion.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/motion.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -109,6 +109,8 @@
> blockOffset = 0;
> bChromaSATD = false;
> chromaSatd = NULL;
> + for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
> + integral[i] = NULL;
> }
>
> void MotionEstimate::init(int csp)
> @@ -165,10 +167,12 @@
> partEnum = partitionFromSizes(pwidth, pheight);
> X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");
> sad = primitives.pu[partEnum].sad;
> + ads = primitives.pu[partEnum].ads;
> satd = primitives.pu[partEnum].satd;
> sad_x3 = primitives.pu[partEnum].sad_x3;
> sad_x4 = primitives.pu[partEnum].sad_x4;
>
> +
> blockwidth = pwidth;
> blockOffset = offset;
> absPartIdx = ctuAddr = -1;
> @@ -188,6 +192,7 @@
> partEnum = partitionFromSizes(pwidth, pheight);
> X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");
> sad = primitives.pu[partEnum].sad;
> + ads = primitives.pu[partEnum].ads;
> satd = primitives.pu[partEnum].satd;
> sad_x3 = primitives.pu[partEnum].sad_x3;
> sad_x4 = primitives.pu[partEnum].sad_x4;
> @@ -288,6 +293,21 @@
> COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
> }
>
> +#define COST_MV_X3_ABS( m0x, m0y, m1x, m1y, m2x, m2y )\
> +{\
> + sad_x3(fenc, \
> + fref + (m0x) + (m0y) * stride, \
> + fref + (m1x) + (m1y) * stride, \
> + fref + (m2x) + (m2y) * stride, \
> + stride, costs); \
> + costs[0] += p_cost_mvx[(m0x) << 2]; /* no cost_mvy */\
> + costs[1] += p_cost_mvx[(m1x) << 2]; \
> + costs[2] += p_cost_mvx[(m2x) << 2]; \
> + COPY3_IF_LT(bcost, costs[0], bmv.x, m0x, bmv.y, m0y); \
> + COPY3_IF_LT(bcost, costs[1], bmv.x, m1x, bmv.y, m1y); \
> + COPY3_IF_LT(bcost, costs[2], bmv.x, m2x, bmv.y, m2y); \
> +}
> +
> #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
> { \
> pixel *pix_base = fref + bmv.x + bmv.y * stride; \
> @@ -1078,6 +1098,161 @@
> break;
> }
>
> + case X265_SEA:
> + {
> + // Successive Elimination Algorithm
> + const int16_t minX = X265_MAX(omv.x - (int16_t)merange, mvmin.x);
> + const int16_t minY = X265_MAX(omv.y - (int16_t)merange, mvmin.y);
> + const int16_t maxX = X265_MIN(omv.x + (int16_t)merange, mvmax.x);
> + const int16_t maxY = X265_MIN(omv.y + (int16_t)merange, mvmax.y);
> + const uint16_t *p_cost_mvx = m_cost_mvx - qmvp.x;
> + const uint16_t *p_cost_mvy = m_cost_mvy - qmvp.y;
> + int16_t* meScratchBuffer = NULL;
> + int scratchSize = merange * 2 + 4;
> + if (scratchSize)
> + {
> + meScratchBuffer = X265_MALLOC(int16_t, scratchSize);
> + memset(meScratchBuffer, 0, sizeof(int16_t)* scratchSize);
> + }
> +
> + /* SEA is fastest in multiples of 4 */
> + int meRangeWidth = (maxX - minX + 3) & ~3;
> + int w = 0, h = 0; // Width and height of the PU
> + ALIGN_VAR_32(pixel, zero[64 * FENC_STRIDE]) = { 0 };
> + ALIGN_VAR_32(int, encDC[4]);
> + uint16_t *fpelCostMvX = m_fpelMvCosts[-qmvp.x & 3] + (-qmvp.x >>
> 2);
> + sizesFromPartition(partEnum, &w, &h);
> + int deltaX = (w <= 8) ? (w) : (w >> 1);
> + int deltaY = (h <= 8) ? (h) : (h >> 1);
> +
> + /* Check if very small rectangular blocks which cannot be
> sub-divided anymore */
> + bool smallRectPartition = partEnum == LUMA_4x4 || partEnum ==
> LUMA_16x12 ||
> + partEnum == LUMA_12x16 || partEnum == LUMA_16x4 || partEnum
> == LUMA_4x16;
> + /* Check if vertical partition */
> + bool verticalRect = partEnum == LUMA_32x64 || partEnum ==
> LUMA_16x32 || partEnum == LUMA_8x16 ||
> + partEnum == LUMA_4x8;
> + /* Check if horizontal partition */
> + bool horizontalRect = partEnum == LUMA_64x32 || partEnum ==
> LUMA_32x16 || partEnum == LUMA_16x8 ||
> + partEnum == LUMA_8x4;
> + /* Check if asymmetric vertical partition */
> + bool assymetricVertical = partEnum == LUMA_12x16 || partEnum ==
> LUMA_4x16 || partEnum == LUMA_24x32 ||
> + partEnum == LUMA_8x32 || partEnum == LUMA_48x64 || partEnum
> == LUMA_16x64;
> + /* Check if asymmetric horizontal partition */
> + bool assymetricHorizontal = partEnum == LUMA_16x12 || partEnum ==
> LUMA_16x4 || partEnum == LUMA_32x24 ||
> + partEnum == LUMA_32x8 || partEnum == LUMA_64x48 || partEnum
> == LUMA_64x16;
> +
> + int tempPartEnum = 0;
> +
> + /* If a vertical rectangular partition, it is horizontally split
> into two, for ads_x2() */
> + if (verticalRect)
> + tempPartEnum = partitionFromSizes(w, h >> 1);
> + /* If a horizontal rectangular partition, it is vertically split
> into two, for ads_x2() */
> + else if (horizontalRect)
> + tempPartEnum = partitionFromSizes(w >> 1, h);
> + /* We have integral planes introduced to account for asymmetric
> partitions.
> + * Hence all asymmetric partitions except those which cannot be
> split into legal sizes,
> + * are split into four for ads_x4() */
> + else if (assymetricVertical || assymetricHorizontal)
> + tempPartEnum = smallRectPartition ? partEnum :
> partitionFromSizes(w >> 1, h >> 1);
> + /* General case: Square partitions. All partitions with width > 8
> are split into four
> + * for ads_x4(), for 4x4 and 8x8 we do ads_x1() */
> + else
> + tempPartEnum = (w <= 8) ? partEnum : partitionFromSizes(w >>
> 1, h >> 1);
> +
> + /* Successive elimination by comparing DC before a full SAD,
> + * because sum(abs(diff)) >= abs(diff(sum)). */
> + primitives.pu[tempPartEnum].sad_x4(zero,
> + fenc,
> + fenc + deltaX,
> + fenc + deltaY * FENC_STRIDE,
> + fenc + deltaX + deltaY * FENC_STRIDE,
> + FENC_STRIDE,
> + encDC);
> +
> + /* Assigning appropriate integral plane */
> + uint32_t *sumsBase = NULL;
> + switch (deltaX)
> + {
> + case 32: if (deltaY % 24 == 0)
> + sumsBase = integral[1];
> + else if (deltaY == 8)
> + sumsBase = integral[2];
> + else
> + sumsBase = integral[0];
> + break;
> + case 24: sumsBase = integral[3];
> + break;
> + case 16: if (deltaY % 12 == 0)
> + sumsBase = integral[5];
> + else if (deltaY == 4)
> + sumsBase = integral[6];
> + else
> + sumsBase = integral[4];
> + break;
> + case 12: sumsBase = integral[7];
> + break;
> + case 8: if (deltaY == 32)
> + sumsBase = integral[8];
> + else
> + sumsBase = integral[9];
> + break;
> + case 4: if (deltaY == 16)
> + sumsBase = integral[10];
> + else
> + sumsBase = integral[11];
> + break;
> + default: sumsBase = integral[11];
> + break;
> + }
> +
> + if (partEnum == LUMA_64x64 || partEnum == LUMA_32x32 || partEnum
> == LUMA_16x16 ||
> + partEnum == LUMA_32x64 || partEnum == LUMA_16x32 || partEnum
> == LUMA_8x16 ||
> + partEnum == LUMA_4x8 || partEnum == LUMA_12x16 || partEnum ==
> LUMA_4x16 ||
> + partEnum == LUMA_24x32 || partEnum == LUMA_8x32 || partEnum
> == LUMA_48x64 ||
> + partEnum == LUMA_16x64)
> + deltaY *= (int)stride;
> +
> + if (verticalRect)
> + encDC[1] = encDC[2];
> +
> + if (horizontalRect)
> + deltaY = deltaX;
> +
> + /* ADS and SAD */
> + MV tmv;
> + for (tmv.y = minY; tmv.y <= maxY; tmv.y++)
> + {
> + int i, xn;
> + int ycost = p_cost_mvy[tmv.y] << 2;
> + if (bcost <= ycost)
> + continue;
> + bcost -= ycost;
> +
> + /* ADS_4 for 16x16, 32x32, 64x64, 24x32, 32x24, 48x64, 64x48,
> 32x8, 8x32, 64x16, 16x64 partitions
> + * ADS_1 for 4x4, 8x8, 16x4, 4x16, 16x12, 12x16 partitions
> + * ADS_2 for all other rectangular partitions */
> + xn = ads(encDC,
> + sumsBase + minX + tmv.y * stride,
> + deltaY,
> + fpelCostMvX + minX,
> + meScratchBuffer,
> + meRangeWidth,
> + bcost);
> +
> + for (i = 0; i < xn - 2; i += 3)
> + COST_MV_X3_ABS(minX + meScratchBuffer[i], tmv.y,
> + minX + meScratchBuffer[i + 1], tmv.y,
> + minX + meScratchBuffer[i + 2], tmv.y);
> +
> + bcost += ycost;
> + for (; i < xn; i++)
> + COST_MV(minX + meScratchBuffer[i], tmv.y);
> + }
> + if (meScratchBuffer)
> + x265_free(meScratchBuffer);
> + break;
> + }
> +
> case X265_FULL_SEARCH:
> {
> // dead slow exhaustive search, but at least it uses sad_x4()
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/motion.h
> --- a/source/encoder/motion.h Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/motion.h Mon Nov 28 11:35:49 2016 +0530
> @@ -52,6 +52,7 @@
> pixelcmp_t sad;
> pixelcmp_x3_t sad_x3;
> pixelcmp_x4_t sad_x4;
> + pixelcmp_ads_t ads;
> pixelcmp_t satd;
> pixelcmp_t chromaSatd;
>
> @@ -61,6 +62,7 @@
>
> static const int COST_MAX = 1 << 28;
>
> + uint32_t* integral[INTEGRAL_PLANE_NUM];
> Yuv fencPUYuv;
> int partEnum;
> bool bChromaSATD;
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/encoder/search.cpp Mon Nov 28 11:35:49 2016 +0530
> @@ -2243,7 +2243,13 @@
> if (lmv.notZero())
> mvc[numMvc++] = lmv;
> }
> -
> + if (m_param->searchMethod == X265_SEA)
> + {
> + int puX = puIdx & 1;
> + int puY = puIdx >> 1;
> + for (int planes = 0; planes < INTEGRAL_PLANE_NUM;
> planes++)
> + m_me.integral[planes] =
> interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY *
> pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride;
> + }
> setSearchRange(cu, mvp, m_param->searchRange, mvmin,
> mvmax);
> int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref],
> mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv,
> m_param->bSourceReferenceEstimation ?
> m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/test/regression-tests.txt
> --- a/source/test/regression-tests.txt Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/test/regression-tests.txt Mon Nov 28 11:35:49 2016 +0530
> @@ -149,4 +149,7 @@
> CrowdRun_1920x1080_50_10bit_422.yuv,--preset faster --interlace tff
> CrowdRun_1920x1080_50_10bit_422.yuv,--preset fast --interlace bff
>
> +#SEA Implementation Test
> +silent_cif_420.y4m,--preset veryslow --me 4
> +big_buck_bunny_360p24.y4m,--preset superfast --me 4
> # vim: tw=200
> diff -r 5d95fbd53ca3 -r f8d523976ed6 source/x265.h
> --- a/source/x265.h Fri Nov 25 12:57:52 2016 +0530
> +++ b/source/x265.h Mon Nov 28 11:35:49 2016 +0530
> @@ -290,6 +290,7 @@
> X265_HEX_SEARCH,
> X265_UMH_SEARCH,
> X265_STAR_SEARCH,
> + X265_SEA,
> X265_FULL_SEARCH
> } X265_ME_METHODS;
>
> @@ -464,7 +465,7 @@
> } x265_stats;
>
> /* String values accepted by x265_param_parse() (and CLI) for various
> parameters */
> -static const char * const x265_motion_est_names[] = { "dia", "hex",
> "umh", "star", "full", 0 };
> +static const char * const x265_motion_est_names[] = { "dia", "hex",
> "umh", "star", "sea", "full", 0 };
> static const char * const x265_source_csp_names[] = { "i400", "i420",
> "i422", "i444", "nv12", "nv16", 0 };
> static const char * const x265_video_format_names[] = { "component",
> "pal", "ntsc", "secam", "mac", "undef", 0 };
> static const char * const x265_fullrange_names[] = { "limited", "full", 0
> };
> @@ -910,9 +911,9 @@
> /* Limit modes analyzed for each CU using cost metrics from the 4
> sub-CUs */
> uint32_t limitModes;
>
> - /* ME search method (DIA, HEX, UMH, STAR, FULL). The search patterns
> + /* ME search method (DIA, HEX, UMH, STAR, SEA, FULL). The search
> patterns
> * (methods) are sorted in increasing complexity, with diamond being
> the
> - * simplest and fastest and full being the slowest. DIA, HEX, and
> UMH were
> + * simplest and fastest and full being the slowest. DIA, HEX, UMH
> and SEA were
> * adapted from x264 directly. STAR is an adaptation of the HEVC
> reference
> * encoder's three step search, while full is a naive exhaustive
> search. The
> * default is the star search, it has a good balance of performance
> and
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161129/123b8da3/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: SEA.patch
Type: application/octet-stream
Size: 40105 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20161129/123b8da3/attachment-0001.obj>
More information about the x265-devel
mailing list