[x265] [PATCH] SEA Motion Search Implementation

Vignesh V Menon vignesh at multicorewareinc.com
Tue Nov 29 10:42:50 CET 2016


# HG changeset patch
# User Vignesh Vijayakumar <vignesh at multicorewareinc.com>
# Date 1480313149 -19800
#      Mon Nov 28 11:35:49 2016 +0530
# Node ID f8d523976ed61cada53c579d8145a815d21d08ed
# Parent  5d95fbd53ca31747498c4bd661fa24f6ffd5a070
SEA motion search Implementation

diff -r 5d95fbd53ca3 -r f8d523976ed6 doc/reST/cli.rst
--- a/doc/reST/cli.rst Fri Nov 25 12:57:52 2016 +0530
+++ b/doc/reST/cli.rst Mon Nov 28 11:35:49 2016 +0530
@@ -964,13 +964,17 @@
  encoder: a star-pattern search followed by an optional radix scan
  followed by an optional star-search refinement. Full is an
  exhaustive search; an order of magnitude slower than all other
- searches but not much better than umh or star.
+ searches but not much better than umh or star. SEA (Successive
+ Elimination Algorithm) is similar to full search but faster: a three
+ step motion search adapted from x264 in which the DC values of the
+ encode block are computed, candidates are pruned with an ADS
+ calculation, and SAD is evaluated only for the motion vector
+ candidates that pass.

  0. dia
  1. hex **(default)**
  2. umh
  3. star
- 4. full
+ 4. sea
+ 5. full

 .. option:: --subme, -m <0..7>

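For readers new to SEA, the pruning principle the option text describes can be shown with a small standalone sketch in plain C++ (illustrative helper names, not x265 API): because sum(|a - b|) >= |sum(a) - sum(b)|, a candidate whose DC difference already exceeds the current best SAD can be rejected before any SAD is computed.

// Minimal illustration of the elimination principle behind SEA, assuming
// 8-bit pixels; helper names are illustrative, not x265 API.
#include <cstdint>
#include <cstdlib>

static int blockSum(const uint8_t* p, intptr_t stride, int w, int h)
{
    int s = 0;
    for (int y = 0; y < h; y++)
        for (int x = 0; x < w; x++)
            s += p[y * stride + x];
    return s;
}

static int blockSad(const uint8_t* a, intptr_t strideA,
                    const uint8_t* b, intptr_t strideB, int w, int h)
{
    int s = 0;
    for (int y = 0; y < h; y++)
        for (int x = 0; x < w; x++)
            s += std::abs(a[y * strideA + x] - b[y * strideB + x]);
    return s;
}

// Because sum(|a - b|) >= |sum(a) - sum(b)|, a candidate whose DC difference
// already exceeds the current best SAD can be rejected without computing SAD.
static int evalCandidate(const uint8_t* enc, intptr_t encStride,
                         const uint8_t* ref, intptr_t refStride,
                         int w, int h, int encDC, int bestCost)
{
    int ads = std::abs(encDC - blockSum(ref, refStride, w, h));
    if (ads >= bestCost)
        return bestCost;                                      // eliminated cheaply
    int sad = blockSad(enc, encStride, ref, refStride, w, h); // full SAD only if it can win
    return sad < bestCost ? sad : bestCost;
}
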
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/common.h
--- a/source/common/common.h Fri Nov 25 12:57:52 2016 +0530
+++ b/source/common/common.h Mon Nov 28 11:35:49 2016 +0530
@@ -328,6 +328,8 @@

 #define PIXEL_MAX ((1 << X265_DEPTH) - 1)

+#define INTEGRAL_PLANE_NUM          12 // 12 integral planes for 32x32, 32x24, 32x8, 24x32, 16x16, 16x12, 16x4, 12x16, 8x32, 8x8, 4x16 and 4x4.
+
 namespace X265_NS {

 enum { SAO_NUM_OFFSET = 4 };
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/framedata.cpp
--- a/source/common/framedata.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/common/framedata.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -48,6 +48,12 @@
     CHECKED_MALLOC_ZERO(m_cuStat, RCStatCU, sps.numCUsInFrame);
     CHECKED_MALLOC(m_rowStat, RCStatRow, sps.numCuInHeight);
     reinit(sps);
+
+    for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
+    {
+        m_meBuffer[i] = NULL;
+        m_meIntegral[i] = NULL;
+    }
     return true;

 fail:
@@ -70,4 +76,16 @@

     X265_FREE(m_cuStat);
     X265_FREE(m_rowStat);
+
+    if (m_meBuffer)
+    {
+        for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
+        {
+            if (m_meBuffer[i] != NULL)
+            {
+                X265_FREE(m_meBuffer[i]);
+                m_meBuffer[i] = NULL;
+            }
+        }
+    }
 }
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/framedata.h
--- a/source/common/framedata.h Fri Nov 25 12:57:52 2016 +0530
+++ b/source/common/framedata.h Mon Nov 28 11:35:49 2016 +0530
@@ -151,6 +151,9 @@
     double         m_rateFactor; /* calculated based on the Frame QP */
     int            m_picCsp;

+    uint32_t*              m_meIntegral[INTEGRAL_PLANE_NUM];       // 12 integral planes for 32x32, 32x24, 32x8, 24x32, 16x16, 16x12, 16x4, 12x16, 8x32, 8x8, 4x16 and 4x4.
+    uint32_t*              m_meBuffer[INTEGRAL_PLANE_NUM];
+
     FrameData();

     bool create(const x265_param& param, const SPS& sps, int csp);
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/param.cpp
--- a/source/common/param.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/common/param.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -1092,8 +1092,8 @@
           "Frame rate numerator and denominator must be specified");
     CHECK(param->interlaceMode < 0 || param->interlaceMode > 2,
           "Interlace mode must be 0 (progressive) 1 (top-field first) or 2 (bottom field first)");
-    CHECK(param->searchMethod<0 || param->searchMethod> X265_FULL_SEARCH,
-          "Search method is not supported value (0:DIA 1:HEX 2:UMH 3:HM 5:FULL)");
+    CHECK(param->searchMethod < 0 || param->searchMethod > X265_FULL_SEARCH,
+          "Search method is not supported value (0:DIA 1:HEX 2:UMH 3:HM 4:SEA 5:FULL)");
     CHECK(param->searchRange < 0,
           "Search Range must be more than 0");
     CHECK(param->searchRange >= 32768,
@@ -1256,6 +1256,10 @@
         "qpmin exceeds supported range (0 to 69)");
     CHECK(param->log2MaxPocLsb < 4 || param->log2MaxPocLsb > 16,
         "Supported range for log2MaxPocLsb is 4 to 16");
+#if !X86_64
+    CHECK(param->searchMethod == X265_SEA && (param->sourceWidth > 840 || param->sourceHeight > 480),
+        "SEA motion search does not support resolutions greater than 480p in 32 bit build");
+#endif
     return check_failed;
 }

diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/pixel.cpp
--- a/source/common/pixel.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/common/pixel.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -117,6 +117,52 @@
     }
 }

+template<int lx, int ly>
+int ads_x4(int encDC[4], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
+{
+    int nmv = 0;
+    for (int16_t i = 0; i < width; i++, sums++)
+    {
+        int ads = abs(encDC[0] - long(sums[0]))
+            + abs(encDC[1] - long(sums[lx >> 1]))
+            + abs(encDC[2] - long(sums[delta]))
+            + abs(encDC[3] - long(sums[delta + (lx >> 1)]))
+            + costMvX[i];
+        if (ads < thresh)
+            mvs[nmv++] = i;
+    }
+    return nmv;
+}
+
+template<int lx, int ly>
+int ads_x2(int encDC[2], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
+{
+    int nmv = 0;
+    for (int16_t i = 0; i < width; i++, sums++)
+    {
+        int ads = abs(encDC[0] - long(sums[0]))
+            + abs(encDC[1] - long(sums[delta]))
+            + costMvX[i];
+        if (ads < thresh)
+            mvs[nmv++] = i;
+    }
+    return nmv;
+}
+
+template<int lx, int ly>
+int ads_x1(int encDC[1], uint32_t *sums, int, uint16_t *costMvX, int16_t *mvs, int width, int thresh)
+{
+    int nmv = 0;
+    for (int16_t i = 0; i < width; i++, sums++)
+    {
+        int ads = abs(encDC[0] - long(sums[0]))
+            + costMvX[i];
+        if (ads < thresh)
+            mvs[nmv++] = i;
+    }
+    return nmv;
+}
+
 template<int lx, int ly, class T1, class T2>
 sse_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
 {
@@ -991,6 +1037,32 @@
     LUMA_PU(64, 16);
     LUMA_PU(16, 64);

+    p.pu[LUMA_4x4].ads = ads_x1<4, 4>;
+    p.pu[LUMA_8x8].ads = ads_x1<8, 8>;
+    p.pu[LUMA_8x4].ads = ads_x2<8, 4>;
+    p.pu[LUMA_4x8].ads = ads_x2<4, 8>;
+    p.pu[LUMA_16x16].ads = ads_x4<16, 16>;
+    p.pu[LUMA_16x8].ads = ads_x2<16, 8>;
+    p.pu[LUMA_8x16].ads = ads_x2<8, 16>;
+    p.pu[LUMA_16x12].ads = ads_x1<16, 12>;
+    p.pu[LUMA_12x16].ads = ads_x1<12, 16>;
+    p.pu[LUMA_16x4].ads = ads_x1<16, 4>;
+    p.pu[LUMA_4x16].ads = ads_x1<4, 16>;
+    p.pu[LUMA_32x32].ads = ads_x4<32, 32>;
+    p.pu[LUMA_32x16].ads = ads_x2<32, 16>;
+    p.pu[LUMA_16x32].ads = ads_x2<16, 32>;
+    p.pu[LUMA_32x24].ads = ads_x4<32, 24>;
+    p.pu[LUMA_24x32].ads = ads_x4<24, 32>;
+    p.pu[LUMA_32x8].ads = ads_x4<32, 8>;
+    p.pu[LUMA_8x32].ads = ads_x4<8, 32>;
+    p.pu[LUMA_64x64].ads = ads_x4<64, 64>;
+    p.pu[LUMA_64x32].ads = ads_x2<64, 32>;
+    p.pu[LUMA_32x64].ads = ads_x2<32, 64>;
+    p.pu[LUMA_64x48].ads = ads_x4<64, 48>;
+    p.pu[LUMA_48x64].ads = ads_x4<48, 64>;
+    p.pu[LUMA_64x16].ads = ads_x4<64, 16>;
+    p.pu[LUMA_16x64].ads = ads_x4<16, 64>;
+
     p.pu[LUMA_4x4].satd   = satd_4x4;
     p.pu[LUMA_8x8].satd   = satd8<8, 8>;
     p.pu[LUMA_8x4].satd   = satd_8x4;
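To make the contract of the new ads primitives concrete, here is a self-contained restatement of the ads_x4<16,16> behaviour with toy data (this is an illustrative copy for the example, not encoder state): given the four sub-block DC values of the encode PU and one row of integral-plane sums, it returns the x-offsets whose ADS lower bound plus horizontal MV cost stays below the threshold.

// Self-contained restatement of the ads_x4<16,16> contract for illustration.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

static int adsX4_16x16(const int encDC[4], const uint32_t* sums, int delta,
                       const uint16_t* costMvX, int16_t* mvs, int width, int thresh)
{
    int nmv = 0;
    for (int16_t i = 0; i < width; i++, sums++)
    {
        int ads = (int)(std::abs(encDC[0] - (long)sums[0])
                + std::abs(encDC[1] - (long)sums[8])           // lx >> 1 with lx = 16
                + std::abs(encDC[2] - (long)sums[delta])
                + std::abs(encDC[3] - (long)sums[delta + 8]))
                + costMvX[i];
        if (ads < thresh)
            mvs[nmv++] = i;                                    // candidate survives
    }
    return nmv;
}

int main()
{
    int encDC[4] = { 5000, 5100, 4900, 5050 };  // DC of the four 8x8 quadrants of a 16x16 PU
    uint32_t sums[1024] = { 0 };                // stand-in for one row of the 16x16 integral plane
    uint16_t costMvX[16];                       // full-pel horizontal MV costs
    int16_t mvs[16];
    for (int i = 0; i < 16; i++)
        costMvX[i] = (uint16_t)(2 * i);         // rising MV cost so only near candidates pass

    int nmv = adsX4_16x16(encDC, sums, /*delta=*/256, costMvX, mvs, /*width=*/16, /*thresh=*/20060);
    printf("%d of 16 candidates survive for a full SAD\n", nmv);
    return 0;
}
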
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/primitives.h
--- a/source/common/primitives.h Fri Nov 25 12:57:52 2016 +0530
+++ b/source/common/primitives.h Mon Nov 28 11:35:49 2016 +0530
@@ -115,6 +115,7 @@
 typedef sse_t (*pixel_sse_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
 typedef sse_t (*pixel_sse_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
 typedef sse_t (*pixel_ssd_s_t)(const int16_t* fenc, intptr_t fencstride);
+typedef int(*pixelcmp_ads_t)(int encDC[], uint32_t *sums, int delta, uint16_t *costMvX, int16_t *mvs, int width, int thresh);
 typedef void (*pixelcmp_x4_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
 typedef void (*pixelcmp_x3_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
 typedef void (*blockfill_s_t)(int16_t* dst, intptr_t dstride, int16_t val);
@@ -217,6 +218,7 @@
         pixelcmp_t     sad;         // Sum of Absolute Differences
         pixelcmp_x3_t  sad_x3;      // Sum of Absolute Differences, 3 mv offsets at once
         pixelcmp_x4_t  sad_x4;      // Sum of Absolute Differences, 4 mv offsets at once
+        pixelcmp_ads_t ads;         // Absolute Differences sum
         pixelcmp_t     satd;        // Sum of Absolute Transformed Differences (4x4 Hadamard)

         filter_pp_t    luma_hpp;    // 8-tap luma motion compensation interpolation filters
@@ -402,6 +404,22 @@
     return part;
 }

+/* Computes the size of the LumaPU for a given LumaPU enum */
+inline void sizesFromPartition(int part, int *width, int *height)
+{
+    X265_CHECK(part >= 0 && part <= 24, "Invalid part %d \n", part);
+    extern const uint8_t lumaPartitionMapTable[];
+    int index = 0;
+    for (int i = 0; i < 256;i++)
+        if (part == lumaPartitionMapTable[i])
+        {
+            index = i;
+            break;
+        }
+    *width = 4 * ((index >> 4) + 1);
+    *height = 4 * ((index % 16) + 1);
+}
+
 inline int partitionFromLog2Size(int log2Size)
 {
     X265_CHECK(2 <= log2Size && log2Size <= 6, "Invalid block size\n");
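A brief usage note on the new helper: sizesFromPartition() inverts partitionFromSizes() by scanning lumaPartitionMapTable, so the SEA branch in motion.cpp can recover the PU width and height from the partEnum it already holds. A hypothetical caller, assuming the header above is in scope:

// Hypothetical caller of sizesFromPartition(), assuming primitives.h is included.
#include <cstdio>

void printPartSize(int partEnum)
{
    int w = 0, h = 0;
    sizesFromPartition(partEnum, &w, &h);   // e.g. LUMA_16x12 yields w = 16, h = 12
    printf("part %d is %dx%d\n", partEnum, w, h);
}
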
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/yuv.cpp
--- a/source/common/yuv.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/common/yuv.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -47,6 +47,11 @@
     m_size  = size;
     m_part = partitionFromSizes(size, size);

+    for (int i = 0; i < 2; i++)
+        for (int j = 0; j < MAX_NUM_REF; j++)
+            for (int k = 0; k < INTEGRAL_PLANE_NUM; k++)
+                m_integral[i][j][k] = NULL;
+
     if (csp == X265_CSP_I400)
     {
         CHECKED_MALLOC(m_buf[0], pixel, size * size + 8);
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/common/yuv.h
--- a/source/common/yuv.h Fri Nov 25 12:57:52 2016 +0530
+++ b/source/common/yuv.h Mon Nov 28 11:35:49 2016 +0530
@@ -48,6 +48,7 @@
     int      m_csp;
     int      m_hChromaShift;
     int      m_vChromaShift;
+    uint32_t *m_integral[2][MAX_NUM_REF][INTEGRAL_PLANE_NUM];

     Yuv();

diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/analysis.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -943,6 +943,16 @@
     ModeDepth& md = m_modeDepth[depth];
     md.bestMode = NULL;

+    if (m_param->searchMethod == X265_SEA)
+    {
+        int numPredDir = m_slice->isInterP() ? 1 : 2;
+        int offset = (int)(m_frame->m_reconPic->m_cuOffsetY[parentCTU.m_cuAddr] + m_frame->m_reconPic->m_buOffsetY[cuGeom.absPartIdx]);
+        for (int list = 0; list < numPredDir; list++)
+            for (int i = 0; i < m_frame->m_encData->m_slice->m_numRefIdx[list]; i++)
+                for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
+                    m_modeDepth[depth].fencYuv.m_integral[list][i][planes] = m_frame->m_encData->m_slice->m_refFrameList[list][i]->m_encData->m_meIntegral[planes] + offset;
+    }
+
     PicYuv& reconPic = *m_frame->m_reconPic;

     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
@@ -1484,6 +1494,16 @@
     ModeDepth& md = m_modeDepth[depth];
     md.bestMode = NULL;

+    if (m_param->searchMethod == X265_SEA)
+    {
+        int numPredDir = m_slice->isInterP() ? 1 : 2;
+        int offset = (int)(m_frame->m_reconPic->m_cuOffsetY[parentCTU.m_cuAddr] + m_frame->m_reconPic->m_buOffsetY[cuGeom.absPartIdx]);
+        for (int list = 0; list < numPredDir; list++)
+            for (int i = 0; i < m_frame->m_encData->m_slice->m_numRefIdx[list]; i++)
+                for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
+                    m_modeDepth[depth].fencYuv.m_integral[list][i][planes] = m_frame->m_encData->m_slice->m_refFrameList[list][i]->m_encData->m_meIntegral[planes] + offset;
+    }
+
     bool mightSplit = !(cuGeom.flags & CUGeom::LEAF);
     bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
     bool skipRecursion = false;
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/bitcost.cpp
--- a/source/encoder/bitcost.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/bitcost.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -54,7 +54,22 @@
                 s_costs[qp][i] = s_costs[qp][-i] = (uint16_t)X265_MIN(s_bitsizes[i] * lambda + 0.5f, (1 << 15) - 1);
         }
     }
+    for (int j = 0; j < 4; j++)
+    {
+         if (!s_fpelMvCosts[qp][j])
+        {
+            s_fpelMvCosts[qp][j] = X265_MALLOC(uint16_t, BC_MAX_MV + 1) + (BC_MAX_MV >> 1);
+        }
+    }

+    for (int j = 0; j < 4; j++)
+    {
+        for (int i = -(BC_MAX_MV >> 1); i < (BC_MAX_MV >> 1); i++)
+        {
+            s_fpelMvCosts[qp][j][i] = s_costs[qp][i * 4 + j];
+        }
+        m_fpelMvCosts[j] = s_fpelMvCosts[qp][j];
+    }
     m_cost = s_costs[qp];
 }

@@ -64,6 +79,8 @@

 uint16_t *BitCost::s_costs[BC_MAX_QP];

+uint16_t* BitCost::s_fpelMvCosts[BC_MAX_QP][4];
+
 float *BitCost::s_bitsizes;

 Lock BitCost::s_costCalcLock;
@@ -97,6 +114,17 @@
         }
     }

+    for (int i = 0; i < BC_MAX_QP; i++)
+    {
+        if (s_fpelMvCosts[i][0])
+        {
+            for (int j = 0; j < 4; j++)
+            {
+                X265_FREE(s_fpelMvCosts[i][j] - (BC_MAX_MV >> 1));
+            }
+        }
+    }
+
     if (s_bitsizes)
     {
         X265_FREE(s_bitsizes - 2 * BC_MAX_MV);
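For context on the new tables: s_costs[qp] stores one cost per quarter-pel MV component, so the four s_fpelMvCosts tables simply de-interleave it by quarter-pel phase, letting SEA look up the horizontal cost of a full-pel candidate with a single index. A small sketch of that equivalence using plain local arrays (illustrative names, and it ignores the negative-MV offset handling done in the real code):

// De-interleaving sketch: one full-pel cost table per quarter-pel phase.
#include <cassert>
#include <cstdint>

void fpelCostDemo()
{
    const int range = 64;                 // quarter-pel cost entries (toy size)
    uint16_t qpelCost[2 * range];         // cost per quarter-pel x component
    uint16_t fpelCost[4][range / 2];      // one table per quarter-pel phase

    for (int i = 0; i < 2 * range; i++)
        qpelCost[i] = (uint16_t)i;        // dummy monotone costs

    // Phase j picks every 4th quarter-pel entry, as bitcost.cpp does per QP.
    for (int j = 0; j < 4; j++)
        for (int x = 0; x < range / 2; x++)
            fpelCost[j][x] = qpelCost[x * 4 + j];

    // A full-pel MV x with quarter-pel phase j now costs fpelCost[j][x],
    // exactly what qpelCost[(x << 2) + j] would return.
    assert(fpelCost[1][5] == qpelCost[(5 << 2) + 1]);
}
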
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/bitcost.h
--- a/source/encoder/bitcost.h Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/bitcost.h Mon Nov 28 11:35:49 2016 +0530
@@ -67,6 +67,8 @@

     uint16_t *m_cost;

+    uint16_t *m_fpelMvCosts[4];
+
     MV        m_mvp;

     BitCost& operator =(const BitCost&);
@@ -84,6 +86,8 @@

     static uint16_t *s_costs[BC_MAX_QP];

+    static uint16_t *s_fpelMvCosts[BC_MAX_QP][4];
+
     static Lock s_costCalcLock;

     static void CalculateLogs();
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/dpb.cpp
--- a/source/encoder/dpb.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/dpb.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -92,6 +92,19 @@
             m_freeList.pushBack(*curFrame);
             curFrame->m_encData->m_freeListNext = m_frameDataFreeList;
             m_frameDataFreeList = curFrame->m_encData;
+
+            if (curFrame->m_encData->m_meBuffer)
+            {
+                for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
+                {
+                    if (curFrame->m_encData->m_meBuffer[i] != NULL)
+                    {
+                        X265_FREE(curFrame->m_encData->m_meBuffer[i]);
+                        curFrame->m_encData->m_meBuffer[i] = NULL;
+                    }
+                }
+            }
+
             curFrame->m_encData = NULL;
             curFrame->m_reconPic = NULL;
         }
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/encoder.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -869,6 +869,25 @@
                 slice->m_endCUAddr = slice->realEndAddress(m_sps.numCUsInFrame * NUM_4x4_PARTITIONS);
             }

+            if (m_param->searchMethod == X265_SEA && frameEnc->m_lowres.sliceType != X265_TYPE_B)
+            {
+                int padX = g_maxCUSize + 32;
+                int padY = g_maxCUSize + 16;
+                uint32_t numCuInHeight = (frameEnc->m_encData->m_reconPic->m_picHeight + g_maxCUSize - 1) / g_maxCUSize;
+                int maxHeight = numCuInHeight * g_maxCUSize;
+                for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
+                {
+                    frameEnc->m_encData->m_meBuffer[i] = X265_MALLOC(uint32_t, frameEnc->m_reconPic->m_stride * (maxHeight + (2 * padY)));
+                    if (frameEnc->m_encData->m_meBuffer[i])
+                    {
+                        memset(frameEnc->m_encData->m_meBuffer[i], 0, sizeof(uint32_t)* frameEnc->m_reconPic->m_stride * (maxHeight + (2 * padY)));
+                        frameEnc->m_encData->m_meIntegral[i] = frameEnc->m_encData->m_meBuffer[i] + frameEnc->m_encData->m_reconPic->m_stride * padY + padX;
+                    }
+                    else
+                        x265_log(m_param, X265_LOG_ERROR, "SEA motion search: POC %d Integral buffer[%d] unallocated\n", frameEnc->m_poc, i);
+                }
+            }
+
             if (m_param->bOptQpPPS && frameEnc->m_lowres.bKeyframe && m_param->bRepeatHeaders)
             {
                 ScopedLock qpLock(m_sliceQpLock);
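To get a feel for the cost of this allocation: each of the 12 planes holds one uint32_t per luma sample of the padded reconstructed picture, so the per-frame footprint follows directly from the expression above. A back-of-the-envelope sketch using the same formula; the stride and CTU size here are assumed example values for a 1080p encode, not queried from the encoder:

// Rough per-frame memory estimate for the SEA integral planes, mirroring the
// allocation in encoder.cpp. Stride and CTU size are example assumptions.
#include <cstdio>
#include <cstdint>

int main()
{
    const int integralPlaneNum = 12;      // INTEGRAL_PLANE_NUM
    const int maxCUSize = 64;             // assumed CTU size (g_maxCUSize)
    const int stride = 2112;              // assumed recon stride for a 1080p picture
    const int picHeight = 1080;

    int padY = maxCUSize + 16;
    uint32_t numCuInHeight = (picHeight + maxCUSize - 1) / maxCUSize;
    int maxHeight = numCuInHeight * maxCUSize;

    size_t perPlane = sizeof(uint32_t) * (size_t)stride * (maxHeight + 2 * padY);
    printf("SEA integral planes: roughly %.1f MB per encoded non-B frame\n",
           integralPlaneNum * perPlane / (1024.0 * 1024.0));
    return 0;
}
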
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/framefilter.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -35,6 +35,109 @@
 static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height);
 static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt);

+static void integral_init4h(uint32_t *sum, pixel *pix, intptr_t stride)
+{
+    int32_t v = pix[0] + pix[1] + pix[2] + pix[3];
+    for (int16_t x = 0; x < stride - 4; x++)
+    {
+        sum[x] = v + sum[x - stride];
+        v += pix[x + 4] - pix[x];
+    }
+}
+
+static void integral_init8h(uint32_t *sum, pixel *pix, intptr_t stride)
+{
+    int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7];
+    for (int16_t x = 0; x < stride - 8; x++)
+    {
+        sum[x] = v + sum[x - stride];
+        v += pix[x + 8] - pix[x];
+    }
+}
+
+static void integral_init12h(uint32_t *sum, pixel *pix, intptr_t stride)
+{
+    int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +
+        pix[8] + pix[9] + pix[10] + pix[11];
+    for (int16_t x = 0; x < stride - 12; x++)
+    {
+        sum[x] = v + sum[x - stride];
+        v += pix[x + 12] - pix[x];
+    }
+}
+
+static void integral_init16h(uint32_t *sum, pixel *pix, intptr_t stride)
+{
+    int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +
+        pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15];
+    for (int16_t x = 0; x < stride - 16; x++)
+    {
+        sum[x] = v + sum[x - stride];
+        v += pix[x + 16] - pix[x];
+    }
+}
+
+static void integral_init24h(uint32_t *sum, pixel *pix, intptr_t stride)
+{
+    int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +
+        pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15] +
+        pix[16] + pix[17] + pix[18] + pix[19] + pix[20] + pix[21] + pix[22] + pix[23];
+    for (int16_t x = 0; x < stride - 24; x++)
+    {
+        sum[x] = v + sum[x - stride];
+        v += pix[x + 24] - pix[x];
+    }
+}
+
+static void integral_init32h(uint32_t *sum, pixel *pix, intptr_t stride)
+{
+    int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +
+        pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15] +
+        pix[16] + pix[17] + pix[18] + pix[19] + pix[20] + pix[21] + pix[22] + pix[23] +
+        pix[24] + pix[25] + pix[26] + pix[27] + pix[28] + pix[29] + pix[30] + pix[31];
+    for (int16_t x = 0; x < stride - 32; x++)
+    {
+        sum[x] = v + sum[x - stride];
+        v += pix[x + 32] - pix[x];
+    }
+}
+
+static void integral_init4v(uint32_t *sum4, intptr_t stride)
+{
+    for (int x = 0; x < stride; x++)
+        sum4[x] = sum4[x + 4 * stride] - sum4[x];
+}
+
+static void integral_init8v(uint32_t *sum8, intptr_t stride)
+{
+    for (int x = 0; x < stride; x++)
+        sum8[x] = sum8[x + 8 * stride] - sum8[x];
+}
+
+static void integral_init12v(uint32_t *sum12, intptr_t stride)
+{
+    for (int x = 0; x < stride; x++)
+        sum12[x] = sum12[x + 12 * stride] - sum12[x];
+}
+
+static void integral_init16v(uint32_t *sum16, intptr_t stride)
+{
+    for (int x = 0; x < stride; x++)
+        sum16[x] = sum16[x + 16 * stride] - sum16[x];
+}
+
+static void integral_init24v(uint32_t *sum24, intptr_t stride)
+{
+    for (int x = 0; x < stride; x++)
+        sum24[x] = sum24[x + 24 * stride] - sum24[x];
+}
+
+static void integral_init32v(uint32_t *sum32, intptr_t stride)
+{
+    for (int x = 0; x < stride; x++)
+        sum32[x] = sum32[x + 32 * stride] - sum32[x];
+}
+
 void FrameFilter::destroy()
 {
     X265_FREE(m_ssimBuf);
@@ -65,6 +168,7 @@
     m_saoRowDelay = m_param->bEnableLoopFilter ? 1 : 0;
     m_lastHeight = (m_param->sourceHeight % g_maxCUSize) ? (m_param->sourceHeight % g_maxCUSize) : g_maxCUSize;
     m_lastWidth = (m_param->sourceWidth % g_maxCUSize) ? (m_param->sourceWidth % g_maxCUSize) : g_maxCUSize;
+    integralCompleted.set(0);

     if (m_param->bEnableSsim)
         m_ssimBuf = X265_MALLOC(int, 8 * (m_param->sourceWidth / 4 + 3));
@@ -664,6 +768,107 @@
         }
     } // end of (m_param->maxSlices == 1)

+    int lastRow = row == (int)m_frame->m_encData->m_slice->m_sps->numCuInHeight - 1;
+
+    /* generate integral planes for SEA motion search */
+    if (m_param->searchMethod == X265_SEA && m_frame->m_encData->m_meIntegral && m_frame->m_lowres.sliceType != X265_TYPE_B)
+    {
+        /* If WPP is enabled, the integral calculation for the current row (other than the first)
+         * must wait until the integral for the previous row is computed */
+        if (m_param->bEnableWavefront && row)
+        {
+            while (m_parallelFilter[row - 1].m_frameFilter->integralCompleted.get() == 0)
+            {
+                m_parallelFilter[row - 1].m_frameFilter->integralCompleted.waitForChange(0);
+            }
+        }
+
+        int stride = (int)m_frame->m_reconPic->m_stride;
+        int padX = g_maxCUSize + 32;
+        int padY = g_maxCUSize + 16;
+        int numCuInHeight = m_frame->m_encData->m_slice->m_sps->numCuInHeight;
+        int maxHeight = numCuInHeight * g_maxCUSize;
+        int start = 0;
+
+        if (m_param->interlaceMode)
+            start = (row * g_maxCUSize >> 1);
+        else
+            start = row * g_maxCUSize;
+
+        int height = lastRow ? (maxHeight + g_maxCUSize * m_param->interlaceMode) : (((row + m_param->interlaceMode) * g_maxCUSize) + g_maxCUSize);
+
+        if (!row)
+        {
+            for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
+                memset(m_frame->m_encData->m_meIntegral[i] - padY * stride - padX, 0, stride * sizeof(uint32_t));
+            start = -padY;
+        }
+
+        if (lastRow)
+            height += padY - 1;
+
+        for (int y = start; y < height; y++)
+        {
+            pixel    *pix = m_frame->m_reconPic->m_picOrg[0] + y * stride - padX;
+            uint32_t *sum32x32 = m_frame->m_encData->m_meIntegral[0] + (y + 1) * stride - padX;
+            uint32_t *sum32x24 = m_frame->m_encData->m_meIntegral[1] + (y + 1) * stride - padX;
+            uint32_t *sum32x8 = m_frame->m_encData->m_meIntegral[2] + (y + 1) * stride - padX;
+            uint32_t *sum24x32 = m_frame->m_encData->m_meIntegral[3] + (y + 1) * stride - padX;
+            uint32_t *sum16x16 = m_frame->m_encData->m_meIntegral[4] + (y + 1) * stride - padX;
+            uint32_t *sum16x12 = m_frame->m_encData->m_meIntegral[5] + (y + 1) * stride - padX;
+            uint32_t *sum16x4 = m_frame->m_encData->m_meIntegral[6] + (y + 1) * stride - padX;
+            uint32_t *sum12x16 = m_frame->m_encData->m_meIntegral[7] + (y + 1) * stride - padX;
+            uint32_t *sum8x32 = m_frame->m_encData->m_meIntegral[8] + (y + 1) * stride - padX;
+            uint32_t *sum8x8 = m_frame->m_encData->m_meIntegral[9] + (y + 1) * stride - padX;
+            uint32_t *sum4x16 = m_frame->m_encData->m_meIntegral[10] + (y + 1) * stride - padX;
+            uint32_t *sum4x4 = m_frame->m_encData->m_meIntegral[11] + (y + 1) * stride - padX;
+
+            /*For width = 32 */
+            integral_init32h(sum32x32, pix, stride);
+            if (y >= 32 - padY)
+                integral_init32v(sum32x32 - 32 * stride, stride);
+            integral_init32h(sum32x24, pix, stride);
+            if (y >= 24 - padY)
+                integral_init24v(sum32x24 - 24 * stride, stride);
+            integral_init32h(sum32x8, pix, stride);
+            if (y >= 8 - padY)
+                integral_init8v(sum32x8 - 8 * stride, stride);
+            /*For width = 24 */
+            integral_init24h(sum24x32, pix, stride);
+            if (y >= 32 - padY)
+                integral_init32v(sum24x32 - 32 * stride, stride);
+            /*For width = 16 */
+            integral_init16h(sum16x16, pix, stride);
+            if (y >= 16 - padY)
+                integral_init16v(sum16x16 - 16 * stride, stride);
+            integral_init16h(sum16x12, pix, stride);
+            if (y >= 12 - padY)
+                integral_init12v(sum16x12 - 12 * stride, stride);
+            integral_init16h(sum16x4, pix, stride);
+            if (y >= 4 - padY)
+                integral_init4v(sum16x4 - 4 * stride, stride);
+            /*For width = 12 */
+            integral_init12h(sum12x16, pix, stride);
+            if (y >= 16 - padY)
+                integral_init16v(sum12x16 - 16 * stride, stride);
+            /*For width = 8 */
+            integral_init8h(sum8x32, pix, stride);
+            if (y >= 32 - padY)
+                integral_init32v(sum8x32 - 32 * stride, stride);
+            integral_init8h(sum8x8, pix, stride);
+            if (y >= 8 - padY)
+                integral_init8v(sum8x8 - 8 * stride, stride);
+            /*For width = 4 */
+            integral_init4h(sum4x16, pix, stride);
+            if (y >= 16 - padY)
+                integral_init16v(sum4x16 - 16 * stride, stride);
+            integral_init4h(sum4x4, pix, stride);
+            if (y >= 4 - padY)
+                integral_init4v(sum4x4 - 4 * stride, stride);
+        }
+        m_parallelFilter[row].m_frameFilter->integralCompleted.set(1);
+    }
+
     if (ATOMIC_INC(&m_frameEncoder->m_completionCount) == 2 * (int)m_frameEncoder->m_numRows)
     {
         m_frameEncoder->m_completionEvent.trigger();
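The integral_init*h/*v helpers above build, for each block width, a running horizontal row sum accumulated down the picture, followed by a vertical differencing pass, so that afterwards one table entry yields the pixel sum of a w x h block at any position. A tiny standalone sketch of that two-pass construction and the resulting O(1) block-sum query, using toy sizes and ignoring the padY/padX border handling of the real code:

// Toy construction of one "integral" plane in the spirit of
// integral_init8h/8v: after both passes, sum[y * stride + x] (y >= 1) holds
// the pixel sum of the 8x8 block whose top-left corner is (x, y).
#include <cstdint>
#include <cstdio>
#include <vector>

int main()
{
    const int stride = 32, rows = 32, bw = 8, bh = 8;
    std::vector<uint8_t> pix(stride * rows, 1);          // flat test picture of 1s
    std::vector<uint32_t> sum((rows + 1) * stride, 0);   // one extra zeroed row on top

    // Horizontal pass: bw-wide row sums, accumulated down the rows.
    for (int y = 0; y < rows; y++)
        for (int x = 0; x < stride - bw; x++)
        {
            uint32_t v = 0;
            for (int k = 0; k < bw; k++)
                v += pix[y * stride + x + k];
            sum[(y + 1) * stride + x] = v + sum[y * stride + x];
        }

    // Vertical pass: turn column-accumulated sums into bh-tall block sums.
    for (int y = 0; y + bh < rows; y++)
        for (int x = 0; x < stride - bw; x++)
            sum[(y + 1) * stride + x] =
                sum[(y + 1 + bh) * stride + x] - sum[(y + 1) * stride + x];

    // The 8x8 block anchored at (4, 4) now costs a single load:
    printf("8x8 block sum at (4,4) = %u (expected %d)\n",
           sum[4 * stride + 4], bw * bh);
    return 0;
}
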
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/framefilter.h Mon Nov 28 11:35:49 2016 +0530
@@ -57,6 +57,8 @@
     int           m_lastHeight;
     int           m_lastWidth;

+    ThreadSafeInteger integralCompleted;     /* check if integral calculation is completed in this row */
+
     void*         m_ssimBuf;        /* Temp storage for ssim computation */

 #define MAX_PFILTER_CUS     (4) /* maximum CUs for every thread */
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/motion.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -109,6 +109,8 @@
     blockOffset = 0;
     bChromaSATD = false;
     chromaSatd = NULL;
+    for (int i = 0; i < INTEGRAL_PLANE_NUM; i++)
+        integral[i] = NULL;
 }

 void MotionEstimate::init(int csp)
@@ -165,10 +167,12 @@
     partEnum = partitionFromSizes(pwidth, pheight);
     X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");
     sad = primitives.pu[partEnum].sad;
+    ads = primitives.pu[partEnum].ads;
     satd = primitives.pu[partEnum].satd;
     sad_x3 = primitives.pu[partEnum].sad_x3;
     sad_x4 = primitives.pu[partEnum].sad_x4;

+
     blockwidth = pwidth;
     blockOffset = offset;
     absPartIdx = ctuAddr = -1;
@@ -188,6 +192,7 @@
     partEnum = partitionFromSizes(pwidth, pheight);
     X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");
     sad = primitives.pu[partEnum].sad;
+    ads = primitives.pu[partEnum].ads;
     satd = primitives.pu[partEnum].satd;
     sad_x3 = primitives.pu[partEnum].sad_x3;
     sad_x4 = primitives.pu[partEnum].sad_x4;
@@ -288,6 +293,21 @@
             COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \
     }

+#define COST_MV_X3_ABS( m0x, m0y, m1x, m1y, m2x, m2y )\
+{\
+    sad_x3(fenc, \
+    fref + (m0x) + (m0y) * stride, \
+    fref + (m1x) + (m1y) * stride, \
+    fref + (m2x) + (m2y) * stride, \
+    stride, costs); \
+    costs[0] += p_cost_mvx[(m0x) << 2]; /* no cost_mvy */\
+    costs[1] += p_cost_mvx[(m1x) << 2]; \
+    costs[2] += p_cost_mvx[(m2x) << 2]; \
+    COPY3_IF_LT(bcost, costs[0], bmv.x, m0x, bmv.y, m0y); \
+    COPY3_IF_LT(bcost, costs[1], bmv.x, m1x, bmv.y, m1y); \
+    COPY3_IF_LT(bcost, costs[2], bmv.x, m2x, bmv.y, m2y); \
+}
+
 #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \
     { \
         pixel *pix_base = fref + bmv.x + bmv.y * stride; \
@@ -1078,6 +1098,161 @@
         break;
     }

+    case X265_SEA:
+    {
+        // Successive Elimination Algorithm
+        const int16_t minX = X265_MAX(omv.x - (int16_t)merange, mvmin.x);
+        const int16_t minY = X265_MAX(omv.y - (int16_t)merange, mvmin.y);
+        const int16_t maxX = X265_MIN(omv.x + (int16_t)merange, mvmax.x);
+        const int16_t maxY = X265_MIN(omv.y + (int16_t)merange, mvmax.y);
+        const uint16_t *p_cost_mvx = m_cost_mvx - qmvp.x;
+        const uint16_t *p_cost_mvy = m_cost_mvy - qmvp.y;
+        int16_t* meScratchBuffer = NULL;
+        int scratchSize = merange * 2 + 4;
+        if (scratchSize)
+        {
+            meScratchBuffer = X265_MALLOC(int16_t, scratchSize);
+            memset(meScratchBuffer, 0, sizeof(int16_t)* scratchSize);
+        }
+
+        /* SEA is fastest in multiples of 4 */
+        int meRangeWidth = (maxX - minX + 3) & ~3;
+        int w = 0, h = 0;                    // Width and height of the PU
+        ALIGN_VAR_32(pixel, zero[64 * FENC_STRIDE]) = { 0 };
+        ALIGN_VAR_32(int, encDC[4]);
+        uint16_t *fpelCostMvX = m_fpelMvCosts[-qmvp.x & 3] + (-qmvp.x >> 2);
+        sizesFromPartition(partEnum, &w, &h);
+        int deltaX = (w <= 8) ? (w) : (w >> 1);
+        int deltaY = (h <= 8) ? (h) : (h >> 1);
+
+        /* Check if very small rectangular blocks which cannot be sub-divided anymore */
+        bool smallRectPartition = partEnum == LUMA_4x4 || partEnum == LUMA_16x12 ||
+            partEnum == LUMA_12x16 || partEnum == LUMA_16x4 || partEnum == LUMA_4x16;
+        /* Check if vertical partition */
+        bool verticalRect = partEnum == LUMA_32x64 || partEnum == LUMA_16x32 || partEnum == LUMA_8x16 ||
+            partEnum == LUMA_4x8;
+        /* Check if horizontal partition */
+        bool horizontalRect = partEnum == LUMA_64x32 || partEnum == LUMA_32x16 || partEnum == LUMA_16x8 ||
+            partEnum == LUMA_8x4;
+        /* Check if asymmetric vertical partition */
+        bool assymetricVertical = partEnum == LUMA_12x16 || partEnum == LUMA_4x16 || partEnum == LUMA_24x32 ||
+            partEnum == LUMA_8x32 || partEnum == LUMA_48x64 || partEnum == LUMA_16x64;
+        /* Check if asymmetric horizontal partition */
+        bool assymetricHorizontal = partEnum == LUMA_16x12 || partEnum == LUMA_16x4 || partEnum == LUMA_32x24 ||
+            partEnum == LUMA_32x8 || partEnum == LUMA_64x48 || partEnum == LUMA_64x16;
+
+        int tempPartEnum = 0;
+
+        /* If a vertical rectangular partition, it is horizontally split into two, for ads_x2() */
+        if (verticalRect)
+            tempPartEnum = partitionFromSizes(w, h >> 1);
+        /* If a horizontal rectangular partition, it is vertically split into two, for ads_x2() */
+        else if (horizontalRect)
+            tempPartEnum = partitionFromSizes(w >> 1, h);
+        /* We have integral planes introduced to account for asymmetric partitions.
+         * Hence all asymmetric partitions except those which cannot be split into legal sizes,
+         * are split into four for ads_x4() */
+        else if (assymetricVertical || assymetricHorizontal)
+            tempPartEnum = smallRectPartition ? partEnum : partitionFromSizes(w >> 1, h >> 1);
+        /* General case: Square partitions. All partitions with width > 8 are split into four
+         * for ads_x4(), for 4x4 and 8x8 we do ads_x1() */
+        else
+            tempPartEnum = (w <= 8) ? partEnum : partitionFromSizes(w >> 1, h >> 1);
+
+        /* Successive elimination by comparing DC before a full SAD,
+         * because sum(abs(diff)) >= abs(diff(sum)). */
+        primitives.pu[tempPartEnum].sad_x4(zero,
+                         fenc,
+                         fenc + deltaX,
+                         fenc + deltaY * FENC_STRIDE,
+                         fenc + deltaX + deltaY * FENC_STRIDE,
+                         FENC_STRIDE,
+                         encDC);
+
+        /* Assigning appropriate integral plane */
+        uint32_t *sumsBase = NULL;
+        switch (deltaX)
+        {
+            case 32: if (deltaY % 24 == 0)
+                         sumsBase = integral[1];
+                     else if (deltaY == 8)
+                         sumsBase = integral[2];
+                     else
+                         sumsBase = integral[0];
+               break;
+            case 24: sumsBase = integral[3];
+               break;
+            case 16: if (deltaY % 12 == 0)
+                         sumsBase = integral[5];
+                     else if (deltaY == 4)
+                         sumsBase = integral[6];
+                     else
+                         sumsBase = integral[4];
+               break;
+            case 12: sumsBase = integral[7];
+                break;
+            case 8: if (deltaY == 32)
+                        sumsBase = integral[8];
+                    else
+                        sumsBase = integral[9];
+                break;
+            case 4: if (deltaY == 16)
+                        sumsBase = integral[10];
+                    else
+                        sumsBase = integral[11];
+                break;
+            default: sumsBase = integral[11];
+                break;
+        }
+
+        if (partEnum == LUMA_64x64 || partEnum == LUMA_32x32 || partEnum == LUMA_16x16 ||
+            partEnum == LUMA_32x64 || partEnum == LUMA_16x32 || partEnum == LUMA_8x16 ||
+            partEnum == LUMA_4x8 || partEnum == LUMA_12x16 || partEnum == LUMA_4x16 ||
+            partEnum == LUMA_24x32 || partEnum == LUMA_8x32 || partEnum == LUMA_48x64 ||
+            partEnum == LUMA_16x64)
+            deltaY *= (int)stride;
+
+        if (verticalRect)
+            encDC[1] = encDC[2];
+
+        if (horizontalRect)
+            deltaY = deltaX;
+
+        /* ADS and SAD */
+        MV tmv;
+        for (tmv.y = minY; tmv.y <= maxY; tmv.y++)
+        {
+            int i, xn;
+            int ycost = p_cost_mvy[tmv.y] << 2;
+            if (bcost <= ycost)
+                continue;
+            bcost -= ycost;
+
+            /* ADS_4 for 16x16, 32x32, 64x64, 24x32, 32x24, 48x64, 64x48, 32x8, 8x32, 64x16, 16x64 partitions
+             * ADS_1 for 4x4, 8x8, 16x4, 4x16, 16x12, 12x16 partitions
+             * ADS_2 for all other rectangular partitions */
+            xn = ads(encDC,
+                    sumsBase + minX + tmv.y * stride,
+                    deltaY,
+                    fpelCostMvX + minX,
+                    meScratchBuffer,
+                    meRangeWidth,
+                    bcost);
+
+            for (i = 0; i < xn - 2; i += 3)
+                COST_MV_X3_ABS(minX + meScratchBuffer[i], tmv.y,
+                             minX + meScratchBuffer[i + 1], tmv.y,
+                             minX + meScratchBuffer[i + 2], tmv.y);
+
+            bcost += ycost;
+            for (; i < xn; i++)
+                COST_MV(minX + meScratchBuffer[i], tmv.y);
+        }
+        if (meScratchBuffer)
+            x265_free(meScratchBuffer);
+        break;
+    }
+
     case X265_FULL_SEARCH:
     {
         // dead slow exhaustive search, but at least it uses sad_x4()
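One detail of the SEA case worth spelling out: encDC is filled by running sad_x4() of the encode block against an all-zero block, which collapses the SAD into plain pixel sums, i.e. the DC values of up to four sub-blocks in a single call; the per-row loop then skips whole rows whose vertical MV cost alone already exceeds bcost before ads() is even invoked. A compact scalar sketch of the zero-block trick (illustrative only, not the x265 primitive):

// SAD against an all-zero block degenerates into a plain pixel sum, which is
// how the SEA case fills encDC with the DC values of the PU sub-blocks.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

static int sad(const uint8_t* a, const uint8_t* b, int stride, int w, int h)
{
    int s = 0;
    for (int y = 0; y < h; y++)
        for (int x = 0; x < w; x++)
            s += std::abs(a[y * stride + x] - b[y * stride + x]);
    return s;
}

int main()
{
    const int stride = 16, w = 8, h = 8;
    uint8_t enc[stride * h];
    uint8_t zero[stride * h] = { 0 };
    for (int i = 0; i < stride * h; i++)
        enc[i] = (uint8_t)(i & 0xff);

    int dc = sad(enc, zero, stride, w, h);   // equals the sum of the 8x8 block's pixels
    printf("DC of the 8x8 sub-block = %d\n", dc);
    return 0;
}
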
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/motion.h
--- a/source/encoder/motion.h Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/motion.h Mon Nov 28 11:35:49 2016 +0530
@@ -52,6 +52,7 @@
     pixelcmp_t sad;
     pixelcmp_x3_t sad_x3;
     pixelcmp_x4_t sad_x4;
+    pixelcmp_ads_t ads;
     pixelcmp_t satd;
     pixelcmp_t chromaSatd;

@@ -61,6 +62,7 @@

     static const int COST_MAX = 1 << 28;

+    uint32_t* integral[INTEGRAL_PLANE_NUM];
     Yuv fencPUYuv;
     int partEnum;
     bool bChromaSATD;
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/encoder/search.cpp
--- a/source/encoder/search.cpp Fri Nov 25 12:57:52 2016 +0530
+++ b/source/encoder/search.cpp Mon Nov 28 11:35:49 2016 +0530
@@ -2243,7 +2243,13 @@
                         if (lmv.notZero())
                             mvc[numMvc++] = lmv;
                     }
-
+                    if (m_param->searchMethod == X265_SEA)
+                    {
+                        int puX = puIdx & 1;
+                        int puY = puIdx >> 1;
+                        for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
+                            m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride;
+                    }
                     setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
                     int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv,
                       m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/test/regression-tests.txt
--- a/source/test/regression-tests.txt Fri Nov 25 12:57:52 2016 +0530
+++ b/source/test/regression-tests.txt Mon Nov 28 11:35:49 2016 +0530
@@ -149,4 +149,7 @@
 CrowdRun_1920x1080_50_10bit_422.yuv,--preset faster --interlace tff
 CrowdRun_1920x1080_50_10bit_422.yuv,--preset fast --interlace bff

+#SEA Implementation Test
+silent_cif_420.y4m,--preset veryslow --me 4
+big_buck_bunny_360p24.y4m,--preset superfast --me 4
 # vim: tw=200
diff -r 5d95fbd53ca3 -r f8d523976ed6 source/x265.h
--- a/source/x265.h Fri Nov 25 12:57:52 2016 +0530
+++ b/source/x265.h Mon Nov 28 11:35:49 2016 +0530
@@ -290,6 +290,7 @@
     X265_HEX_SEARCH,
     X265_UMH_SEARCH,
     X265_STAR_SEARCH,
+    X265_SEA,
     X265_FULL_SEARCH
 } X265_ME_METHODS;

@@ -464,7 +465,7 @@
 } x265_stats;

 /* String values accepted by x265_param_parse() (and CLI) for various parameters */
-static const char * const x265_motion_est_names[] = { "dia", "hex", "umh", "star", "full", 0 };
+static const char * const x265_motion_est_names[] = { "dia", "hex", "umh", "star", "sea", "full", 0 };
 static const char * const x265_source_csp_names[] = { "i400", "i420", "i422", "i444", "nv12", "nv16", 0 };
 static const char * const x265_video_format_names[] = { "component", "pal", "ntsc", "secam", "mac", "undef", 0 };
 static const char * const x265_fullrange_names[] = { "limited", "full", 0 };
@@ -910,9 +911,9 @@
     /* Limit modes analyzed for each CU using cost metrics from the 4 sub-CUs */
     uint32_t limitModes;

-    /* ME search method (DIA, HEX, UMH, STAR, FULL). The search patterns
+    /* ME search method (DIA, HEX, UMH, STAR, SEA, FULL). The search patterns
      * (methods) are sorted in increasing complexity, with diamond being the
-     * simplest and fastest and full being the slowest.  DIA, HEX, and UMH were
+     * simplest and fastest and full being the slowest.  DIA, HEX, UMH and SEA were
      * adapted from x264 directly. STAR is an adaption of the HEVC reference
      * encoder's three step search, while full is a naive exhaustive search. The
      * default is the star search, it has a good balance of performance and