[x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate

Tue Apr 15 13:30:49 CEST 2014

# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1397561438 -28800
# Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a
# Parent  1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb
cleanup: reduce data size and dependency on MotionEstimate

diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp

--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Mon Apr 14 21:26:37 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Tue Apr 15 19:30:38 2014 +0800
@@ -111,8 +111,6 @@
     m_rdCost  = rdCost;
 
     initTempBuff(cfg->param->internalCsp);
-    m_me.setSearchMethod(cfg->param->searchMethod);
-    m_me.setSubpelRefine(cfg->param->subpelRefine);
 
     /* When frame parallelism is active, only 'refLagPixels' of reference frames will be guaranteed
      * available for motion reference.  See refLagRows in FrameEncoder::compressCTURows() */
@@ -2191,7 +2189,7 @@
         cu->getCUMvField(REF_PIC_LIST_1)->m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx;
 
         motionCompensation(cu, &m_predTempYuv, REF_PIC_LIST_X, puIdx, true, false);
-        uint32_t costCand = m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
+        uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride());
         uint32_t bitsCand = mergeCand + 1;
         if (mergeCand == m_cfg->param->maxNumMergeCand - 1)
         {
@@ -2314,7 +2312,7 @@
                     cu->clipMv(mvCand);
 
                     xPredInterLumaBlk(cu, cu->getSlice()->getRefPic(l, ref)->getPicYuvRec(), partAddr, &mvCand, roiWidth, roiHeight, &m_predTempYuv);
-                    uint32_t cost = m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
+                    uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride());
                     cost = m_rdCost->calcRdSADCost(cost, MVP_IDX_BITS);
 
                     if (bestCost > cost)
@@ -2328,11 +2326,11 @@
 
                 int merange = m_cfg->param->searchRange;
                 xSetSearchRange(cu, mvp, merange, mvmin, mvmax);
-                int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv);
+                int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg->param->searchMethod, m_cfg->param->subpelRefine);
 
                 /* Get total cost of partition, but only include MV bit cost once */
-                bits += m_me.bitcost(outmv);
-                uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost->getCost(bits);
+                bits += m_me.bitcost(outmv, mvp);
+                uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + m_rdCost->getCost(bits);
 
                 /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */
                 xCheckBestMVP(&amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, cost);
@@ -2368,7 +2366,7 @@
 
             int partEnum = partitionFromSizes(roiWidth, roiHeight);
             primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32);
-            int satdCost = m_me.bufSATD(avg, roiWidth);
+            int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);
 
             bidirBits = list[0].bits + list[1].bits + listSelBits[2] - (listSelBits[0] + listSelBits[1]);
             bidirCost = satdCost + m_rdCost->getCost(bidirBits);
@@ -2397,17 +2395,15 @@
                 intptr_t refStride = m_mref[0][0]->lumaStride;
 
                 primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, refStride, ref1, refStride, 32);
-                satdCost = m_me.bufSATD(avg, roiWidth);
+                satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth);
 
                 MV mvp0 = list[0].mvp;
                 int mvpIdx0 = list[0].mvpIdx;
-                m_me.setMVP(mvp0);
-                uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + m_me.bitcost(mvzero);
+                uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
 
                 MV mvp1 = list[1].mvp;
                 int mvpIdx1 = list[1].mvpIdx;
-                m_me.setMVP(mvp1);
-                uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + m_me.bitcost(mvzero);
+                uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
 
                 uint32_t cost = satdCost + m_rdCost->getCost(bits0) + m_rdCost->getCost(bits1);
 
@@ -2556,9 +2552,8 @@
 {
     assert(amvpInfo->m_mvCand[outMvpIdx] == mvPred);
 
-    m_me.setMVP(mvPred);
     int bestMvpIdx = outMvpIdx;
-    int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS;
+    int mvBitsOrig = m_me.bitcost(mv, mvPred) + MVP_IDX_BITS;
     int bestMvBits = mvBitsOrig;
 
     for (int mvpIdx = 0; mvpIdx < AMVP_MAX_NUM_CANDS; mvpIdx++)
@@ -2566,8 +2561,7 @@
         if (mvpIdx == outMvpIdx)
             continue;
 
-        m_me.setMVP(amvpInfo->m_mvCand[mvpIdx]);
-        int mvbits = m_me.bitcost(mv) + MVP_IDX_BITS;
+        int mvbits = m_me.bitcost(mv, amvpInfo->m_mvCand[mvpIdx]) + MVP_IDX_BITS;
 
         if (mvbits < bestMvBits)
         {
diff -r 1cf67a7b362d -r dd78d554f78d source/encoder/bitcost.h
--- a/source/encoder/bitcost.h	Mon Apr 14 21:26:37 2014 -0500
+++ b/source/encoder/bitcost.h	Tue Apr 15 19:30:38 2014 +0800
@@ -35,36 +35,26 @@
 {
 public:
 
-    BitCost() : m_cost_mvx(0), m_cost_mvy(0), m_cost(0) {}
+    BitCost() : m_cost(NULL) {}
 
     void setQP(unsigned int qp);
 
-    void setMVP(const MV& mvp)                      { m_mvp = mvp; m_cost_mvx = m_cost - mvp.x; m_cost_mvy = m_cost - mvp.y; }
-
     // return bit cost of motion vector difference, multiplied by lambda
-    inline uint16_t mvcost(const MV& mv) const      { return m_cost_mvx[mv.x] + m_cost_mvy[mv.y]; }
+    inline uint16_t mvcost(const MV mv, const MV mvp) const      { return m_cost[mv.x - mvp.x] + m_cost[mv.y - mvp.y]; }
 
     // return bit cost of motion vector difference, without lambda
-    inline uint16_t bitcost(const MV& mv) const
+    inline uint16_t bitcost(const MV mv, const MV mvp) const
     {
-        return (uint16_t)(s_bitsizes[(abs(mv.x - m_mvp.x) << 1) + !!(mv.x < m_mvp.x)] +
-                          s_bitsizes[(abs(mv.y - m_mvp.y) << 1) + !!(mv.y < m_mvp.y)] + 0.5f);
+        return (uint16_t)(s_bitsizes[(abs(mv.x - mvp.x) << 1) + !!(mv.x < mvp.x)] +
+                          s_bitsizes[(abs(mv.y - mvp.y) << 1) + !!(mv.y < mvp.y)] + 0.5f);
     }
 
     static void destroy();
 
 protected:
 
-    uint16_t *m_cost_mvx;
-
-    uint16_t *m_cost_mvy;
-
     uint16_t *m_cost;
 
-    MV        m_mvp;
-
-    BitCost& operator =(const BitCost&);
-
 private:
 
     static const int BC_MAX_MV = 0x8000;
@@ -73,6 +63,7 @@
 
     static float *s_bitsizes;
 
+    // TODO: remove this table; its size is (82 * (sizeof(uint16_t) * 64K + 32K)) = 13120 KB, which is too large to fit in any CPU cache
     static uint16_t *s_costs[BC_MAX_QP];
 
     static Lock s_costCalcLock;
diff -r 1cf67a7b362d -r dd78d554f78d source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Mon Apr 14 21:26:37 2014 -0500
+++ b/source/encoder/motion.cpp	Tue Apr 15 19:30:38 2014 +0800
@@ -93,8 +93,6 @@
 }
 
 MotionEstimate::MotionEstimate()
-    : searchMethod(3)
-    , subpelRefine(5)
 {
     if (size_scale[0] == 0)
         init_scales();
@@ -167,7 +165,7 @@
     { \
         MV tmv(mx, my); \
         int cost = sad(fenc, FENC_STRIDE, fref + mx + my * stride, stride); \
-        cost += mvcost(tmv << 2); \
+        cost += mvcost(tmv << 2, qmvp); \
         if (cost < bcost) { \
             bcost = cost; \
             bmv = tmv; \
@@ -180,7 +178,7 @@
     do \
     { \
         int cost = sad(fenc, FENC_STRIDE, fref + (mx) + (my) * stride, stride); \
-        cost += mvcost(MV(mx, my) << 2); \
+        cost += mvcost(MV(mx, my) << 2, qmvp); \
         COPY2_IF_LT(bcost, cost, bmv, MV(mx, my)); \
     } while (0)
 
@@ -192,9 +190,9 @@
                pix_base + (m1x) + (m1y) * stride, \
                pix_base + (m2x) + (m2y) * stride, \
                stride, costs); \
-        (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
-        (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
-        (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
+        (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2, qmvp); \
+        (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2, qmvp); \
+        (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2, qmvp); \
     }
 
 #define COST_MV_PT_DIST_X4(m0x, m0y, p0, d0, m1x, m1y, p1, d1, m2x, m2y, p2, d2, m3x, m3y, p3, d3) \
@@ -205,10 +203,10 @@
                fref + (m2x) + (m2y) * stride, \
                fref + (m3x) + (m3y) * stride, \
                stride, costs); \
-        costs[0] += mvcost(MV(m0x, m0y) << 2); \
-        costs[1] += mvcost(MV(m1x, m1y) << 2); \
-        costs[2] += mvcost(MV(m2x, m2y) << 2); \
-        costs[3] += mvcost(MV(m3x, m3y) << 2); \
+        costs[0] += mvcost(MV(m0x, m0y) << 2, qmvp); \
+        costs[1] += mvcost(MV(m1x, m1y) << 2, qmvp); \
+        costs[2] += mvcost(MV(m2x, m2y) << 2, qmvp); \
+        costs[3] += mvcost(MV(m3x, m3y) << 2, qmvp); \
         COPY4_IF_LT(bcost, costs[0], bmv, MV(m0x, m0y), bPointNr, p0, bDistance, d0); \
         COPY4_IF_LT(bcost, costs[1], bmv, MV(m1x, m1y), bPointNr, p1, bDistance, d1); \
         COPY4_IF_LT(bcost, costs[2], bmv, MV(m2x, m2y), bPointNr, p2, bDistance, d2); \
@@ -224,10 +222,10 @@
                pix_base + (m2x) + (m2y) * stride, \
                pix_base + (m3x) + (m3y) * stride, \
                stride, costs); \
-        costs[0] += mvcost((omv + MV(m0x, m0y)) << 2); \
-        costs[1] += mvcost((omv + MV(m1x, m1y)) << 2); \
-        costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \
-        costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \
+        costs[0] += mvcost((omv + MV(m0x, m0y)) << 2, qmvp); \
+        costs[1] += mvcost((omv + MV(m1x, m1y)) << 2, qmvp); \
+        costs[2] += mvcost((omv + MV(m2x, m2y)) << 2, qmvp); \
+        costs[3] += mvcost((omv + MV(m3x, m3y)) << 2, qmvp); \
         COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \
         COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \
         COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \
@@ -243,10 +241,10 @@
                pix_base + (m2x) + (m2y) * stride, \
                pix_base + (m3x) + (m3y) * stride, \
                stride, costs); \
-        (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2); \
-        (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2); \
-        (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2); \
-        (costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2); \
+        (costs)[0] += mvcost((bmv + MV(m0x, m0y)) << 2, qmvp); \
+        (costs)[1] += mvcost((bmv + MV(m1x, m1y)) << 2, qmvp); \
+        (costs)[2] += mvcost((bmv + MV(m2x, m2y)) << 2, qmvp); \
+        (costs)[3] += mvcost((bmv + MV(m3x, m3y)) << 2, qmvp); \
     }
 
 #define DIA1_ITER(mx, my) \
@@ -285,6 +283,7 @@
                                        const MV &       mvmin,
                                        const MV &       mvmax,
                                        MV &             bmv,
+                                       const MV &       qmvp,
                                        int &            bcost,
                                        int &            bPointNr,
                                        int &            bDistance,
@@ -530,13 +529,15 @@
                                    int              numCandidates,
                                    const MV *       mvc,
                                    int              merange,
-                                   MV &             outQMv)
+                                   MV &             outQMv,
+                                   int              searchMethod,
+                                   int              subpelRefine)
 {
     ALIGN_VAR_16(int, costs[16]);
     size_t stride = ref->lumaStride;
     pixel *fref = ref->fpelPlane + blockOffset;
 
-    setMVP(qmvp);
+    // setMVP(qmvp) is no longer needed: qmvp is passed explicitly to every mvcost() call below
 
     MV qmvmin = mvmin.toQPel();
     MV qmvmax = mvmax.toQPel();
@@ -563,13 +564,13 @@
     int bcost = bprecost;
     if (pmv.isSubpel())
     {
-        bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
+        bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2, qmvp);
     }
 
     // measure SAD cost at MV(0) if MVP is not zero
     if (pmv.notZero())
     {
-        int cost = sad(fenc, FENC_STRIDE, fref, stride) + mvcost(MV(0, 0));
+        int cost = sad(fenc, FENC_STRIDE, fref, stride) + mvcost(MV(0, 0), qmvp);
         if (cost < bcost)
         {
             bcost = cost;
@@ -585,9 +586,9 @@
         {
             int cost;
             if (ref->isLowres)
-                cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m);
+                cost = ref->lowresQPelCost(fenc, blockOffset, m, sad) + mvcost(m, qmvp);
             else
-                cost = subpelCompare(ref, m, sad) + mvcost(m);
+                cost = subpelCompare(ref, m, sad) + mvcost(m, qmvp);
 
             if (cost < bprecost)
             {
@@ -801,8 +802,8 @@
 
         /* hexagon grid */
         omv = bmv;
-        const uint16_t *p_cost_omvx = m_cost_mvx + omv.x * 4;
-        const uint16_t *p_cost_omvy = m_cost_mvy + omv.y * 4;
+        const uint16_t *p_cost_omvx = m_cost - qmvp.x + omv.x * 4;
+        const uint16_t *p_cost_omvy = m_cost - qmvp.y + omv.y * 4;
         uint16_t i = 1;
         do
         {
@@ -890,7 +891,7 @@
         int bDistance = 0;
 
         const int EarlyExitIters = 3;
-        StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bPointNr, bDistance, EarlyExitIters, merange);
+        StarPatternSearch(ref, mvmin, mvmax, bmv, qmvp, bcost, bPointNr, bDistance, EarlyExitIters, merange);
         if (bDistance == 1)
         {
             // if best distance was only 1, check two missing points.  If no new point is found, stop
@@ -939,16 +940,16 @@
                                pix_base + RasterDistance * 2,
                                pix_base + RasterDistance * 3,
                                stride, costs);
-                        costs[0] += mvcost(tmv << 2);
+                        costs[0] += mvcost(tmv << 2, qmvp);
                         COPY2_IF_LT(bcost, costs[0], bmv, tmv);
                         tmv.x += RasterDistance;
-                        costs[1] += mvcost(tmv << 2);
+                        costs[1] += mvcost(tmv << 2, qmvp);
                         COPY2_IF_LT(bcost, costs[1], bmv, tmv);
                         tmv.x += RasterDistance;
-                        costs[2] += mvcost(tmv << 2);
+                        costs[2] += mvcost(tmv << 2, qmvp);
                         COPY2_IF_LT(bcost, costs[2], bmv, tmv);
                         tmv.x += RasterDistance;
-                        costs[3] += mvcost(tmv << 3);
+                        costs[3] += mvcost(tmv << 3, qmvp);
                         COPY2_IF_LT(bcost, costs[3], bmv, tmv);
                     }
                     else
@@ -963,7 +964,7 @@
             bDistance = 0;
             bPointNr = 0;
             const int MaxIters = 32;
-            StarPatternSearch(ref, mvmin, mvmax, bmv, bcost, bPointNr, bDistance, MaxIters, merange);
+            StarPatternSearch(ref, mvmin, mvmax, bmv, qmvp, bcost, bPointNr, bDistance, MaxIters, merange);
 
             if (bDistance == 1)
             {
@@ -1011,16 +1012,16 @@
                            pix_base + 2,
                            pix_base + 3,
                            stride, costs);
-                    costs[0] += mvcost(tmv << 2);
+                    costs[0] += mvcost(tmv << 2, qmvp);
                     COPY2_IF_LT(bcost, costs[0], bmv, tmv);
                     tmv.x++;
-                    costs[1] += mvcost(tmv << 2);
+                    costs[1] += mvcost(tmv << 2, qmvp);
                     COPY2_IF_LT(bcost, costs[1], bmv, tmv);
                     tmv.x++;
-                    costs[2] += mvcost(tmv << 2);
+                    costs[2] += mvcost(tmv << 2, qmvp);
                     COPY2_IF_LT(bcost, costs[2], bmv, tmv);
                     tmv.x++;
-                    costs[3] += mvcost(tmv << 2);
+                    costs[3] += mvcost(tmv << 2, qmvp);
                     COPY2_IF_LT(bcost, costs[3], bmv, tmv);
                 }
                 else
@@ -1044,7 +1045,7 @@
     else
         bmv = bmv.toQPel(); // promote search bmv to qpel
 
-    SubpelWorkload& wl = workload[this->subpelRefine];
+    SubpelWorkload& wl = workload[subpelRefine];
 
     if (!bcost)
     {
@@ -1056,18 +1057,18 @@
         for (int i = 1; i <= wl.hpel_dirs; i++)
         {
             MV qmv = bmv + square1[i] * 2;
-            cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv);
+            cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + mvcost(qmv, qmvp);
             COPY2_IF_LT(bcost, cost, bdir, i);
         }
 
         bmv += square1[bdir] * 2;
-        bcost = ref->lowresQPelCost(fenc, blockOffset, bmv, satd) + mvcost(bmv);
+        bcost = ref->lowresQPelCost(fenc, blockOffset, bmv, satd) + mvcost(bmv, qmvp);
 
         bdir = 0;
         for (int i = 1; i <= wl.qpel_dirs; i++)
         {
             MV qmv = bmv + square1[i];
-            cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv);
+            cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) + mvcost(qmv, qmvp);
             COPY2_IF_LT(bcost, cost, bdir, i);
         }
 
@@ -1079,7 +1080,7 @@
 
         if (wl.hpel_satd)
         {
-            bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+            bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv, qmvp);
             hpelcomp = satd;
         }
         else
@@ -1091,7 +1092,7 @@
             for (int i = 1; i <= wl.hpel_dirs; i++)
             {
                 MV qmv = bmv + square1[i] * 2;
-                cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
+                cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv, qmvp);
                 COPY2_IF_LT(bcost, cost, bdir, i);
             }
 
@@ -1100,7 +1101,7 @@
 
         /* if HPEL search used SAD, remeasure with SATD before QPEL */
         if (!wl.hpel_satd)
-            bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+            bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv, qmvp);
 
         for (int iter = 0; iter < wl.qpel_iters; iter++)
         {
@@ -1108,7 +1109,7 @@
             for (int i = 1; i <= wl.qpel_dirs; i++)
             {
                 MV qmv = bmv + square1[i];
-                cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
+                cost = subpelCompare(ref, qmv, satd) + mvcost(qmv, qmvp);
                 COPY2_IF_LT(bcost, cost, bdir, i);
             }
 
diff -r 1cf67a7b362d -r dd78d554f78d source/encoder/motion.h
--- a/source/encoder/motion.h	Mon Apr 14 21:26:37 2014 -0500
+++ b/source/encoder/motion.h	Tue Apr 15 19:30:38 2014 +0800
@@ -32,10 +32,8 @@
 namespace x265 {
 // private x265 namespace
 
-class MotionEstimate : public BitCost
+struct MotionEstimate : public BitCost
 {
-protected:
-
     /* Aligned copy of original pixels, extra room for manual alignment */
     pixel *fencplane;
     intptr_t fencLumaStride;
@@ -48,17 +46,11 @@
 
     intptr_t blockOffset;
     int partEnum;
-    int searchMethod;
-    int subpelRefine;
 
     /* subpel generation buffers */
     int blockwidth;
     int blockheight;
 
-    MotionEstimate& operator =(const MotionEstimate&);
-
-public:
-
     static const int COST_MAX = 1 << 28;
 
     pixel *fenc;
@@ -67,10 +59,6 @@
 
     virtual ~MotionEstimate();
 
-    void setSearchMethod(int i) { searchMethod = i; }
-
-    void setSubpelRefine(int i) { subpelRefine = i; }
-
     /* Methods called at slice setup */
 
     void setSourcePlane(pixel *Y, intptr_t luma)
@@ -84,13 +72,7 @@
     /* buf*() and motionEstimate() methods all use cached fenc pixels and thus
      * require setSourcePU() to be called prior. */
 
-    inline int bufSAD(pixel *fref, intptr_t stride)  { return sad(fenc, FENC_STRIDE, fref, stride); }
-
-    inline int bufSA8D(pixel *fref, intptr_t stride) { return sa8d(fenc, FENC_STRIDE, fref, stride); }
-
-    inline int bufSATD(pixel *fref, intptr_t stride) { return satd(fenc, FENC_STRIDE, fref, stride); }
-
-    int motionEstimate(ReferencePlanes *ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv);
+    int motionEstimate(ReferencePlanes *ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv, int searchMethod, int subpelRefine);
 
     int subpelCompare(ReferencePlanes * ref, const MV &qmv, pixelcmp_t);
 
@@ -100,6 +82,7 @@
                                   const MV &       mvmin,
                                   const MV &       mvmax,
                                   MV &             bmv,
+                                  const MV &       mvp,
                                   int &            bcost,
                                   int &            bPointNr,
                                   int &            bDistance,
diff -r 1cf67a7b362d -r dd78d554f78d source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Mon Apr 14 21:26:37 2014 -0500
+++ b/source/encoder/slicetype.cpp	Tue Apr 15 19:30:38 2014 +0800
@@ -1496,7 +1496,7 @@
                 median_mv(mvp, mvc[0], mvc[1], mvc[2]);
             }
 
-            *fenc_costs[i] = me.motionEstimate(i ? fref1 : wfref0, mvmin, mvmax, mvp, numc, mvc, merange, *fenc_mvs[i]);
+            *fenc_costs[i] = me.motionEstimate(i ? fref1 : wfref0, mvmin, mvmax, mvp, numc, mvc, merange, *fenc_mvs[i], X265_HEX_SEARCH, 1);
             COPY2_IF_LT(bcost, *fenc_costs[i], listused, i + 1);
         }
         if (bBidir)
diff -r 1cf67a7b362d -r dd78d554f78d source/encoder/slicetype.h
--- a/source/encoder/slicetype.h	Mon Apr 14 21:26:37 2014 -0500
+++ b/source/encoder/slicetype.h	Tue Apr 15 19:30:38 2014 +0800
@@ -66,8 +66,6 @@
     EstimateRow()
     {
         me.setQP(X265_LOOKAHEAD_QP);
-        me.setSearchMethod(X265_HEX_SEARCH);
-        me.setSubpelRefine(1);
         predictions = X265_MALLOC(pixel, 35 * 8 * 8);
         merange = 16;
         lookAheadLambda = (int)x265_lambda2_non_I[X265_LOOKAHEAD_QP];