<div dir="ltr"><div># HG changeset patch</div><div># User Deepthi Devaki <<a href="mailto:deepthidevaki@multicorewareinc.com">deepthidevaki@multicorewareinc.com</a>></div><div># Date 1380523193 -19800</div><div># Node ID 279b22603d7a01408abe971846267d7b2647640d</div>
<div># Parent  55edc34e253c14d3eccb83a7d1db43774349ff9a</div><div>Bidir ME: new logic adapted from x264</div><div><br></div><div>L0 and L1 MVs from unidir ME used for bidir MV. bidir cost is calculated from the average of references. Performance/PSNR with new bidir with commandline</div>
<div>x265.exe FourPeople_1280x720_60.y4m  -f 100 --b-adapt 0 -b 3 --ref 1 --hash 1 -o four.hevc -r recon.yuv</div><div>New:  (2.25 fps), 516.28 kb/s, Global PSNR: 39.702</div><div>Orig: (2.05 fps), 519.47 kb/s, Global PSNR: 39.711</div>
<div><br></div><div>diff -r 55edc34e253c -r 279b22603d7a source/Lib/TLibEncoder/TEncSearch.cpp</div><div>--- a/source/Lib/TLibEncoder/TEncSearch.cpp<span class="" style="white-space:pre">       </span>Sat Sep 28 22:54:44 2013 -0500</div>
<div>+++ b/source/Lib/TLibEncoder/TEncSearch.cpp<span class="" style="white-space:pre"> </span>Mon Sep 30 12:09:53 2013 +0530</div><div>@@ -2289,7 +2289,7 @@</div><div>  * \param bUseRes</div><div>  * \returns void</div><div>
  */</div><div>-void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, bool bUseMRG)</div><div>+void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* /*fencYuv*/, TComYuv* predYuv, bool bUseMRG)</div>
<div> {</div><div>     m_predYuv[0].clear();</div><div>     m_predYuv[1].clear();</div><div>@@ -2317,7 +2317,6 @@</div><div> </div><div>     UInt partAddr;</div><div>     int  roiWidth, roiHeight;</div><div>-    int refStart, refEnd;</div>
<div> </div><div>     PartSize partSize = cu->getPartitionSize(0);</div><div>     int bestBiPRefIdxL1 = 0;</div><div>@@ -2483,8 +2482,6 @@</div><div>             //  Bi-directional prediction</div><div>             if ((cu->getSlice()->isInterB()) && (cu->isBipredRestriction(partIdx) == false))</div>
<div>             {</div><div>-                UInt motBits[2];</div><div>-</div><div>                 mvBidir[0] = mv[0];</div><div>                 mvBidir[1] = mv[1];</div><div>                 refIdxBidir[0] = refIdx[0];</div>
<div>@@ -2492,112 +2489,22 @@</div><div> </div><div>                 ::memcpy(mvPredBi, mvPred, sizeof(mvPred));</div><div>                 ::memcpy(mvpIdxBi, mvpIdx, sizeof(mvpIdx));</div><div>-</div><div>-                if (cu->getSlice()->getMvdL1ZeroFlag())</div>
<div>-                {</div><div>-                    xCopyAMVPInfo(&amvpInfo[1][bestBiPRefIdxL1], cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());</div><div>-                    cu->setMVPIdxSubParts(bestBiPMvpL1, REF_PIC_LIST_1, partAddr, partIdx, cu->getDepth(partAddr));</div>
<div>-                    mvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;</div><div>-                    mvPredBi[1][bestBiPRefIdxL1] = cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()->m_mvCand[bestBiPMvpL1];</div><div>
-</div><div>-                    mvBidir[1] = mvPredBi[1][bestBiPRefIdxL1];</div><div>-                    refIdxBidir[1] = bestBiPRefIdxL1;</div><div>-                    cu->getCUMvField(REF_PIC_LIST_1)->setAllMv(mvBidir[1], partSize, partAddr, 0, partIdx);</div>
<div>-                    cu->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx(refIdxBidir[1], partSize, partAddr, 0, partIdx);</div><div>-                    motionCompensation(cu, &m_predYuv[1], REF_PIC_LIST_1, partIdx);</div>
<div>-</div><div>-                    motBits[0] = bits[0] - mbBits[0];</div><div>-                    motBits[1] = mbBits[1];</div><div>-</div><div>-                    if (cu->getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 1)</div>
<div>-                    {</div><div>-                        motBits[1] += bestBiPRefIdxL1 + 1;</div><div>-                        if (bestBiPRefIdxL1 == cu->getSlice()->getNumRefIdx(REF_PIC_LIST_1) - 1) motBits[1]--;</div>
<div>-                    }</div><div>-</div><div>-                    motBits[1] += m_mvpIdxCost[mvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];</div><div>-</div><div>-                    bits[2] = mbBits[2] + motBits[0] + motBits[1];</div>
<div>-</div><div>-                    mvTemp[1][bestBiPRefIdxL1] = mvBidir[1];</div><div>-                }</div><div>-                else</div><div>-                {</div><div>-                    motBits[0] = bits[0] - mbBits[0];</div>
<div>-                    motBits[1] = bits[1] - mbBits[1];</div><div>-                    bits[2] = mbBits[2] + motBits[0] + motBits[1];</div><div>-                }</div><div>-</div><div>-                int refList = 0;</div>
<div>-                if (listCost[0] <= listCost[1])</div><div>-                {</div><div>-                    refList = 1;</div><div>-                }</div><div>-                else</div><div>-                {</div>
<div>-                    refList = 0;</div><div>-                }</div><div>-                if (!cu->getSlice()->getMvdL1ZeroFlag())</div><div>-                {</div><div>-                    cu->getCUMvField(RefPicList(1 - refList))->setAllMv(mv[1 - refList], partSize, partAddr, 0, partIdx);</div>
<div>-                    cu->getCUMvField(RefPicList(1 - refList))->setAllRefIdx(refIdx[1 - refList], partSize, partAddr, 0, partIdx);</div><div>-                    motionCompensation(cu, &m_predYuv[1 - refList], RefPicList(1 - refList), partIdx);</div>
<div>-                }</div><div>-                RefPicList  picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);</div><div>-</div><div>-                if (cu->getSlice()->getMvdL1ZeroFlag())</div><div>-                {</div>
<div>-                    refList = 0;</div><div>-                    picList = REF_PIC_LIST_0;</div><div>-                }</div><div>-</div><div>-                bool bChanged = false;</div><div>-</div><div>-                refStart = 0;</div>
<div>-                refEnd   = cu->getSlice()->getNumRefIdx(picList) - 1;</div><div>-</div><div>-                for (int refIdxTmp = refStart; refIdxTmp <= refEnd; refIdxTmp++)</div><div>-                {</div>
<div>-                    bitsTemp = mbBits[2] + motBits[1 - refList];</div><div>-                    if (cu->getSlice()->getNumRefIdx(picList) > 1)</div><div>-                    {</div><div>-                        bitsTemp += refIdxTmp + 1;</div>
<div>-                        if (refIdxTmp == cu->getSlice()->getNumRefIdx(picList) - 1) bitsTemp--;</div><div>-                    }</div><div>-                    bitsTemp += m_mvpIdxCost[mvpIdxBi[refList][refIdxTmp]][AMVP_MAX_NUM_CANDS];</div>
<div>-                    // call bidir ME</div><div>-                    xMotionEstimation(cu, fencYuv, partIdx, picList, &mvPredBi[refList][refIdxTmp], refIdxTmp, mvTemp[refList][refIdxTmp],</div><div>-                                      bitsTemp, costTemp);</div>
<div>-                    xCopyAMVPInfo(&amvpInfo[refList][refIdxTmp], cu->getCUMvField(picList)->getAMVPInfo());</div><div>-                    xCheckBestMVP(cu, picList, mvTemp[refList][refIdxTmp], mvPredBi[refList][refIdxTmp], mvpIdxBi[refList][refIdxTmp],</div>
<div>-                                  bitsTemp, costTemp);</div><div>-</div><div>-                    if (costTemp < costbi)</div><div>-                    {</div><div>-                        bChanged = true;</div><div>
-</div><div>-                        mvBidir[refList]     = mvTemp[refList][refIdxTmp];</div><div>-                        refIdxBidir[refList] = refIdxTmp;</div><div>-</div><div>-                        costbi           = costTemp;</div>
<div>-                        motBits[refList] = bitsTemp - mbBits[2] - motBits[1 - refList];</div><div>-                        bits[2]          = bitsTemp;</div><div>-                    }</div><div>-                } // for loop-refIdxTmp</div>
<div>-</div><div>-                if (!bChanged)</div><div>-                {</div><div>-                    if (costbi <= listCost[0] && costbi <= listCost[1])</div><div>-                    {</div><div>-                        xCopyAMVPInfo(&amvpInfo[0][refIdxBidir[0]], cu->getCUMvField(REF_PIC_LIST_0)->getAMVPInfo());</div>
<div>-                        xCheckBestMVP(cu, REF_PIC_LIST_0, mvBidir[0], mvPredBi[0][refIdxBidir[0]], mvpIdxBi[0][refIdxBidir[0]], bits[2], costbi);</div><div>-                        if (!cu->getSlice()->getMvdL1ZeroFlag())</div>
<div>-                        {</div><div>-                            xCopyAMVPInfo(&amvpInfo[1][refIdxBidir[1]], cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());</div><div>-                            xCheckBestMVP(cu, REF_PIC_LIST_1, mvBidir[1], mvPredBi[1][refIdxBidir[1]], mvpIdxBi[1][refIdxBidir[1]], bits[2], costbi);</div>
<div>-                        }</div><div>-                    }</div><div>-                }</div><div>+                </div><div>+                pixel *ref0,*ref1;</div><div>+</div><div>+                xPredInterLumaBlk(cu, cu->getSlice()->m_mref[0][refIdx[0]], partAddr, &mv[0], roiWidth, roiHeight, &m_predYuv[0]);</div>
<div>+                xPredInterLumaBlk(cu, cu->getSlice()->m_mref[1][refIdx[1]], partAddr, &mv[1], roiWidth, roiHeight, &m_predYuv[1]);</div><div>+</div><div>+                ref0 = m_predYuv[0].getLumaAddr(partAddr);</div>
<div>+                ref1 = m_predYuv[1].getLumaAddr(partAddr);</div><div>+               </div><div>+                pixel avg[MAX_CU_SIZE * MAX_CU_SIZE];</div><div>+</div><div>+                int partEnum = PartitionFromSizes(roiWidth, roiHeight);</div>
<div>+                primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, ref1, m_predYuv[0].getStride(), m_predYuv[1].getStride());</div><div>+                </div><div>+                int satdCost = primitives.satd[partEnum](pu, fenc->getStride(), avg, roiWidth);</div>
<div>+                costbi =  satdCost + m_rdCost->getCost(bits[0]) + m_rdCost->getCost(bits[1]);</div><div>             } // if (B_SLICE)</div><div>         } //end if bTestNormalMC</div><div> </div><div>diff -r 55edc34e253c -r 279b22603d7a source/common/pixel.cpp</div>
<div>--- a/source/common/pixel.cpp<span class="" style="white-space:pre">       </span>Sat Sep 28 22:54:44 2013 -0500</div><div>+++ b/source/common/pixel.cpp<span class="" style="white-space:pre">        </span>Mon Sep 30 12:09:53 2013 +0530</div>
<div>@@ -578,6 +578,21 @@</div><div>     }</div><div> }</div><div> </div><div>+template<int lx, int ly></div><div>+void pixelavg_pp(pixel* dst, intptr_t dstride, pixel* src0, pixel* src1, intptr_t sstride0, intptr_t sstride1)</div>
<div>+{</div><div>+    for( int y = 0; y < ly; y++ )</div><div>+    {</div><div>+        for( int x = 0; x < lx; x++ )</div><div>+        {</div><div>+            dst[x] = ( src0[x] + src1[x] + 1 ) >> 1;</div>
<div>+        }</div><div>+        src0 += sstride0;</div><div>+        src1 += sstride1;</div><div>+        dst += dstride;</div><div>+    }</div><div>+}</div><div>+</div><div> void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)</div>
<div> {</div><div>     int x;</div><div>@@ -647,6 +662,7 @@</div><div>     SET_FUNC_PRIMITIVE_TABLE_C2(sad)</div><div>     SET_FUNC_PRIMITIVE_TABLE_C2(sad_x3)</div><div>     SET_FUNC_PRIMITIVE_TABLE_C2(sad_x4)</div><div>+    SET_FUNC_PRIMITIVE_TABLE_C2(pixelavg_pp)</div>
<div> </div><div>     // satd</div><div>     p.satd[PARTITION_4x4]   = satd_4x4;</div><div>diff -r 55edc34e253c -r 279b22603d7a source/common/primitives.h</div><div>--- a/source/common/primitives.h<span class="" style="white-space:pre">    </span>Sat Sep 28 22:54:44 2013 -0500</div>
<div>+++ b/source/common/primitives.h<span class="" style="white-space:pre">    </span>Mon Sep 30 12:09:53 2013 +0530</div><div>@@ -196,6 +196,7 @@</div><div> typedef void (*pixelsub_sp_t)(int bx, int by, short *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);</div>
<div> typedef void (*pixeladd_ss_t)(int bx, int by, short *dst, intptr_t dstride, short *src0, short *src1, intptr_t sstride0, intptr_t sstride1);</div><div> typedef void (*pixeladd_pp_t)(int bx, int by, pixel *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);</div>
<div>+typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);</div><div> typedef void (*blockfil_s_t)(short *dst, intptr_t dstride, short val);</div><div>
 </div><div> typedef void (*intra_dc_t)(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int width, int bFilter);</div><div>@@ -290,6 +291,7 @@</div><div>     pixelsub_sp_t   pixelsub_sp;</div><div>     pixeladd_ss_t   pixeladd_ss;</div>
<div>     pixeladd_pp_t   pixeladd_pp;</div><div>+    pixelavg_pp_t   pixelavg_pp[NUM_PARTITIONS];</div><div> </div><div>     filterVwghtd_t  filterVwghtd;</div><div>     filterHwghtd_t  filterHwghtd;</div><div><br></div>
<div><br></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Sep 30, 2013 at 1:54 PM, Deepthi Devaki Akkoorath <span dir="ltr"><<a href="mailto:deepthidevaki@multicorewareinc.com" target="_blank">deepthidevaki@multicorewareinc.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote"><div><div class="h5">On Mon, Sep 30, 2013 at 12:35 PM,  <span dir="ltr"><<a href="mailto:deepthidevaki@multicorewareinc.com" target="_blank">deepthidevaki@multicorewareinc.com</a>></span> wrote:<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Deepthi Devaki <<a href="mailto:deepthidevaki@multicorewareinc.com" target="_blank">deepthidevaki@multicorewareinc.com</a>><br>
# Date 1380523193 -19800<br>
# Node ID bb238e8b36007aad896884009c720d26df8775c5<br>
# Parent  55edc34e253c14d3eccb83a7d1db43774349ff9a<br>
Bidir ME: new logic adapted from x264<br>
<br>
L0 and L1 MVs from unidir ME used for bidir MV. bidir cost is calculated from the average of references. Performance/PSNR with new bidir with commandline<br>
x265.exe FourPeople_1280x720_60.y4m  -f 100 --b-adapt 0 -b 3 --ref 1 --hash 1 -o four.hevc -r recon.yuv<br>
New:  (2.24 fps), 515.16 kb/s, Global PSNR: 39.704<br>
Orig: (2.05 fps), 519.47 kb/s, Global PSNR: 39.711<br>
<br>
diff -r 55edc34e253c -r bb238e8b3600 source/Lib/TLibEncoder/TEncSearch.cpp<br>
--- a/source/Lib/TLibEncoder/TEncSearch.cpp     Sat Sep 28 22:54:44 2013 -0500<br>
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Mon Sep 30 12:09:53 2013 +0530<br>
@@ -2289,7 +2289,7 @@<br>
  * \param bUseRes<br>
  * \returns void<br>
  */<br>
-void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, bool bUseMRG)<br>
+void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* /*fencYuv*/, TComYuv* predYuv, bool bUseMRG)<br>
 {<br>
     m_predYuv[0].clear();<br>
     m_predYuv[1].clear();<br>
@@ -2317,7 +2317,6 @@<br>
<br>
     UInt partAddr;<br>
     int  roiWidth, roiHeight;<br>
-    int refStart, refEnd;<br>
<br>
     PartSize partSize = cu->getPartitionSize(0);<br>
     int bestBiPRefIdxL1 = 0;<br>
@@ -2483,8 +2482,6 @@<br>
             //  Bi-directional prediction<br>
             if ((cu->getSlice()->isInterB()) && (cu->isBipredRestriction(partIdx) == false))<br>
             {<br>
-                UInt motBits[2];<br>
-<br>
                 mvBidir[0] = mv[0];<br>
                 mvBidir[1] = mv[1];<br>
                 refIdxBidir[0] = refIdx[0];<br>
@@ -2492,112 +2489,17 @@<br>
<br>
                 ::memcpy(mvPredBi, mvPred, sizeof(mvPred));<br>
                 ::memcpy(mvpIdxBi, mvpIdx, sizeof(mvpIdx));<br>
-<br>
-                if (cu->getSlice()->getMvdL1ZeroFlag())<br>
-                {<br>
-                    xCopyAMVPInfo(&amvpInfo[1][bestBiPRefIdxL1], cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());<br>
-                    cu->setMVPIdxSubParts(bestBiPMvpL1, REF_PIC_LIST_1, partAddr, partIdx, cu->getDepth(partAddr));<br>
-                    mvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;<br>
-                    mvPredBi[1][bestBiPRefIdxL1] = cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()->m_mvCand[bestBiPMvpL1];<br>
-<br>
-                    mvBidir[1] = mvPredBi[1][bestBiPRefIdxL1];<br>
-                    refIdxBidir[1] = bestBiPRefIdxL1;<br>
-                    cu->getCUMvField(REF_PIC_LIST_1)->setAllMv(mvBidir[1], partSize, partAddr, 0, partIdx);<br>
-                    cu->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx(refIdxBidir[1], partSize, partAddr, 0, partIdx);<br>
-                    motionCompensation(cu, &m_predYuv[1], REF_PIC_LIST_1, partIdx);<br>
-<br>
-                    motBits[0] = bits[0] - mbBits[0];<br>
-                    motBits[1] = mbBits[1];<br>
-<br>
-                    if (cu->getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 1)<br>
-                    {<br>
-                        motBits[1] += bestBiPRefIdxL1 + 1;<br>
-                        if (bestBiPRefIdxL1 == cu->getSlice()->getNumRefIdx(REF_PIC_LIST_1) - 1) motBits[1]--;<br>
-                    }<br>
-<br>
-                    motBits[1] += m_mvpIdxCost[mvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];<br>
-<br>
-                    bits[2] = mbBits[2] + motBits[0] + motBits[1];<br>
-<br>
-                    mvTemp[1][bestBiPRefIdxL1] = mvBidir[1];<br>
-                }<br>
-                else<br>
-                {<br>
-                    motBits[0] = bits[0] - mbBits[0];<br>
-                    motBits[1] = bits[1] - mbBits[1];<br>
-                    bits[2] = mbBits[2] + motBits[0] + motBits[1];<br>
-                }<br>
-<br>
-                int refList = 0;<br>
-                if (listCost[0] <= listCost[1])<br>
-                {<br>
-                    refList = 1;<br>
-                }<br>
-                else<br>
-                {<br>
-                    refList = 0;<br>
-                }<br>
-                if (!cu->getSlice()->getMvdL1ZeroFlag())<br>
-                {<br>
-                    cu->getCUMvField(RefPicList(1 - refList))->setAllMv(mv[1 - refList], partSize, partAddr, 0, partIdx);<br>
-                    cu->getCUMvField(RefPicList(1 - refList))->setAllRefIdx(refIdx[1 - refList], partSize, partAddr, 0, partIdx);<br>
-                    motionCompensation(cu, &m_predYuv[1 - refList], RefPicList(1 - refList), partIdx);<br>
-                }<br>
-                RefPicList  picList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);<br>
-<br>
-                if (cu->getSlice()->getMvdL1ZeroFlag())<br>
-                {<br>
-                    refList = 0;<br>
-                    picList = REF_PIC_LIST_0;<br>
-                }<br>
-<br>
-                bool bChanged = false;<br>
-<br>
-                refStart = 0;<br>
-                refEnd   = cu->getSlice()->getNumRefIdx(picList) - 1;<br>
-<br>
-                for (int refIdxTmp = refStart; refIdxTmp <= refEnd; refIdxTmp++)<br>
-                {<br>
-                    bitsTemp = mbBits[2] + motBits[1 - refList];<br>
-                    if (cu->getSlice()->getNumRefIdx(picList) > 1)<br>
-                    {<br>
-                        bitsTemp += refIdxTmp + 1;<br>
-                        if (refIdxTmp == cu->getSlice()->getNumRefIdx(picList) - 1) bitsTemp--;<br>
-                    }<br>
-                    bitsTemp += m_mvpIdxCost[mvpIdxBi[refList][refIdxTmp]][AMVP_MAX_NUM_CANDS];<br>
-                    // call bidir ME<br>
-                    xMotionEstimation(cu, fencYuv, partIdx, picList, &mvPredBi[refList][refIdxTmp], refIdxTmp, mvTemp[refList][refIdxTmp],<br>
-                                      bitsTemp, costTemp);<br>
-                    xCopyAMVPInfo(&amvpInfo[refList][refIdxTmp], cu->getCUMvField(picList)->getAMVPInfo());<br>
-                    xCheckBestMVP(cu, picList, mvTemp[refList][refIdxTmp], mvPredBi[refList][refIdxTmp], mvpIdxBi[refList][refIdxTmp],<br>
-                                  bitsTemp, costTemp);<br>
-<br>
-                    if (costTemp < costbi)<br>
-                    {<br>
-                        bChanged = true;<br>
-<br>
-                        mvBidir[refList]     = mvTemp[refList][refIdxTmp];<br>
-                        refIdxBidir[refList] = refIdxTmp;<br>
-<br>
-                        costbi           = costTemp;<br>
-                        motBits[refList] = bitsTemp - mbBits[2] - motBits[1 - refList];<br>
-                        bits[2]          = bitsTemp;<br>
-                    }<br>
-                } // for loop-refIdxTmp<br>
-<br>
-                if (!bChanged)<br>
-                {<br>
-                    if (costbi <= listCost[0] && costbi <= listCost[1])<br>
-                    {<br>
-                        xCopyAMVPInfo(&amvpInfo[0][refIdxBidir[0]], cu->getCUMvField(REF_PIC_LIST_0)->getAMVPInfo());<br>
-                        xCheckBestMVP(cu, REF_PIC_LIST_0, mvBidir[0], mvPredBi[0][refIdxBidir[0]], mvpIdxBi[0][refIdxBidir[0]], bits[2], costbi);<br>
-                        if (!cu->getSlice()->getMvdL1ZeroFlag())<br>
-                        {<br>
-                            xCopyAMVPInfo(&amvpInfo[1][refIdxBidir[1]], cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo());<br>
-                            xCheckBestMVP(cu, REF_PIC_LIST_1, mvBidir[1], mvPredBi[1][refIdxBidir[1]], mvpIdxBi[1][refIdxBidir[1]], bits[2], costbi);<br>
-                        }<br>
-                    }<br>
-                }<br>
+<br>
+                pixel *ref0,*ref1;<br>
+                ref0 = cu->getSlice()->m_mref[REF_PIC_LIST_0][refIdx[0]]->fpelPlane + (pu - fenc->getLumaAddr()) + (mv[0].x >> 2) + (mv[0].y >> 2) * fenc->getStride();<br>
+                ref1 = cu->getSlice()->m_mref[REF_PIC_LIST_1][refIdx[1]]->fpelPlane + (pu - fenc->getLumaAddr()) + (mv[1].x >> 2) + (mv[1].y >> 2) * fenc->getStride();<br>
+<br></blockquote><div><br></div></div></div><div>There is a problem here: this takes only full-pel references. The sub-pel reference blocks must be generated. </div><div><div class="h5"><div><br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">


+                pixel avg[MAX_CU_SIZE * MAX_CU_SIZE];<br>
+<br>
+                int partEnum = PartitionFromSizes(roiWidth, roiHeight);<br>
+                primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, ref1, fenc->getStride(), fenc->getStride());<br>
+                int satdCost = primitives.satd[partEnum](pu, fenc->getStride(), avg, roiWidth);<br>
+                costbi =  satdCost + m_rdCost->getCost(bits[0]) + m_rdCost->getCost(bits[1]);<br>
             } // if (B_SLICE)<br>
         } //end if bTestNormalMC<br>
<br>
diff -r 55edc34e253c -r bb238e8b3600 source/common/pixel.cpp<br>
--- a/source/common/pixel.cpp   Sat Sep 28 22:54:44 2013 -0500<br>
+++ b/source/common/pixel.cpp   Mon Sep 30 12:09:53 2013 +0530<br>
@@ -578,6 +578,21 @@<br>
     }<br>
 }<br>
<br>
+template<int lx, int ly><br>
+void pixelavg_pp(pixel* dst, intptr_t dstride, pixel* src0, pixel* src1, intptr_t sstride0, intptr_t sstride1)<br>
+{<br>
+    for( int y = 0; y < ly; y++ )<br>
+    {<br>
+        for( int x = 0; x < lx; x++ )<br>
+        {<br>
+            dst[x] = ( src0[x] + src1[x] + 1 ) >> 1;<br>
+        }<br>
+        src0 += sstride0;<br>
+        src1 += sstride1;<br>
+        dst += dstride;<br>
+    }<br>
+}<br>
+<br>
 void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)<br>
 {<br>
     int x;<br>
@@ -647,6 +662,7 @@<br>
     SET_FUNC_PRIMITIVE_TABLE_C2(sad)<br>
     SET_FUNC_PRIMITIVE_TABLE_C2(sad_x3)<br>
     SET_FUNC_PRIMITIVE_TABLE_C2(sad_x4)<br>
+    SET_FUNC_PRIMITIVE_TABLE_C2(pixelavg_pp)<br>
<br>
     // satd<br>
     p.satd[PARTITION_4x4]   = satd_4x4;<br>
diff -r 55edc34e253c -r bb238e8b3600 source/common/primitives.h<br>
--- a/source/common/primitives.h        Sat Sep 28 22:54:44 2013 -0500<br>
+++ b/source/common/primitives.h        Mon Sep 30 12:09:53 2013 +0530<br>
@@ -196,6 +196,7 @@<br>
 typedef void (*pixelsub_sp_t)(int bx, int by, short *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);<br>
 typedef void (*pixeladd_ss_t)(int bx, int by, short *dst, intptr_t dstride, short *src0, short *src1, intptr_t sstride0, intptr_t sstride1);<br>
 typedef void (*pixeladd_pp_t)(int bx, int by, pixel *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);<br>
+typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);<br>
 typedef void (*blockfil_s_t)(short *dst, intptr_t dstride, short val);<br>
<br>
 typedef void (*intra_dc_t)(pixel* above, pixel* left, pixel* dst, intptr_t dstStride, int width, int bFilter);<br>
@@ -290,6 +291,7 @@<br>
     pixelsub_sp_t   pixelsub_sp;<br>
     pixeladd_ss_t   pixeladd_ss;<br>
     pixeladd_pp_t   pixeladd_pp;<br>
+    pixelavg_pp_t   pixelavg_pp[NUM_PARTITIONS];<br>
<br>
     filterVwghtd_t  filterVwghtd;<br>
     filterHwghtd_t  filterHwghtd;<br>
</blockquote></div></div></div><br></div></div>
</blockquote></div><br></div>