<div dir="ltr"><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Sep 6, 2017 at 8:30 PM, Divya Manivannan <span dir="ltr">&lt;<a href="mailto:divya@multicorewareinc.com" target="_blank">divya@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class=""># HG changeset patch<br>
# User Divya Manivannan &lt;<a href="mailto:divya@multicorewareinc.com">divya@multicorewareinc.com</a>&gt;<br>
# Date 1500875370 -19800<br>
#      Mon Jul 24 11:19:30 2017 +0530<br>
</span># Node ID 6410885f92f602081c650a1322d9a1<wbr>bc30ea1d21<br>
<span class=""># Parent  2718cb5dd67f5e6735c9ee37df19c4<wbr>91f1473a60<br>
Skip the lookahead when analysis reuse mode is load and analysis file writing<br>
is disabled and scale factor is enabled.<br>
<br>
All the lookahead data are shared from analysis save mode to load mode, so it is<br>
skipped to improve performance. All the lookahead costs are approximated based<br>
on scalefactor.<br></span></blockquote><div><br></div><div>Pushed into default branch. Thanks!</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">
<br>
</span>diff -r 2718cb5dd67f -r 6410885f92f6 source/common/common.h<br>
<span class="">--- a/source/common/common.h    Tue Sep 05 11:21:56 2017 +0530<br>
+++ b/source/common/common.h    Mon Jul 24 11:19:30 2017 +0530<br>
@@ -207,7 +207,6 @@<br>
<br>
 // arbitrary, but low because SATD scores are 1/4 normal<br>
 #define X265_LOOKAHEAD_QP (12 + QP_BD_OFFSET)<br>
-#define X265_LOOKAHEAD_MAX 250<br>
<br>
 // Use the same size blocks as x264.  Using larger blocks seems to give artificially<br>
 // high cost estimates (intra and inter both suffer)<br>
</span>diff -r 2718cb5dd67f -r 6410885f92f6 source/encoder/encoder.cpp<br>
<div><div class="h5">--- a/source/encoder/encoder.cpp        Tue Sep 05 11:21:56 2017 +0530<br>
+++ b/source/encoder/encoder.cpp        Mon Jul 24 11:19:30 2017 +0530<br>
@@ -790,9 +790,22 @@<br>
         {<br>
             /* readAnalysisFile reads analysis data for the frame and allocates memory based on slicetype */<br>
             readAnalysisFile(&inFrame->m_<wbr>analysisData, inFrame->m_poc, pic_in);<br>
+            inFrame->m_poc = inFrame->m_analysisData.poc;<br>
             sliceType = inFrame->m_analysisData.<wbr>sliceType;<br>
             inFrame->m_lowres.bScenecut = !!inFrame->m_analysisData.<wbr>bScenecut;<br>
             inFrame->m_lowres.satdCost = inFrame->m_analysisData.<wbr>satdCost;<br>
+            if (!m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
+            {<br>
+                inFrame->m_lowres.sliceType = sliceType;<br>
+                inFrame->m_lowres.bKeyframe = !!inFrame->m_analysisData.<wbr>lookahead.keyframe;<br>
+                inFrame->m_lowres.<wbr>bLastMiniGopBFrame = !!inFrame->m_analysisData.<wbr>lookahead.lastMiniGopBFrame;<br>
+                int vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;<br>
+                for (int index = 0; index < vbvCount; index++)<br>
+                {<br>
+                    inFrame->m_lowres.plannedSatd[<wbr>index] = inFrame->m_analysisData.<wbr>lookahead.plannedSatd[index];<br>
+                    inFrame->m_lowres.plannedType[<wbr>index] = inFrame->m_analysisData.<wbr>lookahead.plannedType[index];<br>
+                }<br>
+            }<br>
         }<br>
         if (m_param->bUseRcStats && pic_in->rcData)<br>
         {<br>
@@ -886,12 +899,39 @@<br>
                     pic_out->analysisData.poc = pic_out->poc;<br>
                     pic_out->analysisData.<wbr>sliceType = pic_out->sliceType;<br>
                     pic_out->analysisData.<wbr>bScenecut = outFrame->m_lowres.bScenecut;<br>
-                    pic_out->analysisData.satdCost  = outFrame->m_lowres.satdCost;<br>
+                    pic_out->analysisData.satdCost  = outFrame->m_lowres.satdCost;<br>
                     pic_out->analysisData.<wbr>numCUsInFrame = outFrame->m_analysisData.<wbr>numCUsInFrame;<br>
+                    pic_out->analysisData.<wbr>numCuInHeight = outFrame->m_analysisData.<wbr>numCuInHeight;<br>
                     pic_out->analysisData.<wbr>numPartitions = outFrame->m_analysisData.<wbr>numPartitions;<br>
                     pic_out->analysisData.wt = outFrame->m_analysisData.wt;<br>
                     pic_out->analysisData.<wbr>interData = outFrame->m_analysisData.<wbr>interData;<br>
                     pic_out->analysisData.<wbr>intraData = outFrame->m_analysisData.<wbr>intraData;<br>
+                    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
+                    {<br>
+                        pic_out->analysisData.satdCost *= m_param->scaleFactor * 2;<br>
+                        pic_out->analysisData.<wbr>lookahead.keyframe = outFrame->m_lowres.bKeyframe;<br>
+                        pic_out->analysisData.<wbr>lookahead.lastMiniGopBFrame = outFrame->m_lowres.<wbr>bLastMiniGopBFrame;<br>
+                        int vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;<br>
+                        for (int index = 0; index < vbvCount; index++)<br>
+                        {<br>
+                            pic_out->analysisData.<wbr>lookahead.plannedSatd[index] = outFrame->m_lowres.<wbr>plannedSatd[index] * m_param->scaleFactor * 2;<br>
+                            pic_out->analysisData.<wbr>lookahead.plannedType[index] = outFrame->m_lowres.<wbr>plannedType[index];<br>
+                        }<br>
+                        for (uint32_t index = 0; index < pic_out->analysisData.<wbr>numCuInHeight; index++)<br>
+                        {<br>
+                            outFrame->m_analysisData.<wbr>lookahead.intraSatdForVbv[<wbr>index] = outFrame->m_encData->m_<wbr>rowStat[index].intraSatdForVbv * m_param->scaleFactor * 2;<br>
+                            outFrame->m_analysisData.<wbr>lookahead.satdForVbv[index] = outFrame->m_encData->m_<wbr>rowStat[index].satdForVbv * m_param->scaleFactor * 2;<br>
+                        }<br>
+                        pic_out->analysisData.<wbr>lookahead.intraSatdForVbv = outFrame->m_analysisData.<wbr>lookahead.intraSatdForVbv;<br>
+                        pic_out->analysisData.<wbr>lookahead.satdForVbv = outFrame->m_analysisData.<wbr>lookahead.satdForVbv;<br>
+                        for (uint32_t index = 0; index < pic_out->analysisData.<wbr>numCUsInFrame; index++)<br>
+                        {<br>
+                            outFrame->m_analysisData.<wbr>lookahead.intraVbvCost[index] = outFrame->m_encData->m_cuStat[<wbr>index].intraVbvCost * m_param->scaleFactor * 2;<br>
+                            outFrame->m_analysisData.<wbr>lookahead.vbvCost[index] = outFrame->m_encData->m_cuStat[<wbr>index].vbvCost * m_param->scaleFactor * 2;<br>
+                        }<br>
+                        pic_out->analysisData.<wbr>lookahead.intraVbvCost = outFrame->m_analysisData.<wbr>lookahead.intraVbvCost;<br>
+                        pic_out->analysisData.<wbr>lookahead.vbvCost = outFrame->m_analysisData.<wbr>lookahead.vbvCost;<br>
+                    }<br>
                     writeAnalysisFile(&pic_out-><wbr>analysisData, *outFrame->m_encData);<br>
                     if (m_param->bUseAnalysisFile)<br>
                         freeAnalysis(&pic_out-><wbr>analysisData);<br>
@@ -1054,7 +1094,19 @@<br>
                 slice->m_maxNumMergeCand = m_param->maxNumMergeCand;<br>
                 slice->m_endCUAddr = slice->realEndAddress(m_sps.<wbr>numCUsInFrame * m_param->num4x4Partitions);<br>
             }<br>
-<br>
+            if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && !m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
+            {<br>
+                for (uint32_t index = 0; index < frameEnc->m_analysisData.<wbr>numCuInHeight; index++)<br>
+                {<br>
+                    frameEnc->m_encData->m_<wbr>rowStat[index].intraSatdForVbv = frameEnc->m_analysisData.<wbr>lookahead.intraSatdForVbv[<wbr>index];<br>
+                    frameEnc->m_encData->m_<wbr>rowStat[index].satdForVbv = frameEnc->m_analysisData.<wbr>lookahead.satdForVbv[index];<br>
+                }<br>
+                for (uint32_t index = 0; index < frameEnc->m_analysisData.<wbr>numCUsInFrame; index++)<br>
+                {<br>
+                    frameEnc->m_encData->m_cuStat[<wbr>index].intraVbvCost = frameEnc->m_analysisData.<wbr>lookahead.intraVbvCost[index];<br>
+                    frameEnc->m_encData->m_cuStat[<wbr>index].vbvCost = frameEnc->m_analysisData.<wbr>lookahead.vbvCost[index];<br>
+                }<br>
+            }<br>
             if (m_param->searchMethod == X265_SEA && frameEnc->m_lowres.sliceType != X265_TYPE_B)<br>
             {<br>
                 int padX = m_param->maxCUSize + 32;<br>
@@ -1129,6 +1181,7 @@<br>
<br>
                 uint32_t numCUsInFrame   = widthInCU * heightInCU;<br>
                 analysis->numCUsInFrame  = numCUsInFrame;<br>
+                analysis->numCuInHeight = heightInCU;<br>
                 analysis->numPartitions  = m_param->num4x4Partitions;<br>
                 allocAnalysis(analysis);<br>
             }<br>
@@ -2705,6 +2758,13 @@<br>
 {<br>
     X265_CHECK(analysis-><wbr>sliceType, "invalid slice type\n");<br>
     analysis->interData = analysis->intraData = NULL;<br>
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
+    {<br>
+        CHECKED_MALLOC_ZERO(analysis-><wbr>lookahead.intraSatdForVbv, uint32_t, analysis->numCuInHeight);<br>
+        CHECKED_MALLOC_ZERO(analysis-><wbr>lookahead.satdForVbv, uint32_t, analysis->numCuInHeight);<br>
+        CHECKED_MALLOC_ZERO(analysis-><wbr>lookahead.intraVbvCost, uint32_t, analysis->numCUsInFrame);<br>
+        CHECKED_MALLOC_ZERO(analysis-><wbr>lookahead.vbvCost, uint32_t, analysis->numCUsInFrame);<br>
+    }<br>
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>
     {<br>
         if (m_param->analysisReuseLevel < 2)<br>
@@ -2770,6 +2830,13 @@<br>
<br>
 void Encoder::freeAnalysis(x265_<wbr>analysis_data* analysis)<br>
 {<br>
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
+    {<br>
+        X265_FREE(analysis->lookahead.<wbr>satdForVbv);<br>
+        X265_FREE(analysis->lookahead.<wbr>intraSatdForVbv);<br>
+        X265_FREE(analysis->lookahead.<wbr>vbvCost);<br>
+        X265_FREE(analysis->lookahead.<wbr>intraVbvCost);<br>
+    }<br>
     /* Early exit freeing weights alone if level is 1 (when there is no analysis inter/intra) */<br>
     if (analysis->sliceType > X265_TYPE_I && analysis->wt)<br>
         X265_FREE(analysis->wt);<br>
</div></div>@@ -2949,7 +3016,12 @@<br>
<span class="">     X265_FREAD(&analysis-><wbr>bScenecut, sizeof(int), 1, m_analysisFile, &(picData->bScenecut));<br>
     X265_FREAD(&analysis-><wbr>satdCost, sizeof(int64_t), 1, m_analysisFile, &(picData->satdCost));<br>
     X265_FREAD(&analysis-><wbr>numCUsInFrame, sizeof(int), 1, m_analysisFile, &(picData->numCUsInFrame));<br>
+    X265_FREAD(&analysis-><wbr>numCuInHeight, sizeof(uint32_t), 1, m_analysisFile, &(picData->numCuInHeight));<br>
     X265_FREAD(&analysis-><wbr>numPartitions, sizeof(int), 1, m_analysisFile, &(picData->numPartitions));<br>
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
</span>+    {<br>
<span class="">+        X265_FREAD(&analysis-><wbr>lookahead, sizeof(x265_lookahead_data), 1, m_analysisFile, &(picData->lookahead));<br>
</span>+    }<br>
<span class="">     int scaledNumPartition = analysis->numPartitions;<br>
     int factor = 1 << m_param->scaleFactor;<br>
<br>
</span>@@ -2958,7 +3030,13 @@<br>
<span class=""><br>
     /* Memory is allocated for inter and intra analysis data based on the slicetype */<br>
     allocAnalysis(analysis);<br>
-<br>
+    if (!m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
+    {<br>
+        X265_FREAD(analysis-><wbr>lookahead.intraVbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFile, picData->lookahead.<wbr>intraVbvCost);<br>
+        X265_FREAD(analysis-><wbr>lookahead.vbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFile, picData->lookahead.vbvCost);<br>
+        X265_FREAD(analysis-><wbr>lookahead.satdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFile, picData->lookahead.satdForVbv)<wbr>;<br>
+        X265_FREAD(analysis-><wbr>lookahead.intraSatdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFile, picData->lookahead.<wbr>intraSatdForVbv);<br>
+    }<br>
     if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I)<br>
     {<br>
         if (m_param->analysisReuseLevel < 2)<br>
</span>diff -r 2718cb5dd67f -r 6410885f92f6 source/encoder/frameencoder.<wbr>cpp<br>
<div><div class="h5">--- a/source/encoder/frameencoder.<wbr>cpp   Tue Sep 05 11:21:56 2017 +0530<br>
+++ b/source/encoder/frameencoder.<wbr>cpp   Mon Jul 24 11:19:30 2017 +0530<br>
@@ -1376,17 +1376,19 @@<br>
             /* TODO: use defines from slicetype.h for lowres block size */<br>
             uint32_t block_y = (ctu->m_cuPelY >> m_param->maxLog2CUSize) * noOfBlocks;<br>
             uint32_t block_x = (ctu->m_cuPelX >> m_param->maxLog2CUSize) * noOfBlocks;<br>
-<br>
-            cuStat.vbvCost = 0;<br>
-            cuStat.intraVbvCost = 0;<br>
-            for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)<br>
+            if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || m_param->bUseAnalysisFile || !m_param->scaleFactor)<br>
             {<br>
-                uint32_t idx = block_x + (block_y * maxBlockCols);<br>
+                cuStat.vbvCost = 0;<br>
+                cuStat.intraVbvCost = 0;<br>
+                for (uint32_t h = 0; h < noOfBlocks && block_y < maxBlockRows; h++, block_y++)<br>
+                {<br>
+                    uint32_t idx = block_x + (block_y * maxBlockCols);<br>
<br>
-                for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++, idx++)<br>
-                {<br>
-                    cuStat.vbvCost += m_frame->m_lowres.<wbr>lowresCostForRc[idx] & LOWRES_COST_MASK;<br>
-                    cuStat.intraVbvCost += m_frame->m_lowres.intraCost[<wbr>idx];<br>
+                    for (uint32_t w = 0; w < noOfBlocks && (block_x + w) < maxBlockCols; w++, idx++)<br>
+                    {<br>
+                        cuStat.vbvCost += m_frame->m_lowres.<wbr>lowresCostForRc[idx] & LOWRES_COST_MASK;<br>
+                        cuStat.intraVbvCost += m_frame->m_lowres.intraCost[<wbr>idx];<br>
+                    }<br>
                 }<br>
             }<br>
         }<br>
</div></div>diff -r 2718cb5dd67f -r 6410885f92f6 source/encoder/slicetype.cpp<br>
<div><div class="h5">--- a/source/encoder/slicetype.cpp      Tue Sep 05 11:21:56 2017 +0530<br>
+++ b/source/encoder/slicetype.cpp      Mon Jul 24 11:19:30 2017 +0530<br>
@@ -742,9 +742,21 @@<br>
 /* Called by API thread */<br>
 void Lookahead::addPicture(Frame& curFrame, int sliceType)<br>
 {<br>
-    checkLookaheadQueue(m_<wbr>inputCount);<br>
-    curFrame.m_lowres.sliceType = sliceType;<br>
-    addPicture(curFrame);<br>
+    if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && !m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
+    {<br>
+        if (!m_filled)<br>
+            m_filled = true;<br>
+        m_outputLock.acquire();<br>
+        m_outputQueue.pushBack(<wbr>curFrame);<br>
+        m_outputLock.release();<br>
+        m_inputCount++;<br>
+    }<br>
+    else<br>
+    {<br>
+        checkLookaheadQueue(m_<wbr>inputCount);<br>
+        curFrame.m_lowres.sliceType = sliceType;<br>
+        addPicture(curFrame);<br>
+    }<br>
 }<br>
<br>
 void Lookahead::addPicture(Frame& curFrame)<br>
@@ -831,6 +843,9 @@<br>
             return out;<br>
         }<br>
<br>
+        if (m_param->analysisReuseMode == X265_ANALYSIS_LOAD && !m_param->bUseAnalysisFile && m_param->scaleFactor)<br>
+            return NULL;<br>
+<br>
         findJob(-1); /* run slicetypeDecide() if necessary */<br>
<br>
         m_inputLock.acquire();<br>
@@ -887,68 +902,68 @@<br>
     default:<br>
         return;<br>
     }<br>
+    if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || m_param->bUseAnalysisFile || !m_param->scaleFactor)<br>
+    {<br>
+        X265_CHECK(curFrame->m_lowres.<wbr>costEst[b - p0][p1 - b] > 0, "Slice cost not estimated\n")<br>
+        if (m_param->rc.cuTree && !m_param->rc.bStatRead)<br>
+            /* update row satds based on cutree offsets */<br>
+            curFrame->m_lowres.satdCost = frameCostRecalculate(frames, p0, p1, b);<br>
+        else if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD)<br>
+        {<br>
+            if (m_param->rc.aqMode)<br>
+                curFrame->m_lowres.satdCost = curFrame->m_lowres.costEstAq[b - p0][p1 - b];<br>
+            else<br>
+                curFrame->m_lowres.satdCost = curFrame->m_lowres.costEst[b - p0][p1 - b];<br>
+        }<br>
+        if (m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate)<br>
+        {<br>
+            /* aggregate lowres row satds to CTU resolution */<br>
+            curFrame->m_lowres.<wbr>lowresCostForRc = curFrame->m_lowres.<wbr>lowresCosts[b - p0][p1 - b];<br>
+            uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0, intraSum = 0;<br>
+            uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);<br>
+            uint32_t numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 1) / m_param->maxCUSize;<br>
+            uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu = (uint32_t)m_8x8Height;<br>
+            double *qp_offset = 0;<br>
+            /* Factor in qpoffsets based on Aq/Cutree in CU costs */<br>
+            if (m_param->rc.aqMode || m_param->bAQMotion)<br>
+                qp_offset = (frames[b]->sliceType == X265_TYPE_B || !m_param->rc.cuTree) ? frames[b]->qpAqOffset : frames[b]->qpCuTreeOffset;<br>
<br>
-    X265_CHECK(curFrame->m_lowres.<wbr>costEst[b - p0][p1 - b] > 0, "Slice cost not estimated\n")<br>
-<br>
-    if (m_param->rc.cuTree && !m_param->rc.bStatRead)<br>
-        /* update row satds based on cutree offsets */<br>
-        curFrame->m_lowres.satdCost = frameCostRecalculate(frames, p0, p1, b);<br>
-    else if (m_param->analysisReuseMode != X265_ANALYSIS_LOAD || m_param->scaleFactor)<br>
-    {<br>
-        if (m_param->rc.aqMode)<br>
-            curFrame->m_lowres.satdCost = curFrame->m_lowres.costEstAq[b - p0][p1 - b];<br>
-        else<br>
-            curFrame->m_lowres.satdCost = curFrame->m_lowres.costEst[b - p0][p1 - b];<br>
-    }<br>
-<br>
-    if (m_param->rc.vbvBufferSize && m_param->rc.vbvMaxBitrate)<br>
-    {<br>
-        /* aggregate lowres row satds to CTU resolution */<br>
-        curFrame->m_lowres.<wbr>lowresCostForRc = curFrame->m_lowres.<wbr>lowresCosts[b - p0][p1 - b];<br>
-        uint32_t lowresRow = 0, lowresCol = 0, lowresCuIdx = 0, sum = 0, intraSum = 0;<br>
-        uint32_t scale = m_param->maxCUSize / (2 * X265_LOWRES_CU_SIZE);<br>
-        uint32_t numCuInHeight = (m_param->sourceHeight + m_param->maxCUSize - 1) / m_param->maxCUSize;<br>
-        uint32_t widthInLowresCu = (uint32_t)m_8x8Width, heightInLowresCu = (uint32_t)m_8x8Height;<br>
-        double *qp_offset = 0;<br>
-        /* Factor in qpoffsets based on Aq/Cutree in CU costs */<br>
-        if (m_param->rc.aqMode || m_param->bAQMotion)<br>
-            qp_offset = (frames[b]->sliceType == X265_TYPE_B || !m_param->rc.cuTree) ? frames[b]->qpAqOffset : frames[b]->qpCuTreeOffset;<br>
-<br>
-        for (uint32_t row = 0; row < numCuInHeight; row++)<br>
-        {<br>
-            lowresRow = row * scale;<br>
-            for (uint32_t cnt = 0; cnt < scale && lowresRow < heightInLowresCu; lowresRow++, cnt++)<br>
+            for (uint32_t row = 0; row < numCuInHeight; row++)<br>
             {<br>
-                sum = 0; intraSum = 0;<br>
-                int diff = 0;<br>
-                lowresCuIdx = lowresRow * widthInLowresCu;<br>
-                for (lowresCol = 0; lowresCol < widthInLowresCu; lowresCol++, lowresCuIdx++)<br>
+                lowresRow = row * scale;<br>
+                for (uint32_t cnt = 0; cnt < scale && lowresRow < heightInLowresCu; lowresRow++, cnt++)<br>
                 {<br>
-                    uint16_t lowresCuCost = curFrame->m_lowres.<wbr>lowresCostForRc[lowresCuIdx] & LOWRES_COST_MASK;<br>
-                    if (qp_offset)<br>
+                    sum = 0; intraSum = 0;<br>
+                    int diff = 0;<br>
+                    lowresCuIdx = lowresRow * widthInLowresCu;<br>
+                    for (lowresCol = 0; lowresCol < widthInLowresCu; lowresCol++, lowresCuIdx++)<br>
                     {<br>
-                        double qpOffset;<br>
-                        if (m_param->rc.qgSize == 8)<br>
-                            qpOffset = (qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4] +<br>
-                                        qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + 1] +<br>
-                                        qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.<wbr>maxBlocksInRowFullRes] +<br>
-                                        qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.<wbr>maxBlocksInRowFullRes + 1]) / 4;<br>
-                        else<br>
-                            qpOffset = qp_offset[lowresCuIdx];<br>
-                        lowresCuCost = (uint16_t)((lowresCuCost * x265_exp2fix8(qpOffset) + 128) >> 8);<br>
-                        int32_t intraCuCost = curFrame->m_lowres.intraCost[<wbr>lowresCuIdx];<br>
-                        curFrame->m_lowres.intraCost[<wbr>lowresCuIdx] = (intraCuCost * x265_exp2fix8(qpOffset) + 128) >> 8;<br>
+                        uint16_t lowresCuCost = curFrame->m_lowres.<wbr>lowresCostForRc[lowresCuIdx] & LOWRES_COST_MASK;<br>
+                        if (qp_offset)<br>
+                        {<br>
+                            double qpOffset;<br>
+                            if (m_param->rc.qgSize == 8)<br>
+                                qpOffset = (qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4] +<br>
+                                qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + 1] +<br>
+                                qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.<wbr>maxBlocksInRowFullRes] +<br>
+                                qp_offset[lowresCol * 2 + lowresRow * widthInLowresCu * 4 + curFrame->m_lowres.<wbr>maxBlocksInRowFullRes + 1]) / 4;<br>
+                            else<br>
+                                qpOffset = qp_offset[lowresCuIdx];<br>
+                            lowresCuCost = (uint16_t)((lowresCuCost * x265_exp2fix8(qpOffset) + 128) >> 8);<br>
+                            int32_t intraCuCost = curFrame->m_lowres.intraCost[<wbr>lowresCuIdx];<br>
+                            curFrame->m_lowres.intraCost[<wbr>lowresCuIdx] = (intraCuCost * x265_exp2fix8(qpOffset) + 128) >> 8;<br>
+                        }<br>
+                        if (m_param->bIntraRefresh && slice->m_sliceType == X265_TYPE_P)<br>
+                            for (uint32_t x = curFrame->m_encData->m_pir.<wbr>pirStartCol; x <= curFrame->m_encData->m_pir.<wbr>pirEndCol; x++)<br>
+                                diff += curFrame->m_lowres.intraCost[<wbr>lowresCuIdx] - lowresCuCost;<br>
+                        curFrame->m_lowres.<wbr>lowresCostForRc[lowresCuIdx] = lowresCuCost;<br>
+                        sum += lowresCuCost;<br>
+                        intraSum += curFrame->m_lowres.intraCost[<wbr>lowresCuIdx];<br>
                     }<br>
-                    if (m_param->bIntraRefresh && slice->m_sliceType == X265_TYPE_P)<br>
-                        for (uint32_t x = curFrame->m_encData->m_pir.<wbr>pirStartCol; x <= curFrame->m_encData->m_pir.<wbr>pirEndCol; x++)<br>
-                            diff += curFrame->m_lowres.intraCost[<wbr>lowresCuIdx] - lowresCuCost;<br>
-                    curFrame->m_lowres.<wbr>lowresCostForRc[lowresCuIdx] = lowresCuCost;<br>
-                    sum += lowresCuCost;<br>
-                    intraSum += curFrame->m_lowres.intraCost[<wbr>lowresCuIdx];<br>
+                    curFrame->m_encData->m_<wbr>rowStat[row].satdForVbv += sum;<br>
+                    curFrame->m_encData->m_<wbr>rowStat[row].satdForVbv += diff;<br>
+                    curFrame->m_encData->m_<wbr>rowStat[row].intraSatdForVbv += intraSum;<br>
                 }<br>
-                curFrame->m_encData->m_<wbr>rowStat[row].satdForVbv += sum;<br>
-                curFrame->m_encData->m_<wbr>rowStat[row].satdForVbv += diff;<br>
-                curFrame->m_encData->m_<wbr>rowStat[row].intraSatdForVbv += intraSum;<br>
             }<br>
         }<br>
     }<br>
</div></div>diff -r 2718cb5dd67f -r 6410885f92f6 source/x265.h<br>
<div class="HOEnZb"><div class="h5">--- a/source/x265.h     Tue Sep 05 11:21:56 2017 +0530<br>
+++ b/source/x265.h     Mon Jul 24 11:19:30 2017 +0530<br>
@@ -88,6 +88,20 @@<br>
     uint8_t* payload;<br>
 } x265_nal;<br>
<br>
+#define X265_LOOKAHEAD_MAX 250<br>
+<br>
+typedef struct x265_lookahead_data<br>
+{<br>
+    int64_t   plannedSatd[X265_LOOKAHEAD_MAX + 1];<br>
+    uint32_t  *vbvCost;<br>
+    uint32_t  *intraVbvCost;<br>
+    uint32_t  *satdForVbv;<br>
+    uint32_t  *intraSatdForVbv;<br>
+    int       keyframe;<br>
+    int       lastMiniGopBFrame;<br>
+    int       plannedType[X265_LOOKAHEAD_MAX + 1];<br>
+} x265_lookahead_data;<br>
+<br>
 /* Stores all analysis data for a single frame */<br>
 typedef struct x265_analysis_data<br>
 {<br>
@@ -102,6 +116,8 @@<br>
     void*            wt;<br>
     void*            interData;<br>
     void*            intraData;<br>
+    uint32_t         numCuInHeight;<br>
+    x265_lookahead_data lookahead;<br>
 } x265_analysis_data;<br>
<br>
 /* cu statistics */<br>
</div></div><br>______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
<br></blockquote></div><br></div></div>