<p>Initially, both frameNumThreads and m_totalFrameThreads are set to the same value specified by the user. So, we are always creating frameEncoder objects; frameNumThreads holds the actual frame thread count.<br>
Only when we begin to encode do we change the value, and within that second we reset it to the original number of frame threads. So, the destroy function called at the very end can use frameNumThreads safely. Logically, there shouldn't be memory leaks here.<br>
</p>
<div class="gmail_quote">On May 29, 2014 11:52 PM, "Steve Borho" &lt;<a href="mailto:steve@borho.org">steve@borho.org</a>&gt; wrote:<br type="attribution"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
On Thu, May 29, 2014 at 1:10 PM, &lt;<a href="mailto:aarthi@multicorewareinc.com">aarthi@multicorewareinc.com</a>&gt; wrote:<br>
> # HG changeset patch<br>
> # User Aarthi Thirumalai<br>
> # Date 1401386381 -19800<br>
> # Thu May 29 23:29:41 2014 +0530<br>
> # Node ID cbefd4760814f85da2f805781177a1fade437709<br>
> # Parent e9776dfd1471ec6691276518007b725095ab6d52<br>
> ratecontrol: improve visual quality and bitrate savings in ABR.<br>
><br>
> Try to prevent ABR over-compensation after I frames by amortizing the cost over<br>
> the next few frames;<br>
><br>
> Improve ABR quality with frame parallelism - enable frame parallelism only after first<br>
> few P frames to prevent excessive qp fluctuations.<br>
><br>
> Fix initial I frame qp. when cu tree is enabled, the qp decided arbitrarily is<br>
> too low. This causes a huge qp spike in immediate P frames.Tuned cplxrSum<br>
> for intial I frame so that a more appropriate qp is chosen.<br>
><br>
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/encoder.cpp<br>
> --- a/source/encoder/encoder.cpp Wed May 28 20:39:40 2014 -0500<br>
> +++ b/source/encoder/encoder.cpp Thu May 29 23:29:41 2014 +0530<br>
> @@ -186,6 +186,7 @@<br>
> }<br>
> m_lookahead->init();<br>
> m_encodeStartTime = x265_mdate();<br>
> + m_totalFrameThreads = param->frameNumThreads;<br>
> }<br>
><br>
> int Encoder::getStreamHeaders(NALUnitEBSP **nalunits)<br>
> @@ -323,6 +324,20 @@<br>
> if (flush)<br>
> m_lookahead->flush();<br>
><br>
> + if (param->rc.rateControlMode == X265_RC_ABR)<br>
> + {<br>
> + // delay frame parallelism for non-VBV ABR<br>
> + if (m_pocLast == 0 && !param->rc.vbvBufferSize && !param->rc.vbvMaxBitrate)<br>
> + param->frameNumThreads = 1;<br>
> + else if (param->frameNumThreads != m_totalFrameThreads)<br>
> + {<br>
> + // re-enable frame parallelism after the first few P frames are encoded<br>
> + uint32_t frameCnt = (uint32_t)((0.5 * param->fpsNum / param->fpsDenom) / (param->bframes + 1));<br>
> + if (m_analyzeP.m_numPics > frameCnt)<br>
> + param->frameNumThreads = m_totalFrameThreads;<br>
> + }<br>
> + }<br>
<br>
There are a few places where param->frameNumThreads is used that should be<br>
modified to use m_totalFrameThreads instead. In particular, the<br>
encoder destroy() function and in the frame encoder and other places<br>
which peek at this param to determine if frame parallelism is enabled<br>
in order to determine if the ME search range is restricted. This<br>
could lead to non-determinism or memory leaks.<br>
<br>
> +<br>
> FrameEncoder *curEncoder = &m_frameEncoder[m_curEncoder];<br>
> m_curEncoder = (m_curEncoder + 1) % param->frameNumThreads;<br>
> int ret = 0;<br>
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/encoder.h<br>
> --- a/source/encoder/encoder.h Wed May 28 20:39:40 2014 -0500<br>
> +++ b/source/encoder/encoder.h Thu May 29 23:29:41 2014 +0530<br>
> @@ -90,6 +90,7 @@<br>
> DPB* m_dpb;<br>
> /* frame parallelism */<br>
> int m_curEncoder;<br>
> + int m_totalFrameThreads;<br>
><br>
> /* Collect statistics globally */<br>
> EncStats m_analyzeAll;<br>
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/ratecontrol.cpp<br>
> --- a/source/encoder/ratecontrol.cpp Wed May 28 20:39:40 2014 -0500<br>
> +++ b/source/encoder/ratecontrol.cpp Thu May 29 23:29:41 2014 +0530<br>
> @@ -30,6 +30,10 @@<br>
><br>
> using namespace x265;<br>
><br>
> +/* Amortize the partial cost of I frames over the next N frames */<br>
> +const double RateControl::amortizeFraction = 0.85;<br>
> +const int RateControl::amortizeFrames = 75;<br>
> +<br>
> /* Compute variance to derive AC energy of each block */<br>
> static inline uint32_t acEnergyVar(TComPic *pic, uint64_t sum_ssd, int shift, int i)<br>
> {<br>
> @@ -204,6 +208,8 @@<br>
> qCompress = param->rc.qCompress;<br>
><br>
> // validate for param->rc, maybe it is need to add a function like x265_parameters_valiate()<br>
> + residualFrames = 0;<br>
> + residualCost = 0;<br>
> param->rc.rfConstant = Clip3((double)-QP_BD_OFFSET, (double)51, param->rc.rfConstant);<br>
> param->rc.rfConstantMax = Clip3((double)-QP_BD_OFFSET, (double)51, param->rc.rfConstantMax);<br>
> rateFactorMaxIncrement = 0;<br>
> @@ -316,7 +322,7 @@<br>
> {<br>
> /* Adjust the first frame in order to stabilize the quality level compared to the rest */<br>
> #define ABR_INIT_QP_MIN (24 + QP_BD_OFFSET)<br>
> -#define ABR_INIT_QP_MAX (34 + QP_BD_OFFSET)<br>
> +#define ABR_INIT_QP_MAX (40 + QP_BD_OFFSET)<br>
> }<br>
> else if (param->rc.rateControlMode == X265_RC_CRF)<br>
> {<br>
> @@ -353,9 +359,12 @@<br>
> {<br>
> totalBits = 0;<br>
> framesDone = 0;<br>
> -<br>
> + double tuneCplxFactor = 1;<br>
> + /* 720p videos seem to be a good cutoff for cplxrSum */<br>
> + if (param->rc.cuTree && ncu > 3600)<br>
> + tuneCplxFactor = 2.5;<br>
> /* estimated ratio that produces a reasonable QP for the first I-frame */<br>
> - cplxrSum = .01 * pow(7.0e5, qCompress) * pow(ncu, 0.5);<br>
> + cplxrSum = .01 * pow(7.0e5, qCompress) * pow(ncu, 0.5) * tuneCplxFactor;<br>
> wantedBitsWindow = bitrate * frameDuration;<br>
> accumPNorm = .01;<br>
> accumPQp = (param->rc.rateControlMode == X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN) * accumPNorm;<br>
> @@ -550,7 +559,7 @@<br>
> /* use framesDone instead of POC as poc count is not serial with bframes enabled */<br>
> double timeDone = (double)(framesDone - param->frameNumThreads + 1) * frameDuration;<br>
> wantedBits = timeDone * bitrate;<br>
> - if (wantedBits > 0 && totalBits > 0)<br>
> + if (wantedBits > 0 && totalBits > 0 && !residualFrames)<br>
> {<br>
> abrBuffer *= X265_MAX(1, sqrt(timeDone));<br>
> overflow = Clip3(.5, 2.0, 1.0 + (totalBits - wantedBits) / abrBuffer);<br>
> @@ -572,10 +581,13 @@<br>
> double lqmin = 0, lqmax = 0;<br>
> lqmin = lastQScaleFor[sliceType] / lstep;<br>
> lqmax = lastQScaleFor[sliceType] * lstep;<br>
> - if (overflow > 1.1 && framesDone > 3)<br>
> - lqmax *= lstep;<br>
> - else if (overflow < 0.9)<br>
> - lqmin /= lstep;<br>
> + if (!residualFrames)<br>
> + {<br>
> + if (overflow > 1.1 && framesDone > 3)<br>
> + lqmax *= lstep;<br>
> + else if (overflow < 0.9)<br>
> + lqmin /= lstep;<br>
> + }<br>
> q = Clip3(lqmin, lqmax, q);<br>
> }<br>
> }<br>
> @@ -1083,6 +1095,24 @@<br>
> }<br>
> }<br>
><br>
> + /* amortize part of each I slice over the next several frames, up to<br>
> + * keyint-max, to avoid over-compensating for the large I slice cost */<br>
> + if (rce->sliceType == I_SLICE)<br>
> + {<br>
> + /* previous I still had a residual; roll it into the new loan */<br>
> + if (residualFrames)<br>
> + bits += residualCost * residualFrames;<br>
> +<br>
> + residualFrames = X265_MIN(amortizeFrames, param->keyframeMax);<br>
> + residualCost = (int)((bits * amortizeFraction) / residualFrames);<br>
> + bits -= residualCost * residualFrames;<br>
> + }<br>
> + else if (residualFrames)<br>
> + {<br>
> + bits += residualCost;<br>
> + residualFrames--;<br>
> + }<br>
> +<br>
> if (rce->sliceType != B_SLICE)<br>
> /* The factor 1.5 is to tune up the actual bits, otherwise the cplxrSum is scaled too low<br>
> * to improve short term compensation for next frame. */<br>
> @@ -1111,7 +1141,7 @@<br>
> }<br>
> }<br>
> updateVbv(bits, rce);<br>
> - rce->isActive = false;<br>
> }<br>
> + rce->isActive = false;<br>
> return 0;<br>
> }<br>
> diff -r e9776dfd1471 -r cbefd4760814 source/encoder/ratecontrol.h<br>
> --- a/source/encoder/ratecontrol.h Wed May 28 20:39:40 2014 -0500<br>
> +++ b/source/encoder/ratecontrol.h Thu May 29 23:29:41 2014 +0530<br>
> @@ -132,6 +132,12 @@<br>
><br>
> protected:<br>
><br>
> + static const double amortizeFraction;<br>
> + static const int amortizeFrames;<br>
> +<br>
> + int residualFrames;<br>
> + int residualCost;<br>
> +<br>
> void init();<br>
> double getQScale(RateControlEntry *rce, double rateFactor);<br>
> double rateEstimateQscale(TComPic* pic, RateControlEntry *rce); // main logic for calculating QP based on ABR<br>
> _______________________________________________<br>
> x265-devel mailing list<br>
> <a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
> <a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
<br>
<br>
<br>
--<br>
Steve Borho<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div>