<div dir="ltr">The patch is not applying on Release_3.2. Please rebase and send the patch.</div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Thu, Oct 10, 2019 at 2:31 PM Akil <<a href="mailto:akil@multicorewareinc.com">akil@multicorewareinc.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"># HG changeset patch<br># User Akil Ayyappan<<a href="mailto:akil@multicorewareinc.com" target="_blank">akil@multicorewareinc.com</a>><br># Date 1570594514 -19800<br># Wed Oct 09 09:45:14 2019 +0530<br># Node ID b66d88859a528ae80f6f19eae7553fe7fcdb88e6<br># Parent 354901970679c787efdfdcc6577228e9c06785cf<br>Fix: Performance drop in aq-mode 4<br><br>This patch moves the memory allocation part of the edge information required for aq-mode 4<br>to the Frame class-level in that way it can be reused by the threads.<br><br>diff -r 354901970679 -r b66d88859a52 source/common/frame.cpp<br>--- a/source/common/frame.cpp Fri Sep 13 15:57:26 2019 +0530<br>+++ b/source/common/frame.cpp Wed Oct 09 09:45:14 2019 +0530<br>@@ -58,6 +58,9 @@<br> m_classifyFrame = false;<br> m_fieldNum = 0;<br> m_picStruct = 0;<br>+ m_edgePic = NULL;<br>+ m_gaussianPic = NULL;<br>+ m_thetaPic = NULL;<br> }<br> <br> bool Frame::create(x265_param *param, float* quantOffsets)<br>@@ -98,6 +101,20 @@<br> CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size);<br> }<br> <br>+ if (param->rc.aqMode == X265_AQ_EDGE || (param->rc.zonefileCount && param->rc.aqMode != 0))<br>+ {<br>+ uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize;<br>+ uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize;<br>+ uint32_t m_lumaMarginX = param->maxCUSize + 32; // search margin and 8-tap filter half-length, padded for 32-byte alignment<br>+ uint32_t m_lumaMarginY = param->maxCUSize + 16; // margin for 8-tap filter and infinite padding<br>+ 
intptr_t m_stride = (numCuInWidth * param->maxCUSize) + (m_lumaMarginX << 1);<br>+ int maxHeight = numCuInHeight * param->maxCUSize;<br>+<br>+ m_edgePic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));<br>+ m_gaussianPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));<br>+ m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));<br>+ }<br>+<br> if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize))<br> {<br> X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");<br>@@ -243,4 +260,11 @@<br> X265_FREE_ZERO(m_classifyVariance);<br> X265_FREE_ZERO(m_classifyCount);<br> }<br>+<br>+ if (m_param->rc.aqMode == X265_AQ_EDGE || (m_param->rc.zonefileCount && m_param->rc.aqMode != 0))<br>+ {<br>+ X265_FREE(m_edgePic);<br>+ X265_FREE(m_gaussianPic);<br>+ X265_FREE(m_thetaPic);<br>+ }<br> }<br>diff -r 354901970679 -r b66d88859a52 source/common/frame.h<br>--- a/source/common/frame.h Fri Sep 13 15:57:26 2019 +0530<br>+++ b/source/common/frame.h Wed Oct 09 09:45:14 2019 +0530<br>@@ -132,6 +132,11 @@<br> bool m_classifyFrame;<br> int m_fieldNum;<br> <br>+ /* aq-mode 4 : Gaussian, edge and theta frames for edge information */<br>+ pixel* m_edgePic;<br>+ pixel* m_gaussianPic;<br>+ pixel* m_thetaPic;<br>+<br> Frame();<br> <br> bool create(x265_param *param, float* quantOffsets);<br>diff -r 354901970679 -r b66d88859a52 source/encoder/slicetype.cpp<br>--- a/source/encoder/slicetype.cpp Fri Sep 13 15:57:26 2019 +0530<br>+++ b/source/encoder/slicetype.cpp Wed Oct 09 09:45:14 2019 +0530<br>@@ -85,12 +85,22 @@<br> <br> } // end anonymous namespace<br> <br>-void edgeFilter(Frame *curFrame, pixel *pic1, pixel *pic2, pixel *pic3, intptr_t stride, int height, int width)<br>+void edgeFilter(Frame *curFrame, x265_param* param)<br> {<br>+ int height = curFrame->m_fencPic->m_picHeight;<br>+ int width = curFrame->m_fencPic->m_picWidth;<br>+ intptr_t stride = 
curFrame->m_fencPic->m_stride;<br>+ uint32_t numCuInHeight = (height + param->maxCUSize - 1) / param->maxCUSize;<br>+ int maxHeight = numCuInHeight * param->maxCUSize;<br>+<br>+ memset(curFrame->m_edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));<br>+ memset(curFrame->m_gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));<br>+ memset(curFrame->m_thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));<br>+<br> pixel *src = (pixel*)curFrame->m_fencPic->m_picOrg[0];<br>- pixel *edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br>- pixel *refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br>- pixel *edgeTheta = pic3 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br>+ pixel *edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br>+ pixel *refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br>+ pixel *edgeTheta = curFrame->m_thetaPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br> <br> for (int i = 0; i < height; i++)<br> {<br>@@ -103,7 +113,7 @@<br> <br> //Applying Gaussian filter on the picture<br> src = (pixel*)curFrame->m_fencPic->m_picOrg[0];<br>- refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br>+ refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br> pixel pixelValue = 0;<br> <br> for (int rowNum = 0; rowNum < height; rowNum++)<br>@@ -148,7 +158,7 @@<br> float gradientH = 0, gradientV = 0, radians = 0, theta = 0;<br> float gradientMagnitude = 0;<br> pixel blackPixel = 0;<br>- edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY 
* stride + curFrame->m_fencPic->m_lumaMarginX;<br>+ edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * stride + curFrame->m_fencPic->m_lumaMarginX;<br> //Applying Sobel filter on the gaussian filtered picture<br> for (int rowNum = 0; rowNum < height; rowNum++)<br> {<br>@@ -198,8 +208,10 @@<br> angle = sum / (size*size);<br> }<br> <br>-uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame,pixel *edgeImage, pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize)<br>+uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize)<br> {<br>+ pixel *edgeImage = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;<br>+ pixel *edgeTheta = curFrame->m_thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;<br> intptr_t srcStride = curFrame->m_fencPic->m_stride;<br> intptr_t blockOffsetLuma = blockX + (blockY * srcStride);<br> int plane = 0; // Sobel filter is applied only on Y component<br>@@ -478,31 +490,14 @@<br> }<br> else<br> {<br>-#define AQ_EDGE_BIAS 0.5<br>-#define EDGE_INCLINATION 45<br>-<br>- pixel *edgePic = NULL;<br>- pixel *gaussianPic = NULL;<br>- pixel *thetaPic = NULL;<br>-<br>- if (param->rc.aqMode == X265_AQ_EDGE)<br>- {<br>- uint32_t numCuInHeight = (maxRow + param->maxCUSize - 1) / param->maxCUSize;<br>- int maxHeight = numCuInHeight * param->maxCUSize;<br>- intptr_t stride = curFrame->m_fencPic->m_stride;<br>- edgePic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));<br>- gaussianPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));<br>- thetaPic = X265_MALLOC(pixel, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));<br>- memset(edgePic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));<br>- 
memset(gaussianPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));<br>- memset(thetaPic, 0, stride * (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));<br>- edgeFilter(curFrame, edgePic, gaussianPic, thetaPic, stride, maxRow, maxCol);<br>- } <br>-<br> int blockXY = 0, inclinedEdge = 0;<br> double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;<br> double bias_strength = 0.f;<br> double strength = 0.f;<br>+<br>+ if (param->rc.aqMode == X265_AQ_EDGE)<br>+ edgeFilter(curFrame, param);<br>+<br> if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)<br> {<br> double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));<br>@@ -514,9 +509,7 @@<br> energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);<br> if (param->rc.aqMode == X265_AQ_EDGE)<br> {<br>- pixel *edgeImage = edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;<br>- pixel *edgeTheta = thetaPic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;<br>- edgeDensity = edgeDensityCu(curFrame, edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);<br>+ edgeDensity = edgeDensityCu(curFrame, avgAngle, blockX, blockY, param->rc.qgSize);<br> if (edgeDensity)<br> {<br> qp_adj = pow(edgeDensity * bit_depth_correction + 1, 0.1);<br>@@ -549,13 +542,6 @@<br> else<br> strength = param->rc.aqStrength * 1.0397f;<br> <br>- if (param->rc.aqMode == X265_AQ_EDGE)<br>- {<br>- X265_FREE(edgePic);<br>- X265_FREE(gaussianPic);<br>- X265_FREE(thetaPic);<br>- }<br>-<br> blockXY = 0;<br> for (int blockY = 0; blockY < maxRow; blockY += loopIncr)<br> {<br>diff -r 354901970679 -r b66d88859a52 source/encoder/slicetype.h<br>--- a/source/encoder/slicetype.h Fri Sep 13 15:57:26 2019 +0530<br>+++ b/source/encoder/slicetype.h Wed Oct 09 09:45:14 2019 
+0530<br>@@ -40,6 +40,8 @@<br> <br> #define LOWRES_COST_MASK ((1 << 14) - 1)<br> #define LOWRES_COST_SHIFT 14<br>+#define AQ_EDGE_BIAS 0.5<br>+#define EDGE_INCLINATION 45<br> <br> /* Thread local data for lookahead tasks */<br> struct LookaheadTLD<br>@@ -92,7 +94,7 @@<br> protected:<br> <br> uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);<br>- uint32_t edgeDensityCu(Frame*curFrame, pixel *edgeImage, pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);<br>+ uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);<br> uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize);<br> uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);<br> bool allocWeightedRef(Lowres& fenc);<br>diff -r 354901970679 -r b66d88859a52 source/test/regression-tests.txt<br>--- a/source/test/regression-tests.txt Fri Sep 13 15:57:26 2019 +0530<br>+++ b/source/test/regression-tests.txt Wed Oct 09 09:45:14 2019 +0530<br>@@ -154,7 +154,7 @@<br> BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000<br> big_buck_bunny_360p24.y4m, --bitrate 500 --fades<br> 720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme<br>-ducks_take_off_420_1_720p50.y4m,--preset medium --aq-mode 4 --crf 22 --no-cutree<br>+ducks_take_off_420_720p50.y4m,--preset medium --aq-mode 4 --crf 22 --no-cutree<br> ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao --crf 20<br> Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000<br> <br><div><div dir="ltr"><div dir="ltr"><div dir="ltr"><div dir="ltr"><div dir="ltr"><font face="verdana, sans-serif" color="#0c343d">Thanks & Regards</font><div><font face="verdana, sans-serif" color="#0c343d"><b>Akil 
R</b></font></div><div><font face="verdana, sans-serif" color="#0c343d" size="1">Video Codec Engineer </font></div><div><font face="verdana, sans-serif" color="#0c343d" size="1">Media &amp; AI Analytics</font></div><div><a href="https://multicorewareinc.com/" target="_blank"><img src="https://docs.google.com/uc?export=download&amp;id=1kc3RJu9M8bnIf6Xa5rUw2d-eEVUsPBE5&amp;revid=0B7tw9XJBmynaemR1VUpQUi9DVytRVW5SVkRwVTFjb1hBMUcwPQ"></a><br></div></div></div></div></div></div></div></div>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br clear="all"><div><br></div>-- <br><div dir="ltr" class="gmail_signature"><div dir="ltr"><font face="georgia, serif">Regards,</font><div><font face="georgia, serif">Aruna</font></div></div></div>