[x265] [x265 patch] Fix: Performance drop in aq-mode 4
Akil
akil at multicorewareinc.com
Fri Oct 11 10:11:07 CEST 2019
This should work.
# HG changeset patch
# User Akil Ayyappan<akil at multicorewareinc.com>
# Date 1570778152 -19800
# Fri Oct 11 12:45:52 2019 +0530
# Branch Release_3.2
# Node ID efe5ac3c25dac009efbffaf5ed5e54734a02f812
# Parent 377cb2b0c3698342008a9304e8e7f5bedcf3f1f4
Fix: Performance drop in aq-mode 4
This patch moves the memory handling for the edge information required by
aq-mode 4 to the Frame class level, so that the edge, Gaussian and theta
buffers are allocated with the Frame and can be reused by the threads.
diff -r 377cb2b0c369 -r efe5ac3c25da source/common/frame.cpp
--- a/source/common/frame.cpp Tue Sep 24 15:02:05 2019 +0530
+++ b/source/common/frame.cpp Fri Oct 11 12:45:52 2019 +0530
@@ -57,6 +57,9 @@
m_addOnPrevChange = NULL;
m_classifyFrame = false;
m_fieldNum = 0;
+ m_edgePic = NULL;
+ m_gaussianPic = NULL;
+ m_thetaPic = NULL;
}
bool Frame::create(x265_param *param, float* quantOffsets)
@@ -97,6 +100,20 @@
CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size);
}
+ if (param->rc.aqMode == X265_AQ_EDGE || (param->rc.zonefileCount &&
param->rc.aqMode != 0))
+ {
+ uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize -
1) / param->maxCUSize;
+ uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize -
1) / param->maxCUSize;
+ uint32_t m_lumaMarginX = param->maxCUSize + 32; // search margin
and 8-tap filter half-length, padded for 32-byte alignment
+ uint32_t m_lumaMarginY = param->maxCUSize + 16; // margin for
8-tap filter and infinite padding
+ intptr_t m_stride = (numCuInWidth * param->maxCUSize) +
(m_lumaMarginX << 1);
+ int maxHeight = numCuInHeight * param->maxCUSize;
+
+ m_edgePic = X265_MALLOC(pixel, m_stride * (maxHeight +
(m_lumaMarginY * 2)));
+ m_gaussianPic = X265_MALLOC(pixel, m_stride * (maxHeight +
(m_lumaMarginY * 2)));
+ m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight +
(m_lumaMarginY * 2)));
+ }
+
if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) &&
m_lowres.create(param, m_fencPic, param->rc.qgSize))
{
X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
initialized");
@@ -242,4 +259,11 @@
X265_FREE_ZERO(m_classifyVariance);
X265_FREE_ZERO(m_classifyCount);
}
+
+ if (m_param->rc.aqMode == X265_AQ_EDGE || (m_param->rc.zonefileCount
&& m_param->rc.aqMode != 0))
+ {
+ X265_FREE(m_edgePic);
+ X265_FREE(m_gaussianPic);
+ X265_FREE(m_thetaPic);
+ }
}
diff -r 377cb2b0c369 -r efe5ac3c25da source/common/frame.h
--- a/source/common/frame.h Tue Sep 24 15:02:05 2019 +0530
+++ b/source/common/frame.h Fri Oct 11 12:45:52 2019 +0530
@@ -131,6 +131,11 @@
bool m_classifyFrame;
int m_fieldNum;
+ /* aq-mode 4 : Gaussian, edge and theta frames for edge information */
+ pixel* m_edgePic;
+ pixel* m_gaussianPic;
+ pixel* m_thetaPic;
+
Frame();
bool create(x265_param *param, float* quantOffsets);
diff -r 377cb2b0c369 -r efe5ac3c25da source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Tue Sep 24 15:02:05 2019 +0530
+++ b/source/encoder/slicetype.cpp Fri Oct 11 12:45:52 2019 +0530
@@ -85,12 +85,22 @@
} // end anonymous namespace
-void edgeFilter(Frame *curFrame, pixel *pic1, pixel *pic2, pixel *pic3,
intptr_t stride, int height, int width)
+void edgeFilter(Frame *curFrame, x265_param* param)
{
+ int height = curFrame->m_fencPic->m_picHeight;
+ int width = curFrame->m_fencPic->m_picWidth;
+ intptr_t stride = curFrame->m_fencPic->m_stride;
+ uint32_t numCuInHeight = (height + param->maxCUSize - 1) /
param->maxCUSize;
+ int maxHeight = numCuInHeight * param->maxCUSize;
+
+ memset(curFrame->m_edgePic, 0, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+ memset(curFrame->m_gaussianPic, 0, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+ memset(curFrame->m_thetaPic, 0, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
+
pixel *src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
- pixel *edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride +
curFrame->m_fencPic->m_lumaMarginX;
- pixel *refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride +
curFrame->m_fencPic->m_lumaMarginX;
- pixel *edgeTheta = pic3 + curFrame->m_fencPic->m_lumaMarginY * stride
+ curFrame->m_fencPic->m_lumaMarginX;
+ pixel *edgePic = curFrame->m_edgePic +
curFrame->m_fencPic->m_lumaMarginY * stride +
curFrame->m_fencPic->m_lumaMarginX;
+ pixel *refPic = curFrame->m_gaussianPic +
curFrame->m_fencPic->m_lumaMarginY * stride +
curFrame->m_fencPic->m_lumaMarginX;
+ pixel *edgeTheta = curFrame->m_thetaPic +
curFrame->m_fencPic->m_lumaMarginY * stride +
curFrame->m_fencPic->m_lumaMarginX;
for (int i = 0; i < height; i++)
{
@@ -103,7 +113,7 @@
//Applying Gaussian filter on the picture
src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
- refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride +
curFrame->m_fencPic->m_lumaMarginX;
+ refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY
* stride + curFrame->m_fencPic->m_lumaMarginX;
pixel pixelValue = 0;
for (int rowNum = 0; rowNum < height; rowNum++)
@@ -148,7 +158,7 @@
float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
float gradientMagnitude = 0;
pixel blackPixel = 0;
- edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride +
curFrame->m_fencPic->m_lumaMarginX;
+ edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
stride + curFrame->m_fencPic->m_lumaMarginX;
//Applying Sobel filter on the gaussian filtered picture
for (int rowNum = 0; rowNum < height; rowNum++)
{
@@ -198,8 +208,10 @@
angle = sum / (size*size);
}
-uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame,pixel *edgeImage,
pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY,
uint32_t qgSize)
+uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame, uint32_t &avgAngle,
uint32_t blockX, uint32_t blockY, uint32_t qgSize)
{
+ pixel *edgeImage = curFrame->m_edgePic +
curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
curFrame->m_fencPic->m_lumaMarginX;
+ pixel *edgeTheta = curFrame->m_thetaPic +
curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
curFrame->m_fencPic->m_lumaMarginX;
intptr_t srcStride = curFrame->m_fencPic->m_stride;
intptr_t blockOffsetLuma = blockX + (blockY * srcStride);
int plane = 0; // Sobel filter is applied only on Y component
@@ -478,31 +490,14 @@
}
else
{
-#define AQ_EDGE_BIAS 0.5
-#define EDGE_INCLINATION 45
-
- pixel *edgePic = NULL;
- pixel *gaussianPic = NULL;
- pixel *thetaPic = NULL;
-
- if (param->rc.aqMode == X265_AQ_EDGE)
- {
- uint32_t numCuInHeight = (maxRow + param->maxCUSize -
1) / param->maxCUSize;
- int maxHeight = numCuInHeight * param->maxCUSize;
- intptr_t stride = curFrame->m_fencPic->m_stride;
- edgePic = X265_MALLOC(pixel, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)));
- gaussianPic = X265_MALLOC(pixel, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)));
- thetaPic = X265_MALLOC(pixel, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)));
- memset(edgePic, 0, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
- memset(gaussianPic, 0, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
- memset(thetaPic, 0, stride * (maxHeight +
(curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
- edgeFilter(curFrame, edgePic, gaussianPic, thetaPic,
stride, maxRow, maxCol);
- }
-
int blockXY = 0, inclinedEdge = 0;
double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
double bias_strength = 0.f;
double strength = 0.f;
+
+ if (param->rc.aqMode == X265_AQ_EDGE)
+ edgeFilter(curFrame, param);
+
if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE ||
param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode ==
X265_AQ_EDGE)
{
double bit_depth_correction = 1.f / (1 << (2 *
(X265_DEPTH - 8)));
@@ -514,9 +509,7 @@
energy = acEnergyCu(curFrame, blockX, blockY,
param->internalCsp, param->rc.qgSize);
if (param->rc.aqMode == X265_AQ_EDGE)
{
- pixel *edgeImage = edgePic +
curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
curFrame->m_fencPic->m_lumaMarginX;
- pixel *edgeTheta = thetaPic +
curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
curFrame->m_fencPic->m_lumaMarginX;
- edgeDensity = edgeDensityCu(curFrame,
edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
+ edgeDensity = edgeDensityCu(curFrame,
avgAngle, blockX, blockY, param->rc.qgSize);
if (edgeDensity)
{
qp_adj = pow(edgeDensity *
bit_depth_correction + 1, 0.1);
@@ -549,13 +542,6 @@
else
strength = param->rc.aqStrength * 1.0397f;
- if (param->rc.aqMode == X265_AQ_EDGE)
- {
- X265_FREE(edgePic);
- X265_FREE(gaussianPic);
- X265_FREE(thetaPic);
- }
-
blockXY = 0;
for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
{
diff -r 377cb2b0c369 -r efe5ac3c25da source/encoder/slicetype.h
--- a/source/encoder/slicetype.h Tue Sep 24 15:02:05 2019 +0530
+++ b/source/encoder/slicetype.h Fri Oct 11 12:45:52 2019 +0530
@@ -40,6 +40,8 @@
#define LOWRES_COST_MASK ((1 << 14) - 1)
#define LOWRES_COST_SHIFT 14
+#define AQ_EDGE_BIAS 0.5
+#define EDGE_INCLINATION 45
/* Thread local data for lookahead tasks */
struct LookaheadTLD
@@ -92,7 +94,7 @@
protected:
uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY,
int csp, uint32_t qgSize);
- uint32_t edgeDensityCu(Frame*curFrame, pixel *edgeImage, pixel
*edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t
qgSize);
+ uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t
blockX, uint32_t blockY, uint32_t qgSize);
uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY,
uint32_t qgSize);
uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
bool allocWeightedRef(Lowres& fenc);
diff -r 377cb2b0c369 -r efe5ac3c25da source/test/regression-tests.txt
--- a/source/test/regression-tests.txt Tue Sep 24 15:02:05 2019 +0530
+++ b/source/test/regression-tests.txt Fri Oct 11 12:45:52 2019 +0530
@@ -154,7 +154,7 @@
BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint
50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000
big_buck_bunny_360p24.y4m, --bitrate 500 --fades
720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme
-ducks_take_off_420_1_720p50.y4m,--preset medium --aq-mode 4 --crf 22
--no-cutree
+ducks_take_off_420_720p50.y4m,--preset medium --aq-mode 4 --crf 22
--no-cutree
ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao
--crf 20
# Main12 intraCost overflow bug test
Thanks & Regards
*Akil R*
Video Codec Engineer
Media & AI Analytics
<https://multicorewareinc.com/>
On Thu, Oct 10, 2019 at 6:30 PM Aruna Matheswaran <
aruna at multicorewareinc.com> wrote:
> The patch is not applying on Release_3.2. Please rebase and send the patch.
>
> On Thu, Oct 10, 2019 at 2:31 PM Akil <akil at multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Akil Ayyappan<akil at multicorewareinc.com>
>> # Date 1570594514 -19800
>> # Wed Oct 09 09:45:14 2019 +0530
>> # Node ID b66d88859a528ae80f6f19eae7553fe7fcdb88e6
>> # Parent 354901970679c787efdfdcc6577228e9c06785cf
>> Fix: Performance drop in aq-mode 4
>>
>> This patch moves the memory allocation part of the edge information
>> required for aq-mode 4
>> to the Frame class-level in that way it can be reused by the threads.
>>
>> diff -r 354901970679 -r b66d88859a52 source/common/frame.cpp
>> --- a/source/common/frame.cpp Fri Sep 13 15:57:26 2019 +0530
>> +++ b/source/common/frame.cpp Wed Oct 09 09:45:14 2019 +0530
>> @@ -58,6 +58,9 @@
>> m_classifyFrame = false;
>> m_fieldNum = 0;
>> m_picStruct = 0;
>> + m_edgePic = NULL;
>> + m_gaussianPic = NULL;
>> + m_thetaPic = NULL;
>> }
>>
>> bool Frame::create(x265_param *param, float* quantOffsets)
>> @@ -98,6 +101,20 @@
>> CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size);
>> }
>>
>> + if (param->rc.aqMode == X265_AQ_EDGE || (param->rc.zonefileCount &&
>> param->rc.aqMode != 0))
>> + {
>> + uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize -
>> 1) / param->maxCUSize;
>> + uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize
>> - 1) / param->maxCUSize;
>> + uint32_t m_lumaMarginX = param->maxCUSize + 32; // search margin
>> and 8-tap filter half-length, padded for 32-byte alignment
>> + uint32_t m_lumaMarginY = param->maxCUSize + 16; // margin for
>> 8-tap filter and infinite padding
>> + intptr_t m_stride = (numCuInWidth * param->maxCUSize) +
>> (m_lumaMarginX << 1);
>> + int maxHeight = numCuInHeight * param->maxCUSize;
>> +
>> + m_edgePic = X265_MALLOC(pixel, m_stride * (maxHeight +
>> (m_lumaMarginY * 2)));
>> + m_gaussianPic = X265_MALLOC(pixel, m_stride * (maxHeight +
>> (m_lumaMarginY * 2)));
>> + m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight +
>> (m_lumaMarginY * 2)));
>> + }
>> +
>> if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) &&
>> m_lowres.create(param, m_fencPic, param->rc.qgSize))
>> {
>> X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
>> initialized");
>> @@ -243,4 +260,11 @@
>> X265_FREE_ZERO(m_classifyVariance);
>> X265_FREE_ZERO(m_classifyCount);
>> }
>> +
>> + if (m_param->rc.aqMode == X265_AQ_EDGE || (m_param->rc.zonefileCount
>> && m_param->rc.aqMode != 0))
>> + {
>> + X265_FREE(m_edgePic);
>> + X265_FREE(m_gaussianPic);
>> + X265_FREE(m_thetaPic);
>> + }
>> }
>> diff -r 354901970679 -r b66d88859a52 source/common/frame.h
>> --- a/source/common/frame.h Fri Sep 13 15:57:26 2019 +0530
>> +++ b/source/common/frame.h Wed Oct 09 09:45:14 2019 +0530
>> @@ -132,6 +132,11 @@
>> bool m_classifyFrame;
>> int m_fieldNum;
>>
>> + /* aq-mode 4 : Gaussian, edge and theta frames for edge information
>> */
>> + pixel* m_edgePic;
>> + pixel* m_gaussianPic;
>> + pixel* m_thetaPic;
>> +
>> Frame();
>>
>> bool create(x265_param *param, float* quantOffsets);
>> diff -r 354901970679 -r b66d88859a52 source/encoder/slicetype.cpp
>> --- a/source/encoder/slicetype.cpp Fri Sep 13 15:57:26 2019 +0530
>> +++ b/source/encoder/slicetype.cpp Wed Oct 09 09:45:14 2019 +0530
>> @@ -85,12 +85,22 @@
>>
>> } // end anonymous namespace
>>
>> -void edgeFilter(Frame *curFrame, pixel *pic1, pixel *pic2, pixel *pic3,
>> intptr_t stride, int height, int width)
>> +void edgeFilter(Frame *curFrame, x265_param* param)
>> {
>> + int height = curFrame->m_fencPic->m_picHeight;
>> + int width = curFrame->m_fencPic->m_picWidth;
>> + intptr_t stride = curFrame->m_fencPic->m_stride;
>> + uint32_t numCuInHeight = (height + param->maxCUSize - 1) /
>> param->maxCUSize;
>> + int maxHeight = numCuInHeight * param->maxCUSize;
>> +
>> + memset(curFrame->m_edgePic, 0, stride * (maxHeight +
>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>> + memset(curFrame->m_gaussianPic, 0, stride * (maxHeight +
>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>> + memset(curFrame->m_thetaPic, 0, stride * (maxHeight +
>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>> +
>> pixel *src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
>> - pixel *edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride
>> + curFrame->m_fencPic->m_lumaMarginX;
>> - pixel *refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> - pixel *edgeTheta = pic3 + curFrame->m_fencPic->m_lumaMarginY *
>> stride + curFrame->m_fencPic->m_lumaMarginX;
>> + pixel *edgePic = curFrame->m_edgePic +
>> curFrame->m_fencPic->m_lumaMarginY * stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> + pixel *refPic = curFrame->m_gaussianPic +
>> curFrame->m_fencPic->m_lumaMarginY * stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> + pixel *edgeTheta = curFrame->m_thetaPic +
>> curFrame->m_fencPic->m_lumaMarginY * stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>>
>> for (int i = 0; i < height; i++)
>> {
>> @@ -103,7 +113,7 @@
>>
>> //Applying Gaussian filter on the picture
>> src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
>> - refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> + refPic = curFrame->m_gaussianPic +
>> curFrame->m_fencPic->m_lumaMarginY * stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> pixel pixelValue = 0;
>>
>> for (int rowNum = 0; rowNum < height; rowNum++)
>> @@ -148,7 +158,7 @@
>> float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
>> float gradientMagnitude = 0;
>> pixel blackPixel = 0;
>> - edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> + edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
>> stride + curFrame->m_fencPic->m_lumaMarginX;
>> //Applying Sobel filter on the gaussian filtered picture
>> for (int rowNum = 0; rowNum < height; rowNum++)
>> {
>> @@ -198,8 +208,10 @@
>> angle = sum / (size*size);
>> }
>>
>> -uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame,pixel *edgeImage,
>> pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY,
>> uint32_t qgSize)
>> +uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame, uint32_t
>> &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize)
>> {
>> + pixel *edgeImage = curFrame->m_edgePic +
>> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> + pixel *edgeTheta = curFrame->m_thetaPic +
>> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> intptr_t srcStride = curFrame->m_fencPic->m_stride;
>> intptr_t blockOffsetLuma = blockX + (blockY * srcStride);
>> int plane = 0; // Sobel filter is applied only on Y component
>> @@ -478,31 +490,14 @@
>> }
>> else
>> {
>> -#define AQ_EDGE_BIAS 0.5
>> -#define EDGE_INCLINATION 45
>> -
>> - pixel *edgePic = NULL;
>> - pixel *gaussianPic = NULL;
>> - pixel *thetaPic = NULL;
>> -
>> - if (param->rc.aqMode == X265_AQ_EDGE)
>> - {
>> - uint32_t numCuInHeight = (maxRow + param->maxCUSize
>> - 1) / param->maxCUSize;
>> - int maxHeight = numCuInHeight * param->maxCUSize;
>> - intptr_t stride = curFrame->m_fencPic->m_stride;
>> - edgePic = X265_MALLOC(pixel, stride * (maxHeight +
>> (curFrame->m_fencPic->m_lumaMarginY * 2)));
>> - gaussianPic = X265_MALLOC(pixel, stride * (maxHeight
>> + (curFrame->m_fencPic->m_lumaMarginY * 2)));
>> - thetaPic = X265_MALLOC(pixel, stride * (maxHeight +
>> (curFrame->m_fencPic->m_lumaMarginY * 2)));
>> - memset(edgePic, 0, stride * (maxHeight +
>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>> - memset(gaussianPic, 0, stride * (maxHeight +
>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>> - memset(thetaPic, 0, stride * (maxHeight +
>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>> - edgeFilter(curFrame, edgePic, gaussianPic, thetaPic,
>> stride, maxRow, maxCol);
>> - }
>> -
>> int blockXY = 0, inclinedEdge = 0;
>> double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
>> double bias_strength = 0.f;
>> double strength = 0.f;
>> +
>> + if (param->rc.aqMode == X265_AQ_EDGE)
>> + edgeFilter(curFrame, param);
>> +
>> if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE ||
>> param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode ==
>> X265_AQ_EDGE)
>> {
>> double bit_depth_correction = 1.f / (1 << (2 *
>> (X265_DEPTH - 8)));
>> @@ -514,9 +509,7 @@
>> energy = acEnergyCu(curFrame, blockX,
>> blockY, param->internalCsp, param->rc.qgSize);
>> if (param->rc.aqMode == X265_AQ_EDGE)
>> {
>> - pixel *edgeImage = edgePic +
>> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> - pixel *edgeTheta = thetaPic +
>> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
>> curFrame->m_fencPic->m_lumaMarginX;
>> - edgeDensity = edgeDensityCu(curFrame,
>> edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
>> + edgeDensity = edgeDensityCu(curFrame,
>> avgAngle, blockX, blockY, param->rc.qgSize);
>> if (edgeDensity)
>> {
>> qp_adj = pow(edgeDensity *
>> bit_depth_correction + 1, 0.1);
>> @@ -549,13 +542,6 @@
>> else
>> strength = param->rc.aqStrength * 1.0397f;
>>
>> - if (param->rc.aqMode == X265_AQ_EDGE)
>> - {
>> - X265_FREE(edgePic);
>> - X265_FREE(gaussianPic);
>> - X265_FREE(thetaPic);
>> - }
>> -
>> blockXY = 0;
>> for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
>> {
>> diff -r 354901970679 -r b66d88859a52 source/encoder/slicetype.h
>> --- a/source/encoder/slicetype.h Fri Sep 13 15:57:26 2019 +0530
>> +++ b/source/encoder/slicetype.h Wed Oct 09 09:45:14 2019 +0530
>> @@ -40,6 +40,8 @@
>>
>> #define LOWRES_COST_MASK ((1 << 14) - 1)
>> #define LOWRES_COST_SHIFT 14
>> +#define AQ_EDGE_BIAS 0.5
>> +#define EDGE_INCLINATION 45
>>
>> /* Thread local data for lookahead tasks */
>> struct LookaheadTLD
>> @@ -92,7 +94,7 @@
>> protected:
>>
>> uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t
>> blockY, int csp, uint32_t qgSize);
>> - uint32_t edgeDensityCu(Frame*curFrame, pixel *edgeImage, pixel
>> *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t
>> qgSize);
>> + uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t
>> blockX, uint32_t blockY, uint32_t qgSize);
>> uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t
>> blockY, uint32_t qgSize);
>> uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
>> bool allocWeightedRef(Lowres& fenc);
>> diff -r 354901970679 -r b66d88859a52 source/test/regression-tests.txt
>> --- a/source/test/regression-tests.txt Fri Sep 13 15:57:26 2019 +0530
>> +++ b/source/test/regression-tests.txt Wed Oct 09 09:45:14 2019 +0530
>> @@ -154,7 +154,7 @@
>> BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint
>> 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000
>> big_buck_bunny_360p24.y4m, --bitrate 500 --fades
>> 720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme
>> -ducks_take_off_420_1_720p50.y4m,--preset medium --aq-mode 4 --crf 22
>> --no-cutree
>> +ducks_take_off_420_720p50.y4m,--preset medium --aq-mode 4 --crf 22
>> --no-cutree
>> ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao
>> --crf 20
>> Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold
>> 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
>>
>> Thanks & Regards
>> *Akil R*
>> Video Codec Engineer
>> Media & AI Analytics
>> <https://multicorewareinc.com/>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> --
> Regards,
> Aruna
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191011/324e4fe9/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: AQ_4_fix.patch
Type: application/octet-stream
Size: 11935 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191011/324e4fe9/attachment-0001.obj>
More information about the x265-devel
mailing list