[x265] [x265 patch] Fix: Performance drop in aq-mode 4

Mon Oct 14 16:52:41 CEST 2019

Pushed to Release_3.2.

On Fri, Oct 11, 2019 at 1:41 PM Akil <akil at multicorewareinc.com> wrote:

> This should work.
>
> # HG changeset patch
> # User Akil Ayyappan<akil at multicorewareinc.com>
> # Date 1570778152 -19800
> #      Fri Oct 11 12:45:52 2019 +0530
> # Branch Release_3.2
> # Node ID efe5ac3c25dac009efbffaf5ed5e54734a02f812
> # Parent  377cb2b0c3698342008a9304e8e7f5bedcf3f1f4
> Fix: Performance drop in aq-mode 4
>
> This patch moves the memory handling for the edge information required by
> aq-mode 4 to the Frame class level, so that it can be reused by the
> threads.
>
> diff -r 377cb2b0c369 -r efe5ac3c25da source/common/frame.cpp
> --- a/source/common/frame.cpp Tue Sep 24 15:02:05 2019 +0530
> +++ b/source/common/frame.cpp Fri Oct 11 12:45:52 2019 +0530
> @@ -57,6 +57,9 @@
>      m_addOnPrevChange = NULL;
>      m_classifyFrame = false;
>      m_fieldNum = 0;
> +    m_edgePic = NULL;
> +    m_gaussianPic = NULL;
> +    m_thetaPic = NULL;
>  }
>
>  bool Frame::create(x265_param *param, float* quantOffsets)
> @@ -97,6 +100,20 @@
>          CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size);
>      }
>
> +    if (param->rc.aqMode == X265_AQ_EDGE || (param->rc.zonefileCount &&
> param->rc.aqMode != 0))
> +    {
> +        uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize -
> 1) / param->maxCUSize;
> +        uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize
> - 1) / param->maxCUSize;
> +        uint32_t m_lumaMarginX = param->maxCUSize + 32; // search margin
> and 8-tap filter half-length, padded for 32-byte alignment
> +        uint32_t m_lumaMarginY = param->maxCUSize + 16; // margin for
> 8-tap filter and infinite padding
> +        intptr_t m_stride = (numCuInWidth * param->maxCUSize) +
> (m_lumaMarginX << 1);
> +        int maxHeight = numCuInHeight * param->maxCUSize;
> +
> +        m_edgePic = X265_MALLOC(pixel, m_stride * (maxHeight +
> (m_lumaMarginY * 2)));
> +        m_gaussianPic = X265_MALLOC(pixel, m_stride * (maxHeight +
> (m_lumaMarginY * 2)));
> +        m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight +
> (m_lumaMarginY * 2)));
> +    }
> +
>      if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) &&
> m_lowres.create(param, m_fencPic, param->rc.qgSize))
>      {
>          X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
> initialized");
> @@ -242,4 +259,11 @@
>          X265_FREE_ZERO(m_classifyVariance);
>          X265_FREE_ZERO(m_classifyCount);
>      }
> +
> +    if (m_param->rc.aqMode == X265_AQ_EDGE || (m_param->rc.zonefileCount
> && m_param->rc.aqMode != 0))
> +    {
> +        X265_FREE(m_edgePic);
> +        X265_FREE(m_gaussianPic);
> +        X265_FREE(m_thetaPic);
> +    }
>  }
> diff -r 377cb2b0c369 -r efe5ac3c25da source/common/frame.h
> --- a/source/common/frame.h Tue Sep 24 15:02:05 2019 +0530
> +++ b/source/common/frame.h Fri Oct 11 12:45:52 2019 +0530
> @@ -131,6 +131,11 @@
>      bool                   m_classifyFrame;
>      int                    m_fieldNum;
>
> +    /* aq-mode 4 : Gaussian, edge and theta frames for edge information */
> +    pixel*                 m_edgePic;
> +    pixel*                 m_gaussianPic;
> +    pixel*                 m_thetaPic;
> +
>      Frame();
>
>      bool create(x265_param *param, float* quantOffsets);
> diff -r 377cb2b0c369 -r efe5ac3c25da source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp Tue Sep 24 15:02:05 2019 +0530
> +++ b/source/encoder/slicetype.cpp Fri Oct 11 12:45:52 2019 +0530
> @@ -85,12 +85,22 @@
>
>  } // end anonymous namespace
>
> -void edgeFilter(Frame *curFrame, pixel *pic1, pixel *pic2, pixel *pic3,
> intptr_t stride, int height, int width)
> +void edgeFilter(Frame *curFrame, x265_param* param)
>  {
> +    int height = curFrame->m_fencPic->m_picHeight;
> +    int width = curFrame->m_fencPic->m_picWidth;
> +    intptr_t stride = curFrame->m_fencPic->m_stride;
> +    uint32_t numCuInHeight = (height + param->maxCUSize - 1) /
> param->maxCUSize;
> +    int maxHeight = numCuInHeight * param->maxCUSize;
> +
> +    memset(curFrame->m_edgePic, 0, stride * (maxHeight +
> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
> +    memset(curFrame->m_gaussianPic, 0, stride * (maxHeight +
> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
> +    memset(curFrame->m_thetaPic, 0, stride * (maxHeight +
> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
> +
>      pixel *src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
> -    pixel *edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride +
> curFrame->m_fencPic->m_lumaMarginX;
> -    pixel *refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride +
> curFrame->m_fencPic->m_lumaMarginX;
> -    pixel *edgeTheta = pic3 + curFrame->m_fencPic->m_lumaMarginY * stride
> + curFrame->m_fencPic->m_lumaMarginX;
> +    pixel *edgePic = curFrame->m_edgePic +
> curFrame->m_fencPic->m_lumaMarginY * stride +
> curFrame->m_fencPic->m_lumaMarginX;
> +    pixel *refPic = curFrame->m_gaussianPic +
> curFrame->m_fencPic->m_lumaMarginY * stride +
> curFrame->m_fencPic->m_lumaMarginX;
> +    pixel *edgeTheta = curFrame->m_thetaPic +
> curFrame->m_fencPic->m_lumaMarginY * stride +
> curFrame->m_fencPic->m_lumaMarginX;
>
>      for (int i = 0; i < height; i++)
>      {
> @@ -103,7 +113,7 @@
>
>      //Applying Gaussian filter on the picture
>      src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
> -    refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride +
> curFrame->m_fencPic->m_lumaMarginX;
> +    refPic = curFrame->m_gaussianPic + curFrame->m_fencPic->m_lumaMarginY
> * stride + curFrame->m_fencPic->m_lumaMarginX;
>      pixel pixelValue = 0;
>
>      for (int rowNum = 0; rowNum < height; rowNum++)
> @@ -148,7 +158,7 @@
>      float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
>      float gradientMagnitude = 0;
>      pixel blackPixel = 0;
> -    edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride +
> curFrame->m_fencPic->m_lumaMarginX;
> +    edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY *
> stride + curFrame->m_fencPic->m_lumaMarginX;
>      //Applying Sobel filter on the gaussian filtered picture
>      for (int rowNum = 0; rowNum < height; rowNum++)
>      {
> @@ -198,8 +208,10 @@
>      angle = sum / (size*size);
>  }
>
> -uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame,pixel *edgeImage,
> pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY,
> uint32_t qgSize)
> +uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame, uint32_t &avgAngle,
> uint32_t blockX, uint32_t blockY, uint32_t qgSize)
>  {
> +    pixel *edgeImage = curFrame->m_edgePic +
> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
> curFrame->m_fencPic->m_lumaMarginX;
> +    pixel *edgeTheta = curFrame->m_thetaPic +
> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
> curFrame->m_fencPic->m_lumaMarginX;
>      intptr_t srcStride = curFrame->m_fencPic->m_stride;
>      intptr_t blockOffsetLuma = blockX + (blockY * srcStride);
>      int plane = 0; // Sobel filter is applied only on Y component
> @@ -478,31 +490,14 @@
>              }
>              else
>              {
> -#define AQ_EDGE_BIAS 0.5
> -#define EDGE_INCLINATION 45
> -
> -                pixel *edgePic = NULL;
> -                pixel *gaussianPic = NULL;
> -                pixel *thetaPic = NULL;
> -
> -                if (param->rc.aqMode == X265_AQ_EDGE)
> -                {
> -                    uint32_t numCuInHeight = (maxRow + param->maxCUSize -
> 1) / param->maxCUSize;
> -                    int maxHeight = numCuInHeight * param->maxCUSize;
> -                    intptr_t stride = curFrame->m_fencPic->m_stride;
> -                    edgePic = X265_MALLOC(pixel, stride * (maxHeight +
> (curFrame->m_fencPic->m_lumaMarginY * 2)));
> -                    gaussianPic = X265_MALLOC(pixel, stride * (maxHeight
> + (curFrame->m_fencPic->m_lumaMarginY * 2)));
> -                    thetaPic = X265_MALLOC(pixel, stride * (maxHeight +
> (curFrame->m_fencPic->m_lumaMarginY * 2)));
> -                    memset(edgePic, 0, stride * (maxHeight +
> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
> -                    memset(gaussianPic, 0, stride * (maxHeight +
> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
> -                    memset(thetaPic, 0, stride * (maxHeight +
> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
> -                    edgeFilter(curFrame, edgePic, gaussianPic, thetaPic,
> stride, maxRow, maxCol);
> -                }
> -
>                  int blockXY = 0, inclinedEdge = 0;
>                  double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
>                  double bias_strength = 0.f;
>                  double strength = 0.f;
> +
> +                if (param->rc.aqMode == X265_AQ_EDGE)
> +                    edgeFilter(curFrame, param);
> +
>                  if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE ||
> param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode ==
> X265_AQ_EDGE)
>                  {
>                      double bit_depth_correction = 1.f / (1 << (2 *
> (X265_DEPTH - 8)));
> @@ -514,9 +509,7 @@
>                              energy = acEnergyCu(curFrame, blockX, blockY,
> param->internalCsp, param->rc.qgSize);
>                              if (param->rc.aqMode == X265_AQ_EDGE)
>                              {
> -                                pixel *edgeImage = edgePic +
> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
> curFrame->m_fencPic->m_lumaMarginX;
> -                                pixel *edgeTheta = thetaPic +
> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
> curFrame->m_fencPic->m_lumaMarginX;
> -                                edgeDensity = edgeDensityCu(curFrame,
> edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
> +                                edgeDensity = edgeDensityCu(curFrame,
> avgAngle, blockX, blockY, param->rc.qgSize);
>                                  if (edgeDensity)
>                                  {
>                                      qp_adj = pow(edgeDensity *
> bit_depth_correction + 1, 0.1);
> @@ -549,13 +542,6 @@
>                  else
>                      strength = param->rc.aqStrength * 1.0397f;
>
> -                if (param->rc.aqMode == X265_AQ_EDGE)
> -                {
> -                    X265_FREE(edgePic);
> -                    X265_FREE(gaussianPic);
> -                    X265_FREE(thetaPic);
> -                }
> -
>                  blockXY = 0;
>                  for (int blockY = 0; blockY < maxRow; blockY += loopIncr)
>                  {
> diff -r 377cb2b0c369 -r efe5ac3c25da source/encoder/slicetype.h
> --- a/source/encoder/slicetype.h Tue Sep 24 15:02:05 2019 +0530
> +++ b/source/encoder/slicetype.h Fri Oct 11 12:45:52 2019 +0530
> @@ -40,6 +40,8 @@
>
>  #define LOWRES_COST_MASK  ((1 << 14) - 1)
>  #define LOWRES_COST_SHIFT 14
> +#define AQ_EDGE_BIAS 0.5
> +#define EDGE_INCLINATION 45
>
>  /* Thread local data for lookahead tasks */
>  struct LookaheadTLD
> @@ -92,7 +94,7 @@
>  protected:
>
>      uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t
> blockY, int csp, uint32_t qgSize);
> -    uint32_t edgeDensityCu(Frame*curFrame, pixel *edgeImage, pixel
> *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t
> qgSize);
> +    uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t
> blockX, uint32_t blockY, uint32_t qgSize);
>      uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY,
> uint32_t qgSize);
>      uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
>      bool     allocWeightedRef(Lowres& fenc);
> diff -r 377cb2b0c369 -r efe5ac3c25da source/test/regression-tests.txt
> --- a/source/test/regression-tests.txt Tue Sep 24 15:02:05 2019 +0530
> +++ b/source/test/regression-tests.txt Fri Oct 11 12:45:52 2019 +0530
> @@ -154,7 +154,7 @@
>  BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint
> 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000
>  big_buck_bunny_360p24.y4m, --bitrate 500 --fades
>  720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme
> -ducks_take_off_420_1_720p50.y4m,--preset medium --aq-mode 4 --crf 22
> --no-cutree
> +ducks_take_off_420_720p50.y4m,--preset medium --aq-mode 4 --crf 22
> --no-cutree
>  ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao
> --crf 20
>
>  # Main12 intraCost overflow bug test
>
> Thanks & Regards
> *Akil R*
> Video Codec Engineer
> Media & AI Analytics
> <https://multicorewareinc.com/>
>
>
> On Thu, Oct 10, 2019 at 6:30 PM Aruna Matheswaran <
> aruna at multicorewareinc.com> wrote:
>
>> The patch does not apply on Release_3.2. Please rebase and resend the
>> patch.
>>
>> On Thu, Oct 10, 2019 at 2:31 PM Akil <akil at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Akil Ayyappan<akil at multicorewareinc.com>
>>> # Date 1570594514 -19800
>>> #      Wed Oct 09 09:45:14 2019 +0530
>>> # Node ID b66d88859a528ae80f6f19eae7553fe7fcdb88e6
>>> # Parent  354901970679c787efdfdcc6577228e9c06785cf
>>> Fix: Performance drop in aq-mode 4
>>>
>>> This patch moves the memory allocation for the edge information
>>> required by aq-mode 4 to the Frame class level, so that it can be
>>> reused by the threads.
>>>
>>> diff -r 354901970679 -r b66d88859a52 source/common/frame.cpp
>>> --- a/source/common/frame.cpp Fri Sep 13 15:57:26 2019 +0530
>>> +++ b/source/common/frame.cpp Wed Oct 09 09:45:14 2019 +0530
>>> @@ -58,6 +58,9 @@
>>>      m_classifyFrame = false;
>>>      m_fieldNum = 0;
>>>      m_picStruct = 0;
>>> +    m_edgePic = NULL;
>>> +    m_gaussianPic = NULL;
>>> +    m_thetaPic = NULL;
>>>  }
>>>
>>>  bool Frame::create(x265_param *param, float* quantOffsets)
>>> @@ -98,6 +101,20 @@
>>>          CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size);
>>>      }
>>>
>>> +    if (param->rc.aqMode == X265_AQ_EDGE || (param->rc.zonefileCount &&
>>> param->rc.aqMode != 0))
>>> +    {
>>> +        uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize
>>> - 1) / param->maxCUSize;
>>> +        uint32_t numCuInHeight = (param->sourceHeight +
>>> param->maxCUSize - 1) / param->maxCUSize;
>>> +        uint32_t m_lumaMarginX = param->maxCUSize + 32; // search
>>> margin and 8-tap filter half-length, padded for 32-byte alignment
>>> +        uint32_t m_lumaMarginY = param->maxCUSize + 16; // margin for
>>> 8-tap filter and infinite padding
>>> +        intptr_t m_stride = (numCuInWidth * param->maxCUSize) +
>>> (m_lumaMarginX << 1);
>>> +        int maxHeight = numCuInHeight * param->maxCUSize;
>>> +
>>> +        m_edgePic = X265_MALLOC(pixel, m_stride * (maxHeight +
>>> (m_lumaMarginY * 2)));
>>> +        m_gaussianPic = X265_MALLOC(pixel, m_stride * (maxHeight +
>>> (m_lumaMarginY * 2)));
>>> +        m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight +
>>> (m_lumaMarginY * 2)));
>>> +    }
>>> +
>>>      if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) &&
>>> m_lowres.create(param, m_fencPic, param->rc.qgSize))
>>>      {
>>>          X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
>>> initialized");
>>> @@ -243,4 +260,11 @@
>>>          X265_FREE_ZERO(m_classifyVariance);
>>>          X265_FREE_ZERO(m_classifyCount);
>>>      }
>>> +
>>> +    if (m_param->rc.aqMode == X265_AQ_EDGE ||
>>> (m_param->rc.zonefileCount && m_param->rc.aqMode != 0))
>>> +    {
>>> +        X265_FREE(m_edgePic);
>>> +        X265_FREE(m_gaussianPic);
>>> +        X265_FREE(m_thetaPic);
>>> +    }
>>>  }
>>> diff -r 354901970679 -r b66d88859a52 source/common/frame.h
>>> --- a/source/common/frame.h Fri Sep 13 15:57:26 2019 +0530
>>> +++ b/source/common/frame.h Wed Oct 09 09:45:14 2019 +0530
>>> @@ -132,6 +132,11 @@
>>>      bool                   m_classifyFrame;
>>>      int                    m_fieldNum;
>>>
>>> +    /* aq-mode 4 : Gaussian, edge and theta frames for edge information
>>> */
>>> +    pixel*                 m_edgePic;
>>> +    pixel*                 m_gaussianPic;
>>> +    pixel*                 m_thetaPic;
>>> +
>>>      Frame();
>>>
>>>      bool create(x265_param *param, float* quantOffsets);
>>> diff -r 354901970679 -r b66d88859a52 source/encoder/slicetype.cpp
>>> --- a/source/encoder/slicetype.cpp Fri Sep 13 15:57:26 2019 +0530
>>> +++ b/source/encoder/slicetype.cpp Wed Oct 09 09:45:14 2019 +0530
>>> @@ -85,12 +85,22 @@
>>>
>>>  } // end anonymous namespace
>>>
>>> -void edgeFilter(Frame *curFrame, pixel *pic1, pixel *pic2, pixel *pic3,
>>> intptr_t stride, int height, int width)
>>> +void edgeFilter(Frame *curFrame, x265_param* param)
>>>  {
>>> +    int height = curFrame->m_fencPic->m_picHeight;
>>> +    int width = curFrame->m_fencPic->m_picWidth;
>>> +    intptr_t stride = curFrame->m_fencPic->m_stride;
>>> +    uint32_t numCuInHeight = (height + param->maxCUSize - 1) /
>>> param->maxCUSize;
>>> +    int maxHeight = numCuInHeight * param->maxCUSize;
>>> +
>>> +    memset(curFrame->m_edgePic, 0, stride * (maxHeight +
>>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>>> +    memset(curFrame->m_gaussianPic, 0, stride * (maxHeight +
>>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>>> +    memset(curFrame->m_thetaPic, 0, stride * (maxHeight +
>>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>>> +
>>>      pixel *src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
>>> -    pixel *edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride
>>> + curFrame->m_fencPic->m_lumaMarginX;
>>> -    pixel *refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride
>>> + curFrame->m_fencPic->m_lumaMarginX;
>>> -    pixel *edgeTheta = pic3 + curFrame->m_fencPic->m_lumaMarginY *
>>> stride + curFrame->m_fencPic->m_lumaMarginX;
>>> +    pixel *edgePic = curFrame->m_edgePic +
>>> curFrame->m_fencPic->m_lumaMarginY * stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>> +    pixel *refPic = curFrame->m_gaussianPic +
>>> curFrame->m_fencPic->m_lumaMarginY * stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>> +    pixel *edgeTheta = curFrame->m_thetaPic +
>>> curFrame->m_fencPic->m_lumaMarginY * stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>>
>>>      for (int i = 0; i < height; i++)
>>>      {
>>> @@ -103,7 +113,7 @@
>>>
>>>      //Applying Gaussian filter on the picture
>>>      src = (pixel*)curFrame->m_fencPic->m_picOrg[0];
>>> -    refPic = pic2 + curFrame->m_fencPic->m_lumaMarginY * stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>> +    refPic = curFrame->m_gaussianPic +
>>> curFrame->m_fencPic->m_lumaMarginY * stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>>      pixel pixelValue = 0;
>>>
>>>      for (int rowNum = 0; rowNum < height; rowNum++)
>>> @@ -148,7 +158,7 @@
>>>      float gradientH = 0, gradientV = 0, radians = 0, theta = 0;
>>>      float gradientMagnitude = 0;
>>>      pixel blackPixel = 0;
>>> -    edgePic = pic1 + curFrame->m_fencPic->m_lumaMarginY * stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>> +    edgePic = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY
>>> * stride + curFrame->m_fencPic->m_lumaMarginX;
>>>      //Applying Sobel filter on the gaussian filtered picture
>>>      for (int rowNum = 0; rowNum < height; rowNum++)
>>>      {
>>> @@ -198,8 +208,10 @@
>>>      angle = sum / (size*size);
>>>  }
>>>
>>> -uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame,pixel *edgeImage,
>>> pixel *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY,
>>> uint32_t qgSize)
>>> +uint32_t LookaheadTLD::edgeDensityCu(Frame* curFrame, uint32_t
>>> &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize)
>>>  {
>>> +    pixel *edgeImage = curFrame->m_edgePic +
>>> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>> +    pixel *edgeTheta = curFrame->m_thetaPic +
>>> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>>      intptr_t srcStride = curFrame->m_fencPic->m_stride;
>>>      intptr_t blockOffsetLuma = blockX + (blockY * srcStride);
>>>      int plane = 0; // Sobel filter is applied only on Y component
>>> @@ -478,31 +490,14 @@
>>>              }
>>>              else
>>>              {
>>> -#define AQ_EDGE_BIAS 0.5
>>> -#define EDGE_INCLINATION 45
>>> -
>>> -                pixel *edgePic = NULL;
>>> -                pixel *gaussianPic = NULL;
>>> -                pixel *thetaPic = NULL;
>>> -
>>> -                if (param->rc.aqMode == X265_AQ_EDGE)
>>> -                {
>>> -                    uint32_t numCuInHeight = (maxRow + param->maxCUSize
>>> - 1) / param->maxCUSize;
>>> -                    int maxHeight = numCuInHeight * param->maxCUSize;
>>> -                    intptr_t stride = curFrame->m_fencPic->m_stride;
>>> -                    edgePic = X265_MALLOC(pixel, stride * (maxHeight +
>>> (curFrame->m_fencPic->m_lumaMarginY * 2)));
>>> -                    gaussianPic = X265_MALLOC(pixel, stride *
>>> (maxHeight + (curFrame->m_fencPic->m_lumaMarginY * 2)));
>>> -                    thetaPic = X265_MALLOC(pixel, stride * (maxHeight +
>>> (curFrame->m_fencPic->m_lumaMarginY * 2)));
>>> -                    memset(edgePic, 0, stride * (maxHeight +
>>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>>> -                    memset(gaussianPic, 0, stride * (maxHeight +
>>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>>> -                    memset(thetaPic, 0, stride * (maxHeight +
>>> (curFrame->m_fencPic->m_lumaMarginY * 2)) * sizeof(pixel));
>>> -                    edgeFilter(curFrame, edgePic, gaussianPic,
>>> thetaPic, stride, maxRow, maxCol);
>>> -                }
>>> -
>>>                  int blockXY = 0, inclinedEdge = 0;
>>>                  double avg_adj_pow2 = 0, avg_adj = 0, qp_adj = 0;
>>>                  double bias_strength = 0.f;
>>>                  double strength = 0.f;
>>> +
>>> +                if (param->rc.aqMode == X265_AQ_EDGE)
>>> +                    edgeFilter(curFrame, param);
>>> +
>>>                  if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE ||
>>> param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode ==
>>> X265_AQ_EDGE)
>>>                  {
>>>                      double bit_depth_correction = 1.f / (1 << (2 *
>>> (X265_DEPTH - 8)));
>>> @@ -514,9 +509,7 @@
>>>                              energy = acEnergyCu(curFrame, blockX,
>>> blockY, param->internalCsp, param->rc.qgSize);
>>>                              if (param->rc.aqMode == X265_AQ_EDGE)
>>>                              {
>>> -                                pixel *edgeImage = edgePic +
>>> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>> -                                pixel *edgeTheta = thetaPic +
>>> curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride +
>>> curFrame->m_fencPic->m_lumaMarginX;
>>> -                                edgeDensity = edgeDensityCu(curFrame,
>>> edgeImage, edgeTheta, avgAngle, blockX, blockY, param->rc.qgSize);
>>> +                                edgeDensity = edgeDensityCu(curFrame,
>>> avgAngle, blockX, blockY, param->rc.qgSize);
>>>                                  if (edgeDensity)
>>>                                  {
>>>                                      qp_adj = pow(edgeDensity *
>>> bit_depth_correction + 1, 0.1);
>>> @@ -549,13 +542,6 @@
>>>                  else
>>>                      strength = param->rc.aqStrength * 1.0397f;
>>>
>>> -                if (param->rc.aqMode == X265_AQ_EDGE)
>>> -                {
>>> -                    X265_FREE(edgePic);
>>> -                    X265_FREE(gaussianPic);
>>> -                    X265_FREE(thetaPic);
>>> -                }
>>> -
>>>                  blockXY = 0;
>>>                  for (int blockY = 0; blockY < maxRow; blockY +=
>>> loopIncr)
>>>                  {
>>> diff -r 354901970679 -r b66d88859a52 source/encoder/slicetype.h
>>> --- a/source/encoder/slicetype.h Fri Sep 13 15:57:26 2019 +0530
>>> +++ b/source/encoder/slicetype.h Wed Oct 09 09:45:14 2019 +0530
>>> @@ -40,6 +40,8 @@
>>>
>>>  #define LOWRES_COST_MASK  ((1 << 14) - 1)
>>>  #define LOWRES_COST_SHIFT 14
>>> +#define AQ_EDGE_BIAS 0.5
>>> +#define EDGE_INCLINATION 45
>>>
>>>  /* Thread local data for lookahead tasks */
>>>  struct LookaheadTLD
>>> @@ -92,7 +94,7 @@
>>>  protected:
>>>
>>>      uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t
>>> blockY, int csp, uint32_t qgSize);
>>> -    uint32_t edgeDensityCu(Frame*curFrame, pixel *edgeImage, pixel
>>> *edgeTheta, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t
>>> qgSize);
>>> +    uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t
>>> blockX, uint32_t blockY, uint32_t qgSize);
>>>      uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t
>>> blockY, uint32_t qgSize);
>>>      uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
>>>      bool     allocWeightedRef(Lowres& fenc);
>>> diff -r 354901970679 -r b66d88859a52 source/test/regression-tests.txt
>>> --- a/source/test/regression-tests.txt Fri Sep 13 15:57:26 2019 +0530
>>> +++ b/source/test/regression-tests.txt Wed Oct 09 09:45:14 2019 +0530
>>> @@ -154,7 +154,7 @@
>>>  BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop
>>> --keyint 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000
>>>  big_buck_bunny_360p24.y4m, --bitrate 500 --fades
>>>  720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme
>>> -ducks_take_off_420_1_720p50.y4m,--preset medium --aq-mode 4 --crf 22
>>> --no-cutree
>>> +ducks_take_off_420_720p50.y4m,--preset medium --aq-mode 4 --crf 22
>>> --no-cutree
>>>  ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao
>>> --crf 20
>>>  Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold
>>> 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
>>>
>>> Thanks & Regards
>>> *Akil R*
>>> Video Codec Engineer
>>> Media & AI Analytics
>>> <https://multicorewareinc.com/>
>>> _______________________________________________
>>> x265-devel mailing list
>>> x265-devel at videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>>
>>
>>
>> --
>> Regards,
>> Aruna
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>

-- 
Regards,
Aruna
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191014/3441dc2f/attachment-0001.html>