[x265] [x265 patch] Adaptive Frame Duplication

Fri Oct 4 12:32:45 CEST 2019

On Fri, Oct 4, 2019 at 3:59 PM Akil <akil at multicorewareinc.com> wrote:

> Thanks for the comments. Fixed accordingly.
>
> Thanks & Regards
> *Akil R*
> Video Codec Engineer
> Media & AI Analytics
> <https://multicorewareinc.com/>
>
>
> On Fri, Oct 4, 2019 at 3:19 PM Pradeep Ramachandran <
> pradeep at multicorewareinc.com> wrote:
>
>>
>> On Thu, Oct 3, 2019 at 12:35 PM Akil <akil at multicorewareinc.com> wrote:
>>
>>> # HG changeset patch
>>> # User Akil Ayyappan<akil at multicorewareinc.com>
>>> # Date 1568370446 -19800
>>> #      Fri Sep 13 15:57:26 2019 +0530
>>> # Node ID fa0d04affb8a4405ac548841ad12cfa2dab87ef2
>>> # Parent  c4b098f973e6b0ee4aee3bf0d7b54da4e2734d42
>>> Adaptive Frame duplication
>>>
>>> This patch does the following.
>>> 1. Replaces 2-3 near-identical frames with one frame and sets pic_struct
>>> based on frame doubling / tripling.
>>> 2. Adds options "--frame-dup" and "--dup-threshold" to enable frame
>>> duplication and to set the threshold for frame similarity (optional).
>>>
>>
[PR] Pushed to default branch.

>
>>> diff -r c4b098f973e6 -r fa0d04affb8a doc/reST/cli.rst
>>> --- a/doc/reST/cli.rst Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/doc/reST/cli.rst Fri Sep 13 15:57:26 2019 +0530
>>> @@ -501,6 +501,17 @@
>>>   second. The decoder must re-combine the fields in their correct
>>>   orientation for display.
>>>
>>> +.. option:: --frame-dup, --no-frame-dup
>>> +
>>> + Enable Adaptive Frame duplication. Replaces 2-3 near-identical frames
>>> with one
>>> + frame and sets pic_struct based on frame doubling / tripling.
>>> + Default disabled.
>>> +
>>> +.. option:: --dup-threshold <integer>
>>> +
>>> + Frame similarity threshold can vary between 1 and 99. This requires
>>> Adaptive
>>> + Frame Duplication to be enabled. Default 70.
>>> +
>>>  .. option:: --seek <integer>
>>>
>>>   Number of frames to skip at start of input file. Default 0
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/CMakeLists.txt
>>> --- a/source/CMakeLists.txt Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/CMakeLists.txt Fri Sep 13 15:57:26 2019 +0530
>>> @@ -29,7 +29,7 @@
>>>  option(STATIC_LINK_CRT "Statically link C runtime for release builds"
>>> OFF)
>>>  mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
>>>  # X265_BUILD must be incremented each time the public API is changed
>>> -set(X265_BUILD 179)
>>> +set(X265_BUILD 180)
>>>  configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
>>>                 "${PROJECT_BINARY_DIR}/x265.def")
>>>  configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/common/frame.cpp
>>> --- a/source/common/frame.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/common/frame.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -57,6 +57,7 @@
>>>      m_addOnPrevChange = NULL;
>>>      m_classifyFrame = false;
>>>      m_fieldNum = 0;
>>> +    m_picStruct = 0;
>>>  }
>>>
>>>  bool Frame::create(x265_param *param, float* quantOffsets)
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/common/frame.h
>>> --- a/source/common/frame.h Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/common/frame.h Fri Sep 13 15:57:26 2019 +0530
>>> @@ -98,6 +98,7 @@
>>>
>>>      float*                 m_quantOffsets;       // points to
>>> quantOffsets in x265_picture
>>>      x265_sei               m_userSEI;
>>> +    uint32_t               m_picStruct;          // picture structure
>>> SEI message
>>>      x265_dolby_vision_rpu            m_rpu;
>>>
>>>      /* Frame Parallelism - notification between FrameEncoders of
>>> available motion reference rows */
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/common/param.cpp
>>> --- a/source/common/param.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/common/param.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -135,6 +135,7 @@
>>>
>>>      /* Source specifications */
>>>      param->internalBitDepth = X265_DEPTH;
>>> +    param->sourceBitDepth = 8;
>>>      param->internalCsp = X265_CSP_I420;
>>>      param->levelIdc = 0; //Auto-detect level
>>>      param->uhdBluray = 0;
>>> @@ -338,6 +339,9 @@
>>>      param->pictureStructure = -1;
>>>      param->bEmitCLL = 1;
>>>
>>> +    param->bEnableFrameDuplication = 0;
>>> +    param->dupThreshold = 70;
>>> +
>>>      /* SVT Hevc Encoder specific params */
>>>      param->bEnableSvtHevc = 0;
>>>      param->svtHevcParam = NULL;
>>> @@ -1294,6 +1298,8 @@
>>>          OPT("fades") p->bEnableFades = atobool(value);
>>>          OPT("field") p->bField = atobool( value );
>>>          OPT("cll") p->bEmitCLL = atobool(value);
>>> +        OPT("frame-dup") p->bEnableFrameDuplication = atobool(value);
>>> +        OPT("dup-threshold") p->dupThreshold = atoi(value);
>>>          OPT("hme") p->bEnableHME = atobool(value);
>>>          OPT("hme-search")
>>>          {
>>> @@ -1680,6 +1686,8 @@
>>>          "Supported factor for controlling max AU size is from 0.5 to
>>> 1");
>>>      CHECK((param->dolbyProfile != 0) && (param->dolbyProfile != 50) &&
>>> (param->dolbyProfile != 81) && (param->dolbyProfile != 82),
>>>          "Unsupported Dolby Vision profile, only profile 5, profile 8.1
>>> and profile 8.2 enabled");
>>> +    CHECK(param->dupThreshold < 1 || 99 < param->dupThreshold,
>>> +        "Invalid frame-duplication threshold. Value must be between 1
>>> and 99.");
>>>      if (param->dolbyProfile)
>>>      {
>>>          CHECK((param->rc.vbvMaxBitrate <= 0 || param->rc.vbvBufferSize
>>> <= 0), "Dolby Vision requires VBV settings to enable HRD.\n");
>>> @@ -1972,6 +1980,9 @@
>>>      s += sprintf(s, " subme=%d", p->subpelRefine);
>>>      s += sprintf(s, " merange=%d", p->searchRange);
>>>      BOOL(p->bEnableTemporalMvp, "temporal-mvp");
>>> +    BOOL(p->bEnableFrameDuplication, "frame-dup");
>>> +    if(p->bEnableFrameDuplication)
>>> +        s += sprintf(s, " dup-threshold=%d", p->dupThreshold);
>>>      BOOL(p->bEnableHME, "hme");
>>>      if (p->bEnableHME)
>>>          s += sprintf(s, " Level 0,1,2=%d,%d,%d", p->hmeSearchMethod[0],
>>> p->hmeSearchMethod[1], p->hmeSearchMethod[2]);
>>> @@ -2209,6 +2220,7 @@
>>>      if (src->csvfn) dst->csvfn = strdup(src->csvfn);
>>>      else dst->csvfn = NULL;
>>>      dst->internalBitDepth = src->internalBitDepth;
>>> +    dst->sourceBitDepth = src->sourceBitDepth;
>>>      dst->internalCsp = src->internalCsp;
>>>      dst->fpsNum = src->fpsNum;
>>>      dst->fpsDenom = src->fpsDenom;
>>> @@ -2263,6 +2275,8 @@
>>>      dst->subpelRefine = src->subpelRefine;
>>>      dst->searchRange = src->searchRange;
>>>      dst->bEnableTemporalMvp = src->bEnableTemporalMvp;
>>> +    dst->bEnableFrameDuplication = src->bEnableFrameDuplication;
>>> +    dst->dupThreshold = src->dupThreshold;
>>>      dst->bEnableHME = src->bEnableHME;
>>>      if (src->bEnableHME)
>>>      {
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/encoder/api.cpp
>>> --- a/source/encoder/api.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/encoder/api.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -923,6 +923,7 @@
>>>      pic->userSEI.numPayloads = 0;
>>>      pic->rpu.payloadSize = 0;
>>>      pic->rpu.payload = NULL;
>>> +    pic->picStruct = 0;
>>>
>>>      if ((param->analysisSave || param->analysisLoad) ||
>>> (param->bAnalysisType == AVC_INFO))
>>>      {
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/encoder/encoder.cpp
>>> --- a/source/encoder/encoder.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/encoder/encoder.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -117,6 +117,8 @@
>>>      m_cR = 1.0;
>>>      for (int i = 0; i < X265_MAX_FRAME_THREADS; i++)
>>>          m_frameEncoder[i] = NULL;
>>> +    for (uint32_t i = 0; i < DUP_BUFFER; i++)
>>> +        m_dupBuffer[i] = NULL;
>>>      MotionEstimate::initScales();
>>>
>>>  #if ENABLE_HDR10_PLUS
>>> @@ -160,6 +162,53 @@
>>>      int rows = (p->sourceHeight + p->maxCUSize - 1) >>
>>> g_log2Size[p->maxCUSize];
>>>      int cols = (p->sourceWidth  + p->maxCUSize - 1) >>
>>> g_log2Size[p->maxCUSize];
>>>
>>> +    if (m_param->bEnableFrameDuplication)
>>> +    {
>>> +        size_t framesize = 0;
>>> +        int pixelbytes = p->sourceBitDepth > 8 ? 2 : 1;
>>> +        for (int i = 0; i < x265_cli_csps[p->internalCsp].planes; i++)
>>> +        {
>>> +            int stride = (p->sourceWidth >>
>>> x265_cli_csps[p->internalCsp].width[i]) * pixelbytes;
>>> +            framesize += (stride * (p->sourceHeight >>
>>> x265_cli_csps[p->internalCsp].height[i]));
>>> +        }
>>> +
>>> +        //Sets the picture structure and emits it in the picture timing
>>> SEI message
>>> +        m_param->pictureStructure = 0;
>>> +
>>> +        for (uint32_t i = 0; i < DUP_BUFFER; i++)
>>> +        {
>>> +            m_dupBuffer[i] =
>>> (AdaptiveFrameDuplication*)x265_malloc(sizeof(AdaptiveFrameDuplication));
>>> +            m_dupBuffer[i]->dupPic = NULL;
>>> +            m_dupBuffer[i]->dupPic = x265_picture_alloc();
>>> +            x265_picture_init(p, m_dupBuffer[i]->dupPic);
>>> +            m_dupBuffer[i]->dupPlane = NULL;
>>> +            m_dupBuffer[i]->dupPlane = X265_MALLOC(char, framesize);
>>> +            m_dupBuffer[i]->dupPic->planes[0] =
>>> m_dupBuffer[i]->dupPlane;
>>> +            m_dupBuffer[i]->bOccupied = false;
>>> +            m_dupBuffer[i]->bDup = false;
>>> +        }
>>> +
>>> +        if (!(p->sourceBitDepth == 8 && p->internalBitDepth == 8))
>>> +        {
>>> +            int size = p->sourceWidth * p->sourceHeight;
>>> +            int hshift = CHROMA_H_SHIFT(p->internalCsp);
>>> +            int vshift = CHROMA_V_SHIFT(p->internalCsp);
>>> +            int widthC = p->sourceWidth >> hshift;
>>> +            int heightC = p->sourceHeight >> vshift;
>>> +
>>> +            m_dupPicOne[0] = X265_MALLOC(pixel, size);
>>> +            m_dupPicTwo[0] = X265_MALLOC(pixel, size);
>>> +            if (p->internalCsp != X265_CSP_I400)
>>> +            {
>>> +                for (int k = 1; k < 3; k++)
>>> +                {
>>> +                    m_dupPicOne[k] = X265_MALLOC(pixel, widthC *
>>> heightC);
>>> +                    m_dupPicTwo[k] = X265_MALLOC(pixel, widthC *
>>> heightC);
>>> +                }
>>> +            }
>>> +        }
>>> +    }
>>> +
>>>      // Do not allow WPP if only one row or fewer than 3 columns, it is
>>> pointless and unstable
>>>      if (rows == 1 || cols < 3)
>>>      {
>>> @@ -771,6 +820,33 @@
>>>          m_exportedPic = NULL;
>>>      }
>>>
>>> +    if (m_param->bEnableFrameDuplication)
>>> +    {
>>> +        for (uint32_t i = 0; i < DUP_BUFFER; i++)
>>> +        {
>>> +            X265_FREE(m_dupBuffer[i]->dupPlane);
>>> +            x265_picture_free(m_dupBuffer[i]->dupPic);
>>> +            X265_FREE(m_dupBuffer[i]);
>>> +        }
>>> +
>>> +        if (!(m_param->sourceBitDepth == 8 && m_param->internalBitDepth
>>> == 8))
>>> +        {
>>> +            for (int k = 0; k < 3; k++)
>>> +            {
>>> +                if (k == 0)
>>> +                {
>>> +                    X265_FREE(m_dupPicOne[k]);
>>> +                    X265_FREE(m_dupPicTwo[k]);
>>> +                }
>>> +                else if(k >= 1 && m_param->internalCsp != X265_CSP_I400)
>>> +                {
>>> +                    X265_FREE(m_dupPicOne[k]);
>>> +                    X265_FREE(m_dupPicTwo[k]);
>>> +                }
>>> +            }
>>> +        }
>>> +    }
>>> +
>>>      for (int i = 0; i < m_param->frameNumThreads; i++)
>>>      {
>>>          if (m_frameEncoder[i])
>>> @@ -981,6 +1057,250 @@
>>>      }
>>>  }
>>>
>>> +//Find Sum of Squared Difference (SSD) between two pictures
>>> +uint64_t Encoder::computeSSD(pixel *fenc, pixel *rec, intptr_t stride,
>>> uint32_t width, uint32_t height, x265_param *param)
>>> +{
>>> +    uint64_t ssd = 0;
>>> +
>>> +    if (!param->bEnableFrameDuplication || (width & 3))
>>> +    {
>>> +        if ((width | height) & 3)
>>> +        {
>>> +            /* Slow Path */
>>> +            for (uint32_t y = 0; y < height; y++)
>>> +            {
>>> +                for (uint32_t x = 0; x < width; x++)
>>> +                {
>>> +                    int diff = (int)(fenc[x] - rec[x]);
>>> +                    ssd += diff * diff;
>>> +                }
>>> +
>>> +                fenc += stride;
>>> +                rec += stride;
>>> +            }
>>> +
>>> +            return ssd;
>>> +        }
>>> +    }
>>> +
>>> +    uint32_t y = 0;
>>> +
>>> +    /* Consume rows in ever narrower chunks of height */
>>> +    for (int size = BLOCK_64x64; size >= BLOCK_4x4 && y < height;
>>> size--)
>>> +    {
>>> +        uint32_t rowHeight = 1 << (size + 2);
>>> +
>>> +        for (; y + rowHeight <= height; y += rowHeight)
>>> +        {
>>> +            uint32_t y1, x = 0;
>>> +
>>> +            /* Consume each row using the largest square blocks
>>> possible */
>>> +            if (size == BLOCK_64x64 && !(stride & 31))
>>> +                for (; x + 64 <= width; x += 64)
>>> +                    ssd += primitives.cu[BLOCK_64x64].sse_pp(fenc + x,
>>> stride, rec + x, stride);
>>> +
>>> +            if (size >= BLOCK_32x32 && !(stride & 15))
>>> +                for (; x + 32 <= width; x += 32)
>>> +                    for (y1 = 0; y1 + 32 <= rowHeight; y1 += 32)
>>> +                        ssd += primitives.cu[BLOCK_32x32].sse_pp(fenc
>>> + y1 * stride + x, stride, rec + y1 * stride + x, stride);
>>> +
>>> +            if (size >= BLOCK_16x16)
>>> +                for (; x + 16 <= width; x += 16)
>>> +                    for (y1 = 0; y1 + 16 <= rowHeight; y1 += 16)
>>> +                        ssd += primitives.cu[BLOCK_16x16].sse_pp(fenc
>>> + y1 * stride + x, stride, rec + y1 * stride + x, stride);
>>> +
>>> +            if (size >= BLOCK_8x8)
>>> +                for (; x + 8 <= width; x += 8)
>>> +                    for (y1 = 0; y1 + 8 <= rowHeight; y1 += 8)
>>> +                        ssd += primitives.cu[BLOCK_8x8].sse_pp(fenc +
>>> y1 * stride + x, stride, rec + y1 * stride + x, stride);
>>> +
>>> +            for (; x + 4 <= width; x += 4)
>>> +                for (y1 = 0; y1 + 4 <= rowHeight; y1 += 4)
>>> +                    ssd += primitives.cu[BLOCK_4x4].sse_pp(fenc + y1 *
>>> stride + x, stride, rec + y1 * stride + x, stride);
>>> +
>>> +            fenc += stride * rowHeight;
>>> +            rec += stride * rowHeight;
>>> +        }
>>> +    }
>>> +
>>> +    /* Handle last few rows of frames for videos
>>> +    with height not divisible by 4 */
>>> +    uint32_t h = height % y;
>>> +    if (param->bEnableFrameDuplication && h)
>>> +    {
>>> +        for (uint32_t i = 0; i < h; i++)
>>> +        {
>>> +            for (uint32_t j = 0; j < width; j++)
>>> +            {
>>> +                int diff = (int)(fenc[j] - rec[j]);
>>> +                ssd += diff * diff;
>>> +            }
>>> +
>>> +            fenc += stride;
>>> +            rec += stride;
>>> +        }
>>> +    }
>>> +
>>> +    return ssd;
>>> +}
>>> +
>>> +//Compute the PSNR weightage between two pictures
>>> +double Encoder::ComputePSNR(x265_picture *firstPic, x265_picture
>>> *secPic, x265_param *param)
>>> +{
>>> +    uint64_t ssdY = 0, ssdU = 0, ssdV = 0;
>>> +    intptr_t strideL, strideC;
>>> +    uint32_t widthL, heightL, widthC, heightC;
>>> +    double psnrY = 0, psnrU = 0, psnrV = 0, psnrWeight = 0;
>>> +    int width = firstPic->width;
>>> +    int height = firstPic->height;
>>> +    int hshift = CHROMA_H_SHIFT(firstPic->colorSpace);
>>> +    int vshift = CHROMA_V_SHIFT(firstPic->colorSpace);
>>> +    pixel *yFirstPic = NULL, *ySecPic = NULL;
>>> +    pixel *uFirstPic = NULL, *uSecPic = NULL;
>>> +    pixel *vFirstPic = NULL, *vSecPic = NULL;
>>> +
>>> +    strideL = widthL = width;
>>> +    heightL = height;
>>> +
>>> +    strideC = widthC = widthL >> hshift;
>>> +    heightC = heightL >> vshift;
>>> +
>>> +    int size = width * height;
>>> +    int maxvalY = 255 << (X265_DEPTH - 8);
>>> +    int maxvalC = 255 << (X265_DEPTH - 8);
>>> +    double refValueY = (double)maxvalY * maxvalY * size;
>>> +    double refValueC = (double)maxvalC * maxvalC * size / 4.0;
>>> +
>>> +    if (firstPic->bitDepth == 8 && X265_DEPTH == 8)
>>> +    {
>>> +        yFirstPic = (pixel*)firstPic->planes[0];
>>> +        ySecPic = (pixel*)secPic->planes[0];
>>> +        if (param->internalCsp != X265_CSP_I400)
>>> +        {
>>> +            uFirstPic = (pixel*)firstPic->planes[1];
>>> +            uSecPic = (pixel*)secPic->planes[1];
>>> +            vFirstPic = (pixel*)firstPic->planes[2];
>>> +            vSecPic = (pixel*)secPic->planes[2];
>>> +        }
>>> +    }
>>> +    else if (firstPic->bitDepth == 8 && X265_DEPTH > 8)
>>> +    {
>>> +        int shift = (X265_DEPTH - 8);
>>> +        uint8_t *yChar1, *yChar2, *uChar1, *uChar2, *vChar1, *vChar2;
>>> +
>>> +        yChar1 = (uint8_t*)firstPic->planes[0];
>>> +        yChar2 = (uint8_t*)secPic->planes[0];
>>> +
>>> +        primitives.planecopy_cp(yChar1, firstPic->stride[0] /
>>> sizeof(*yChar1), m_dupPicOne[0], firstPic->stride[0] / sizeof(*yChar1),
>>> width, height, shift);
>>> +        primitives.planecopy_cp(yChar2, secPic->stride[0] /
>>> sizeof(*yChar2), m_dupPicTwo[0], secPic->stride[0] / sizeof(*yChar2),
>>> width, height, shift);
>>> +
>>> +        if (param->internalCsp != X265_CSP_I400)
>>> +        {
>>> +            uChar1 = (uint8_t*)firstPic->planes[1];
>>> +            uChar2 = (uint8_t*)secPic->planes[1];
>>> +            vChar1 = (uint8_t*)firstPic->planes[2];
>>> +            vChar2 = (uint8_t*)secPic->planes[2];
>>> +
>>> +            primitives.planecopy_cp(uChar1, firstPic->stride[1] /
>>> sizeof(*uChar1), m_dupPicOne[1], firstPic->stride[1] / sizeof(*uChar1),
>>> widthC, heightC, shift);
>>> +            primitives.planecopy_cp(uChar2, secPic->stride[1] /
>>> sizeof(*uChar2), m_dupPicTwo[1], secPic->stride[1] / sizeof(*uChar2),
>>> widthC, heightC, shift);
>>> +
>>> +            primitives.planecopy_cp(vChar1, firstPic->stride[2] /
>>> sizeof(*vChar1), m_dupPicOne[2], firstPic->stride[2] / sizeof(*vChar1),
>>> widthC, heightC, shift);
>>> +            primitives.planecopy_cp(vChar2, secPic->stride[2] /
>>> sizeof(*vChar2), m_dupPicTwo[2], secPic->stride[2] / sizeof(*vChar2),
>>> widthC, heightC, shift);
>>> +        }
>>> +    }
>>> +    else
>>> +    {
>>> +        uint16_t *yShort1, *yShort2, *uShort1, *uShort2, *vShort1,
>>> *vShort2;
>>> +        /* defensive programming, mask off bits that are supposed to be
>>> zero */
>>> +        uint16_t mask = (1 << X265_DEPTH) - 1;
>>> +        int shift = abs(firstPic->bitDepth - X265_DEPTH);
>>> +
>>> +        yShort1 = (uint16_t*)firstPic->planes[0];
>>> +        yShort2 = (uint16_t*)secPic->planes[0];
>>> +
>>> +        if (firstPic->bitDepth > X265_DEPTH)
>>> +        {
>>> +            /* shift right and mask pixels to final size */
>>> +            primitives.planecopy_sp(yShort1, firstPic->stride[0] /
>>> sizeof(*yShort1), m_dupPicOne[0], firstPic->stride[0] / sizeof(*yShort1),
>>> width, height, shift, mask);
>>> +            primitives.planecopy_sp(yShort2, secPic->stride[0] /
>>> sizeof(*yShort2), m_dupPicTwo[0], secPic->stride[0] / sizeof(*yShort2),
>>> width, height, shift, mask);
>>> +        }
>>> +        else /* Case for (pic.bitDepth <= X265_DEPTH) */
>>> +        {
>>> +            /* shift left and mask pixels to final size */
>>> +            primitives.planecopy_sp_shl(yShort1, firstPic->stride[0] /
>>> sizeof(*yShort1), m_dupPicOne[0], firstPic->stride[0] / sizeof(*yShort1),
>>> width, height, shift, mask);
>>> +            primitives.planecopy_sp_shl(yShort2, secPic->stride[0] /
>>> sizeof(*yShort2), m_dupPicTwo[0], secPic->stride[0] / sizeof(*yShort2),
>>> width, height, shift, mask);
>>> +        }
>>> +
>>> +        if (param->internalCsp != X265_CSP_I400)
>>> +        {
>>> +            uShort1 = (uint16_t*)firstPic->planes[1];
>>> +            uShort2 = (uint16_t*)secPic->planes[1];
>>> +            vShort1 = (uint16_t*)firstPic->planes[2];
>>> +            vShort2 = (uint16_t*)secPic->planes[2];
>>> +
>>> +            if (firstPic->bitDepth > X265_DEPTH)
>>> +            {
>>> +                primitives.planecopy_sp(uShort1, firstPic->stride[1] /
>>> sizeof(*uShort1), m_dupPicOne[1], firstPic->stride[1] / sizeof(*uShort1),
>>> widthC, heightC, shift, mask);
>>> +                primitives.planecopy_sp(uShort2, secPic->stride[1] /
>>> sizeof(*uShort2), m_dupPicTwo[1], secPic->stride[1] / sizeof(*uShort2),
>>> widthC, heightC, shift, mask);
>>> +
>>> +                primitives.planecopy_sp(vShort1, firstPic->stride[2] /
>>> sizeof(*vShort1), m_dupPicOne[2], firstPic->stride[2] / sizeof(*vShort1),
>>> widthC, heightC, shift, mask);
>>> +                primitives.planecopy_sp(vShort2, secPic->stride[2] /
>>> sizeof(*vShort2), m_dupPicTwo[2], secPic->stride[2] / sizeof(*vShort2),
>>> widthC, heightC, shift, mask);
>>> +            }
>>> +            else /* Case for (pic.bitDepth <= X265_DEPTH) */
>>> +            {
>>> +                primitives.planecopy_sp_shl(uShort1,
>>> firstPic->stride[1] / sizeof(*uShort1), m_dupPicOne[1], firstPic->stride[1]
>>> / sizeof(*uShort1), widthC, heightC, shift, mask);
>>> +                primitives.planecopy_sp_shl(uShort2, secPic->stride[1]
>>> / sizeof(*uShort2), m_dupPicTwo[1], secPic->stride[1] / sizeof(*uShort2),
>>> widthC, heightC, shift, mask);
>>> +
>>> +                primitives.planecopy_sp_shl(vShort1,
>>> firstPic->stride[2] / sizeof(*vShort1), m_dupPicOne[2], firstPic->stride[2]
>>> / sizeof(*vShort1), widthC, heightC, shift, mask);
>>> +                primitives.planecopy_sp_shl(vShort2, secPic->stride[2]
>>> / sizeof(*vShort2), m_dupPicTwo[2], secPic->stride[2] / sizeof(*vShort2),
>>> widthC, heightC, shift, mask);
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    if (!(firstPic->bitDepth == 8 && X265_DEPTH == 8))
>>> +    {
>>> +        yFirstPic = m_dupPicOne[0]; ySecPic = m_dupPicTwo[0];
>>> +        uFirstPic = m_dupPicOne[1]; uSecPic = m_dupPicTwo[1];
>>> +        vFirstPic = m_dupPicOne[2]; vSecPic = m_dupPicTwo[2];
>>> +    }
>>> +
>>> +    //Compute SSD
>>> +    ssdY = computeSSD(yFirstPic, ySecPic, strideL, widthL, heightL,
>>> param);
>>> +    psnrY = (ssdY ? 10.0 * log10(refValueY / (double)ssdY) : 99.99);
>>> +
>>> +    if (param->internalCsp != X265_CSP_I400)
>>> +    {
>>> +        ssdU = computeSSD(uFirstPic, uSecPic, strideC, widthC, heightC,
>>> param);
>>> +        ssdV = computeSSD(vFirstPic, vSecPic, strideC, widthC, heightC,
>>> param);
>>> +        psnrU = (ssdU ? 10.0 * log10(refValueC / (double)ssdU) : 99.99);
>>> +        psnrV = (ssdV ? 10.0 * log10(refValueC / (double)ssdV) : 99.99);
>>> +    }
>>> +
>>> +    //Compute PSNR(picN,pic(N+1))
>>> +    return psnrWeight = (psnrY * 6 + psnrU + psnrV) / 8;
>>> +}
>>> +
>>> +void Encoder::copyPicture(x265_picture *dest, const x265_picture *src)
>>> +{
>>> +    dest->poc = src->poc;
>>> +    dest->pts = src->pts;
>>> +    dest->userSEI = src->userSEI;
>>> +    dest->bitDepth = src->bitDepth;
>>> +    dest->framesize = src->framesize;
>>> +    dest->height = src->height;
>>> +    dest->width = src->width;
>>> +    dest->colorSpace = src->colorSpace;
>>> +    dest->userSEI = src->userSEI;
>>> +    dest->rpu.payload = src->rpu.payload;
>>> +    dest->picStruct = src->picStruct;
>>> +    dest->stride[0] = src->stride[0];
>>> +    dest->stride[1] = src->stride[1];
>>> +    dest->stride[2] = src->stride[2];
>>> +    memcpy(dest->planes[0], src->planes[0], src->framesize *
>>> sizeof(char));
>>> +    dest->planes[1] = (char*)dest->planes[0] + src->stride[0] *
>>> src->height;
>>> +    dest->planes[2] = (char*)dest->planes[1] + src->stride[1] *
>>> (src->height >> x265_cli_csps[src->colorSpace].height[1]);
>>> +}
>>> +
>>>  /**
>>>   * Feed one new input frame into the encoder, get one frame out. If
>>> pic_in is
>>>   * NULL, a flush condition is implied and pic_in must be NULL for all
>>> subsequent
>>> @@ -1004,6 +1324,10 @@
>>>      if (m_aborted)
>>>          return -1;
>>>
>>> +    const x265_picture* inputPic = NULL;
>>> +    static int written = 0, read = 0;
>>> +    bool dontRead = false;
>>> +
>>>      if (m_exportedPic)
>>>      {
>>>          if (!m_param->bUseAnalysisFile && m_param->analysisSave)
>>> @@ -1012,25 +1336,84 @@
>>>          m_exportedPic = NULL;
>>>          m_dpb->recycleUnreferenced();
>>>      }
>>> -    if (pic_in && (!m_param->chunkEnd || (m_encodedFrameNum <
>>> m_param->chunkEnd)))
>>> -    {
>>> -        if (m_latestParam->forceFlush == 1)
>>> +    if ((pic_in && (!m_param->chunkEnd || (m_encodedFrameNum <
>>> m_param->chunkEnd))) || (m_param->bEnableFrameDuplication && !pic_in &&
>>> (read < written)))
>>> +    {
>>> +        if ((m_param->bEnableFrameDuplication && !pic_in && (read <
>>> written)))
>>> +            dontRead = true;
>>> +        else
>>>          {
>>> -            m_lookahead->setLookaheadQueue();
>>> -            m_latestParam->forceFlush = 0;
>>> +            if (m_latestParam->forceFlush == 1)
>>> +            {
>>> +                m_lookahead->setLookaheadQueue();
>>> +                m_latestParam->forceFlush = 0;
>>> +            }
>>> +            if (m_latestParam->forceFlush == 2)
>>> +            {
>>> +                m_lookahead->m_filled = false;
>>> +                m_latestParam->forceFlush = 0;
>>> +            }
>>> +
>>> +            if (pic_in->bitDepth < 8 || pic_in->bitDepth > 16)
>>> +            {
>>> +                x265_log(m_param, X265_LOG_ERROR, "Input bit depth (%d)
>>> must be between 8 and 16\n",
>>> +                    pic_in->bitDepth);
>>> +                return -1;
>>> +            }
>>>          }
>>> -        if (m_latestParam->forceFlush == 2)
>>> +
>>> +        if (m_param->bEnableFrameDuplication)
>>>          {
>>> -            m_lookahead->m_filled = false;
>>> -            m_latestParam->forceFlush = 0;
>>> +            double psnrWeight = 0;
>>> +
>>> +            if (!dontRead)
>>> +            {
>>> +                if (!m_dupBuffer[0]->bOccupied)
>>> +                {
>>> +                    copyPicture(m_dupBuffer[0]->dupPic, pic_in);
>>> +                    m_dupBuffer[0]->bOccupied = true;
>>> +                    written++;
>>> +                    return 0;
>>> +                }
>>> +                else if (!m_dupBuffer[1]->bOccupied)
>>> +                {
>>> +                    copyPicture(m_dupBuffer[1]->dupPic, pic_in);
>>> +                    m_dupBuffer[1]->bOccupied = true;
>>> +                    written++;
>>> +                }
>>> +
>>> +                psnrWeight = ComputePSNR(m_dupBuffer[0]->dupPic,
>>> m_dupBuffer[1]->dupPic, m_param);
>>> +
>>> +                if (psnrWeight >= m_param->dupThreshold)
>>> +                {
>>> +                    if (m_dupBuffer[0]->bDup)
>>> +                    {
>>> +                        m_dupBuffer[0]->dupPic->picStruct = tripling;
>>> +                        m_dupBuffer[0]->bDup = false;
>>> +                        read++;
>>> +                    }
>>> +                    else
>>> +                    {
>>> +                        m_dupBuffer[0]->dupPic->picStruct = doubling;
>>> +                        m_dupBuffer[0]->bDup = true;
>>> +                        m_dupBuffer[1]->bOccupied = false;
>>> +                        read++;
>>> +                        return 0;
>>> +                    }
>>> +                }
>>> +                else if (m_dupBuffer[0]->bDup)
>>> +                    m_dupBuffer[0]->bDup = false;
>>> +                else
>>> +                    m_dupBuffer[0]->dupPic->picStruct = 0;
>>> +            }
>>> +
>>> +            if (read < written)
>>> +            {
>>> +                inputPic = m_dupBuffer[0]->dupPic;
>>> +                read++;
>>> +            }
>>>          }
>>> -
>>> -        if (pic_in->bitDepth < 8 || pic_in->bitDepth > 16)
>>> -        {
>>> -            x265_log(m_param, X265_LOG_ERROR, "Input bit depth (%d)
>>> must be between 8 and 16\n",
>>> -                     pic_in->bitDepth);
>>> -            return -1;
>>> -        }
>>> +        else
>>> +            inputPic = pic_in;
>>>
>>>          Frame *inFrame;
>>>          x265_param* p = (m_reconfigure || m_reconfigureRc) ?
>>> m_latestParam : m_param;
>>> @@ -1038,7 +1421,7 @@
>>>          {
>>>              inFrame = new Frame;
>>>              inFrame->m_encodeStartTime = x265_mdate();
>>> -            if (inFrame->create(p, pic_in->quantOffsets))
>>> +            if (inFrame->create(p, inputPic->quantOffsets))
>>>              {
>>>                  /* the first PicYuv created is asked to generate the CU
>>> and block unit offset
>>>                   * arrays which are then shared with all subsequent
>>> PicYuv (orig and recon)
>>> @@ -1098,34 +1481,35 @@
>>>          }
>>>
>>>          /* Copy input picture into a Frame and PicYuv, send to
>>> lookahead */
>>> -        inFrame->m_fencPic->copyFromPicture(*pic_in, *m_param,
>>> m_sps.conformanceWindow.rightOffset, m_sps.conformanceWindow.bottomOffset);
>>> +        inFrame->m_fencPic->copyFromPicture(*inputPic, *m_param,
>>> m_sps.conformanceWindow.rightOffset, m_sps.conformanceWindow.bottomOffset);
>>>
>>>          inFrame->m_poc       = ++m_pocLast;
>>> -        inFrame->m_userData  = pic_in->userData;
>>> -        inFrame->m_pts       = pic_in->pts;
>>> -        inFrame->m_forceqp   = pic_in->forceqp;
>>> +        inFrame->m_userData  = inputPic->userData;
>>> +        inFrame->m_pts       = inputPic->pts;
>>> +        inFrame->m_forceqp   = inputPic->forceqp;
>>>          inFrame->m_param     = (m_reconfigure || m_reconfigureRc) ?
>>> m_latestParam : m_param;
>>> +        inFrame->m_picStruct = inputPic->picStruct;
>>>          if (m_param->bField && m_param->interlaceMode)
>>> -            inFrame->m_fieldNum = pic_in->fieldNum;
>>> -
>>> -        copyUserSEIMessages(inFrame, pic_in);
>>> -
>>> -        /*Copy Dolby Vision RPU from pic_in to frame*/
>>> -        if (pic_in->rpu.payloadSize)
>>> +            inFrame->m_fieldNum = inputPic->fieldNum;
>>> +
>>> +        copyUserSEIMessages(inFrame, inputPic);
>>> +
>>> +        /*Copy Dolby Vision RPU from inputPic to frame*/
>>> +        if (inputPic->rpu.payloadSize)
>>>          {
>>> -            inFrame->m_rpu.payloadSize = pic_in->rpu.payloadSize;
>>> -            inFrame->m_rpu.payload = new
>>> uint8_t[pic_in->rpu.payloadSize];
>>> -            memcpy(inFrame->m_rpu.payload, pic_in->rpu.payload,
>>> pic_in->rpu.payloadSize);
>>> +            inFrame->m_rpu.payloadSize = inputPic->rpu.payloadSize;
>>> +            inFrame->m_rpu.payload = new
>>> uint8_t[inputPic->rpu.payloadSize];
>>> +            memcpy(inFrame->m_rpu.payload, inputPic->rpu.payload,
>>> inputPic->rpu.payloadSize);
>>>          }
>>>
>>> -        if (pic_in->quantOffsets != NULL)
>>> +        if (inputPic->quantOffsets != NULL)
>>>          {
>>>              int cuCount;
>>>              if (m_param->rc.qgSize == 8)
>>>                  cuCount = inFrame->m_lowres.maxBlocksInRowFullRes *
>>> inFrame->m_lowres.maxBlocksInColFullRes;
>>>              else
>>>                  cuCount = inFrame->m_lowres.maxBlocksInRow *
>>> inFrame->m_lowres.maxBlocksInCol;
>>> -            memcpy(inFrame->m_quantOffsets, pic_in->quantOffsets,
>>> cuCount * sizeof(float));
>>> +            memcpy(inFrame->m_quantOffsets, inputPic->quantOffsets,
>>> cuCount * sizeof(float));
>>>          }
>>>
>>>          if (m_pocLast == 0)
>>> @@ -1147,9 +1531,9 @@
>>>          }
>>>
>>>          /* Use the frame types from the first pass, if available */
>>> -        int sliceType = (m_param->rc.bStatRead) ?
>>> m_rateControl->rateControlSliceType(inFrame->m_poc) : pic_in->sliceType;
>>> -
>>> -        /* In analysisSave mode, x265_analysis_data is allocated in
>>> pic_in and inFrame points to this */
>>> +        int sliceType = (m_param->rc.bStatRead) ?
>>> m_rateControl->rateControlSliceType(inFrame->m_poc) : inputPic->sliceType;
>>> +
>>> +        /* In analysisSave mode, x265_analysis_data is allocated in
>>> inputPic and inFrame points to this */
>>>          /* Load analysis data before lookahead->addPicture, since
>>> sliceType has been decided */
>>>          if (m_param->analysisLoad)
>>>          {
>>> @@ -1157,7 +1541,7 @@
>>>              static int paramBytes = 0;
>>>              if (!inFrame->m_poc && m_param->bAnalysisType != HEVC_INFO)
>>>              {
>>> -                x265_analysis_data analysisData = pic_in->analysisData;
>>> +                x265_analysis_data analysisData =
>>> inputPic->analysisData;
>>>                  paramBytes = validateAnalysisData(&analysisData, 0);
>>>                  if (paramBytes == -1)
>>>                  {
>>> @@ -1178,10 +1562,10 @@
>>>                  uint32_t outOfBoundaryLowresH = extendedHeight -
>>> m_param->sourceHeight / 2;
>>>                  if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize)
>>>                      cuLocInFrame.skipHeight = true;
>>> -                readAnalysisFile(&inFrame->m_analysisData,
>>> inFrame->m_poc, pic_in, paramBytes, cuLocInFrame);
>>> +                readAnalysisFile(&inFrame->m_analysisData,
>>> inFrame->m_poc, inputPic, paramBytes, cuLocInFrame);
>>>              }
>>>              else
>>> -                readAnalysisFile(&inFrame->m_analysisData,
>>> inFrame->m_poc, pic_in, paramBytes);
>>> +                readAnalysisFile(&inFrame->m_analysisData,
>>> inFrame->m_poc, inputPic, paramBytes);
>>>              inFrame->m_poc = inFrame->m_analysisData.poc;
>>>              sliceType = inFrame->m_analysisData.sliceType;
>>>              inFrame->m_lowres.bScenecut =
>>> !!inFrame->m_analysisData.bScenecut;
>>> @@ -1202,9 +1586,9 @@
>>>                  }
>>>              }
>>>          }
>>> -        if (m_param->bUseRcStats && pic_in->rcData)
>>> +        if (m_param->bUseRcStats && inputPic->rcData)
>>>          {
>>> -            RcStats* rc = (RcStats*)pic_in->rcData;
>>> +            RcStats* rc = (RcStats*)inputPic->rcData;
>>>              m_rateControl->m_accumPQp = rc->cumulativePQp;
>>>              m_rateControl->m_accumPNorm = rc->cumulativePNorm;
>>>              m_rateControl->m_isNextGop = true;
>>> @@ -1228,6 +1612,18 @@
>>>              }
>>>              m_param->bUseRcStats = 0;
>>>          }
>>> +
>>> +        if (m_param->bEnableFrameDuplication && ((read < written) ||
>>> (m_dupBuffer[0]->dupPic->picStruct == tripling && (read <= written))))
>>> +        {
>>> +            if (m_dupBuffer[0]->dupPic->picStruct == tripling)
>>> +                m_dupBuffer[0]->bOccupied = m_dupBuffer[1]->bOccupied =
>>> false;
>>> +            else
>>> +            {
>>> +                copyPicture(m_dupBuffer[0]->dupPic,
>>> m_dupBuffer[1]->dupPic);
>>> +                m_dupBuffer[1]->bOccupied = false;
>>> +            }
>>> +        }
>>> +
>>>          if (m_reconfigureRc)
>>>              inFrame->m_reconfigureRc = true;
>>>
>>> @@ -1262,7 +1658,7 @@
>>>              Slice *slice = outFrame->m_encData->m_slice;
>>>              x265_frame_stats* frameData = NULL;
>>>
>>> -            /* Free up pic_in->analysisData since it has already been
>>> used */
>>> +            /* Free up inputPic->analysisData since it has already been
>>> used */
>>>              if ((m_param->analysisLoad && !m_param->analysisSave) ||
>>> ((m_param->bAnalysisType == AVC_INFO) && slice->m_sliceType != I_SLICE))
>>>                  x265_free_analysis_data(m_param,
>>> &outFrame->m_analysisData);
>>>
>>> @@ -3174,6 +3570,30 @@
>>>          p->dynamicRd = 0;
>>>          x265_log(p, X265_LOG_WARNING, "Dynamic-rd disabled, requires RD
>>> <= 4, VBV and aq-mode enabled\n");
>>>      }
>>> +
>>> +    if (!p->bEnableFrameDuplication && p->dupThreshold &&
>>> p->dupThreshold != 70)
>>> +    {
>>> +        x265_log(p, X265_LOG_WARNING, "Frame-duplication threshold
>>> works only with frame-duplication enabled. Enabling frame-duplication.\n");
>>> +        p->bEnableFrameDuplication = 1;
>>> +    }
>>> +
>>> +    if (p->bEnableFrameDuplication && p->interlaceMode)
>>> +    {
>>> +        x265_log(p, X265_LOG_WARNING, "Frame-duplication does not
>>> support interlace mode. Disabling Frame Duplication.\n");
>>> +        p->bEnableFrameDuplication = 0;
>>> +    }
>>> +
>>> +    if (p->bEnableFrameDuplication && p->pictureStructure != 0 &&
>>> p->pictureStructure != -1)
>>> +    {
>>> +        x265_log(p, X265_LOG_WARNING, "Frame-duplication works only
>>> with pic_struct = 0. Setting pic-struct = 0.\n");
>>> +        p->pictureStructure = 0;
>>> +    }
>>> +
>>> +    if (m_param->bEnableFrameDuplication && (!bIsVbv ||
>>> !m_param->bEmitHRDSEI))
>>> +    {
>>> +        x265_log(m_param, X265_LOG_WARNING, "Frame-duplication require
>>> NAL HRD and VBV parameters. Disabling frame duplication\n");
>>> +        m_param->bEnableFrameDuplication = 0;
>>> +    }
>>>  #ifdef ENABLE_HDR10_PLUS
>>>      if (m_param->bDhdr10opt && m_param->toneMapFile == NULL)
>>>      {
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/encoder/encoder.h
>>> --- a/source/encoder/encoder.h Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/encoder/encoder.h Fri Sep 13 15:57:26 2019 +0530
>>> @@ -88,6 +88,9 @@
>>>  };
>>>
>>>  #define MAX_NUM_REF_IDX 64
>>> +#define DUP_BUFFER 2
>>> +#define doubling 7
>>> +#define tripling 8
>>>
>>>  struct RefIdxLastGOP
>>>  {
>>> @@ -141,6 +144,18 @@
>>>      }
>>>  };
>>>
>>> +struct AdaptiveFrameDuplication
>>> +{
>>> +    x265_picture* dupPic;
>>> +    char* dupPlane;
>>> +
>>> +    //Flag to denote whether the picture buffer is currently occupied.
>>> +    bool bOccupied;
>>> +
>>> +    //Flag to check whether the picture has been duplicated.
>>> +    bool bDup;
>>> +};
>>> +
>>>
>>>  class FrameEncoder;
>>>  class DPB;
>>> @@ -189,6 +204,10 @@
>>>      x265_param*        m_latestParam;     // Holds latest param during
>>> a reconfigure
>>>      RateControl*       m_rateControl;
>>>      Lookahead*         m_lookahead;
>>> +    AdaptiveFrameDuplication* m_dupBuffer[DUP_BUFFER];      // picture
>>> buffer of size 2
>>> +    /*Frame duplication: Two pictures used to compute PSNR */
>>> +    pixel*             m_dupPicOne[3];
>>> +    pixel*             m_dupPicTwo[3];
>>>
>>>      bool               m_externalFlush;
>>>      /* Collect statistics globally */
>>> @@ -324,6 +343,12 @@
>>>
>>>      void calcRefreshInterval(Frame* frameEnc);
>>>
>>> +    uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride,
>>> uint32_t width, uint32_t height, x265_param *param);
>>> +
>>> +    double ComputePSNR(x265_picture *firstPic, x265_picture *secPic,
>>> x265_param *param);
>>> +
>>> +    void copyPicture(x265_picture *dest, const x265_picture *src);
>>> +
>>>      void initRefIdx();
>>>      void analyseRefIdx(int *numRefIdx);
>>>      void updateRefIdx();
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/encoder/frameencoder.cpp
>>> --- a/source/encoder/frameencoder.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/encoder/frameencoder.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -713,6 +713,8 @@
>>>                          sei->m_picStruct = (poc & 1) ? 2 /* bottom */ :
>>> 1 /* top */;
>>>                  }
>>>              }
>>> +            else if (m_param->bEnableFrameDuplication)
>>> +                sei->m_picStruct = m_frame->m_picStruct;
>>>              else
>>>                  sei->m_picStruct = m_param->pictureStructure;
>>>
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/encoder/framefilter.cpp
>>> --- a/source/encoder/framefilter.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/encoder/framefilter.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -32,7 +32,6 @@
>>>
>>>  using namespace X265_NS;
>>>
>>> -static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride,
>>> uint32_t width, uint32_t height);
>>>  static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2,
>>> intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t&
>>> cnt);
>>>
>>>  namespace X265_NS
>>> @@ -673,7 +672,7 @@
>>>          uint32_t width  = reconPic->m_picWidth - m_pad[0];
>>>          uint32_t height = m_parallelFilter[row].getCUHeight();
>>>
>>> -        uint64_t ssdY = computeSSD(fencPic->getLumaAddr(cuAddr),
>>> reconPic->getLumaAddr(cuAddr), stride, width, height);
>>> +        uint64_t ssdY =
>>> m_frameEncoder->m_top->computeSSD(fencPic->getLumaAddr(cuAddr),
>>> reconPic->getLumaAddr(cuAddr), stride, width, height, m_param);
>>>          m_frameEncoder->m_SSDY += ssdY;
>>>
>>>          if (m_param->internalCsp != X265_CSP_I400)
>>> @@ -682,8 +681,8 @@
>>>              width >>= m_hChromaShift;
>>>              stride = reconPic->m_strideC;
>>>
>>> -            uint64_t ssdU = computeSSD(fencPic->getCbAddr(cuAddr),
>>> reconPic->getCbAddr(cuAddr), stride, width, height);
>>> -            uint64_t ssdV = computeSSD(fencPic->getCrAddr(cuAddr),
>>> reconPic->getCrAddr(cuAddr), stride, width, height);
>>> +            uint64_t ssdU =
>>> m_frameEncoder->m_top->computeSSD(fencPic->getCbAddr(cuAddr),
>>> reconPic->getCbAddr(cuAddr), stride, width, height, m_param);
>>> +            uint64_t ssdV =
>>> m_frameEncoder->m_top->computeSSD(fencPic->getCrAddr(cuAddr),
>>> reconPic->getCrAddr(cuAddr), stride, width, height, m_param);
>>>
>>>              m_frameEncoder->m_SSDU += ssdU;
>>>              m_frameEncoder->m_SSDV += ssdV;
>>> @@ -825,71 +824,6 @@
>>>      }
>>>  }
>>>
>>> -static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride,
>>> uint32_t width, uint32_t height)
>>> -{
>>> -    uint64_t ssd = 0;
>>> -
>>> -    if ((width | height) & 3)
>>> -    {
>>> -        /* Slow Path */
>>> -        for (uint32_t y = 0; y < height; y++)
>>> -        {
>>> -            for (uint32_t x = 0; x < width; x++)
>>> -            {
>>> -                int diff = (int)(fenc[x] - rec[x]);
>>> -                ssd += diff * diff;
>>> -            }
>>> -
>>> -            fenc += stride;
>>> -            rec += stride;
>>> -        }
>>> -
>>> -        return ssd;
>>> -    }
>>> -
>>> -    uint32_t y = 0;
>>> -
>>> -    /* Consume rows in ever narrower chunks of height */
>>> -    for (int size = BLOCK_64x64; size >= BLOCK_4x4 && y < height;
>>> size--)
>>> -    {
>>> -        uint32_t rowHeight = 1 << (size + 2);
>>> -
>>> -        for (; y + rowHeight <= height; y += rowHeight)
>>> -        {
>>> -            uint32_t y1, x = 0;
>>> -
>>> -            /* Consume each row using the largest square blocks
>>> possible */
>>> -            if (size == BLOCK_64x64 && !(stride & 31))
>>> -                for (; x + 64 <= width; x += 64)
>>> -                    ssd += primitives.cu[BLOCK_64x64].sse_pp(fenc + x,
>>> stride, rec + x, stride);
>>> -
>>> -            if (size >= BLOCK_32x32 && !(stride & 15))
>>> -                for (; x + 32 <= width; x += 32)
>>> -                    for (y1 = 0; y1 + 32 <= rowHeight; y1 += 32)
>>> -                        ssd += primitives.cu[BLOCK_32x32].sse_pp(fenc
>>> + y1 * stride + x, stride, rec + y1 * stride + x, stride);
>>> -
>>> -            if (size >= BLOCK_16x16)
>>> -                for (; x + 16 <= width; x += 16)
>>> -                    for (y1 = 0; y1 + 16 <= rowHeight; y1 += 16)
>>> -                        ssd += primitives.cu[BLOCK_16x16].sse_pp(fenc
>>> + y1 * stride + x, stride, rec + y1 * stride + x, stride);
>>> -
>>> -            if (size >= BLOCK_8x8)
>>> -                for (; x + 8 <= width; x += 8)
>>> -                    for (y1 = 0; y1 + 8 <= rowHeight; y1 += 8)
>>> -                        ssd += primitives.cu[BLOCK_8x8].sse_pp(fenc +
>>> y1 * stride + x, stride, rec + y1 * stride + x, stride);
>>> -
>>> -            for (; x + 4 <= width; x += 4)
>>> -                for (y1 = 0; y1 + 4 <= rowHeight; y1 += 4)
>>> -                    ssd += primitives.cu[BLOCK_4x4].sse_pp(fenc + y1 *
>>> stride + x, stride, rec + y1 * stride + x, stride);
>>> -
>>> -            fenc += stride * rowHeight;
>>> -            rec += stride * rowHeight;
>>> -        }
>>> -    }
>>> -
>>> -    return ssd;
>>> -}
>>> -
>>>  /* Function to calculate SSIM for each row */
>>>  static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2,
>>> intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt)
>>>  {
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/input/y4m.cpp
>>> --- a/source/input/y4m.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/input/y4m.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -388,6 +388,7 @@
>>>          pic.bitDepth = depth;
>>>          pic.framesize = framesize;
>>>          pic.height = height;
>>> +        pic.width = width;
>>>          pic.colorSpace = colorSpace;
>>>          pic.stride[0] = width * pixelbytes;
>>>          pic.stride[1] = pic.stride[0] >>
>>> x265_cli_csps[colorSpace].width[1];
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/input/yuv.cpp
>>> --- a/source/input/yuv.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/input/yuv.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -204,6 +204,7 @@
>>>          pic.bitDepth = depth;
>>>          pic.framesize = framesize;
>>>          pic.height = height;
>>> +        pic.width = width;
>>>          pic.stride[0] = width * pixelbytes;
>>>          pic.stride[1] = pic.stride[0] >>
>>> x265_cli_csps[colorSpace].width[1];
>>>          pic.stride[2] = pic.stride[0] >>
>>> x265_cli_csps[colorSpace].width[2];
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/test/regression-tests.txt
>>> --- a/source/test/regression-tests.txt Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/test/regression-tests.txt Fri Sep 13 15:57:26 2019 +0530
>>> @@ -156,6 +156,7 @@
>>>  720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme
>>>  ducks_take_off_420_1_720p50.y4m,--preset medium --aq-mode 4 --crf 22
>>> --no-cutree
>>>  ducks_take_off_420_1_720p50.y4m,--preset medium --selective-sao 4 --sao
>>> --crf 20
>>> +Traffic_4096x2048_30p.y4m, --preset medium --frame-dup --dup-threshold
>>> 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
>>>
>>>  # Main12 intraCost overflow bug test
>>>  720p50_parkrun_ter.y4m,--preset medium
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/x265.cpp
>>> --- a/source/x265.cpp Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/x265.cpp Fri Sep 13 15:57:26 2019 +0530
>>> @@ -541,10 +541,11 @@
>>>          return true;
>>>      }
>>>
>>> -    /* Unconditionally accept height/width/csp from file info */
>>> +    /* Unconditionally accept height/width/csp/bitDepth from file info
>>> */
>>>      param->sourceWidth = info.width;
>>>      param->sourceHeight = info.height;
>>>      param->internalCsp = info.csp;
>>> +    param->sourceBitDepth = info.depth;
>>>
>>>      /* Accept fps and sar from file info if not specified by user */
>>>      if (param->fpsDenom == 0 || param->fpsNum == 0)
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/x265.h
>>> --- a/source/x265.h Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/x265.h Fri Sep 13 15:57:26 2019 +0530
>>> @@ -455,16 +455,21 @@
>>>       * multi pass ratecontrol mode. */
>>>      void*  rcData;
>>>
>>> -    uint64_t framesize;
>>> +    size_t framesize;
>>>
>>>      int    height;
>>>
>>> +    int    width;
>>> +
>>>
>>
>> [PR] Please add new fields in an externally-exposed structure only to the
>> end to ensure backwards compatibility with applications that cannot / don't
>> want to recompile.
>>
>>
>>>      // pts is reordered in the order of encoding.
>>>      int64_t reorderedPts;
>>>
>>>      //Dolby Vision RPU metadata
>>>      x265_dolby_vision_rpu rpu;
>>> -
>>> +
>>> +    //SEI picture structure message
>>> +    uint32_t picStruct;
>>> +
>>>      int fieldNum;
>>>  } x265_picture;
>>>
>>> @@ -844,6 +849,9 @@
>>>       * Future builds may support 12bit pixels. */
>>>      int       internalBitDepth;
>>>
>>> +    /*Input sequence bit depth. It can be either 8bit, 10bit or 12bit.*/
>>> +    int       sourceBitDepth;
>>> +
>>>
>>
>> [PR] Please add new fields in an externally-exposed structure only to the
>> end to ensure backwards compatibility with applications that cannot / don't
>> want to recompile.
>>
>>
>>>      /* Color space of internal pictures, must match color space of input
>>>       * pictures */
>>>      int       internalCsp;
>>> @@ -1327,6 +1335,19 @@
>>>   * */
>>>   int       pictureStructure;
>>>
>>> +    /*
>>> +    * Signals picture structure SEI timing message for every frame
>>> +    * picture structure 7 is signalled for frame doubling
>>> +    * picture structure 8 is signalled for frame tripling
>>> +    * */
>>> +    int       bEnableFrameDuplication;
>>> +
>>> +    /*
>>> +    * For adaptive frame duplication, a threshold is set above which
>>> the frames are similar.
>>> +    * User can set a variable threshold. Default 70.
>>> +    * */
>>> +    int       dupThreshold;
>>> +
>>>
>>
>> [PR] Please add new fields in an externally-exposed structure only to the
>> end to ensure backwards compatibility with applications that cannot / don't
>> want to recompile.
>>
>>
>>>      struct
>>>      {
>>>          /* Explicit mode of rate-control, necessary for API users. It
>>> must
>>> diff -r c4b098f973e6 -r fa0d04affb8a source/x265cli.h
>>> --- a/source/x265cli.h Tue Aug 13 10:51:21 2019 +0530
>>> +++ b/source/x265cli.h Fri Sep 13 15:57:26 2019 +0530
>>> @@ -321,6 +321,9 @@
>>>      { "hevc-aq", no_argument, NULL, 0 },
>>>      { "no-hevc-aq", no_argument, NULL, 0 },
>>>      { "qp-adaptation-range", required_argument, NULL, 0 },
>>> +    { "frame-dup",            no_argument, NULL, 0 },
>>> +    { "no-frame-dup", no_argument, NULL, 0 },
>>> +    { "dup-threshold", required_argument, NULL, 0 },
>>>  #ifdef SVT_HEVC
>>>      { "svt",     no_argument, NULL, 0 },
>>>      { "no-svt",  no_argument, NULL, 0 },
>>> @@ -638,6 +641,8 @@
>>>      H1("   --recon-depth <integer>       Bit-depth of reconstructed raw
>>> image file. Defaults to input bit depth, or 8 if Y4M\n");
>>>      H1("   --recon-y4m-exec <string>     pipe reconstructed frames to
>>> Y4M viewer, ex:\"ffplay -i pipe:0 -autoexit\"\n");
>>>      H0("   --lowpass-dct                 Use low-pass subband dct
>>> approximation. Default %s\n", OPT(param->bLowPassDct));
>>> +    H0("   --[no-]frame-dup              Enable Frame duplication.
>>> Default %s\n", OPT(param->bEnableFrameDuplication));
>>> +    H0("   --dup-threshold <integer>     PSNR threshold for Frame
>>> duplication. Default %d\n", param->dupThreshold);
>>>  #ifdef SVT_HEVC
>>>      H0("   --[no]svt                     Enable SVT HEVC encoder %s\n",
>>> OPT(param->bEnableSvtHevc));
>>>      H0("   --[no-]svt-hme                Enable Hierarchial motion
>>> estimation(HME) in SVT HEVC encoder \n");
>>>
>>>
>>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20191004/5ef8c790/attachment-0001.html>