<div dir="ltr">From 2f0eaa3d26099ebc9edd5e62db87722dac626692 Mon Sep 17 00:00:00 2001<br>From: Kirithika <<a href="mailto:kirithika@multicorewareinc.com">kirithika@multicorewareinc.com</a>><br>Date: Tue, 20 Dec 2022 14:47:29 +0530<br>Subject: [PATCH] Readjust reference frames and NAL type based on temporal<br> layer<br><br>---<br> source/common/frame.cpp    |   1 +<br> source/common/frame.h      |   2 +<br> source/encoder/api.cpp     |   4 +-<br> source/encoder/dpb.cpp     | 167 +++++++++++++++++++++++++++++++++++--<br> source/encoder/dpb.h       |   9 +-<br> source/encoder/encoder.cpp |   2 +<br> source/x265.h              |   2 +-<br> 7 files changed, 174 insertions(+), 13 deletions(-)<br><br>diff --git a/source/common/frame.cpp b/source/common/frame.cpp<br>index 48c538714..768d69f34 100644<br>--- a/source/common/frame.cpp<br>+++ b/source/common/frame.cpp<br>@@ -75,6 +75,7 @@ Frame::Frame()<br>     m_prevMCSTF = NULL;<br> <br>     m_tempLayer = 0;<br>+    m_sameLayerRefPic = false;<br> }<br> <br> bool Frame::create(x265_param *param, float* quantOffsets)<br>diff --git a/source/common/frame.h b/source/common/frame.h<br>index c916f7714..fcd0031bc 100644<br>--- a/source/common/frame.h<br>+++ b/source/common/frame.h<br>@@ -163,6 +163,8 @@ public:<br>     /*Frame's temporal layer info*/<br>     uint8_t                m_tempLayer;<br>     int8_t                 m_gopId;<br>+    bool                   m_sameLayerRefPic;<br>+<br>     Frame();<br> <br>     bool create(x265_param *param, float* quantOffsets);<br>diff --git a/source/encoder/api.cpp b/source/encoder/api.cpp<br>index 5f1bba67b..9b53e62ae 100644<br>--- a/source/encoder/api.cpp<br>+++ b/source/encoder/api.cpp<br>@@ -1297,7 +1297,7 @@ FILE* x265_csvlog_open(const x265_param* param)<br>             if (param->csvLogLevel)<br>             {<br>                 fprintf(csvfp, "Encode Order, Type, POC, QP, Bits, Scenecut, ");<br>-                if (param->bEnableTemporalSubLayers > 2)<br>+                if (!!param->bEnableTemporalSubLayers)<br>                     fprintf(csvfp, "Temporal Sub Layer ID, ");<br>                 if (param->csvLogLevel >= 2)<br>                     fprintf(csvfp, "I/P cost ratio, ");<br>@@ -1412,7 +1412,7 @@ void x265_csvlog_frame(const x265_param* param, const x265_picture* pic)<br>     const x265_frame_stats* frameStats = &pic->frameData;<br>     fprintf(param->csvfpt, "%d, %c-SLICE, %4d, %2.2lf, %10d, %d,", frameStats->encoderOrder, frameStats->sliceType, frameStats->poc,<br>                                                                    frameStats->qp, (int)frameStats->bits, frameStats->bScenecut);<br>-    if (param->bEnableTemporalSubLayers > 2)<br>+    if (!!param->bEnableTemporalSubLayers)<br>         fprintf(param->csvfpt, "%d,", frameStats->tLayer);<br>     if (param->csvLogLevel >= 2)<br>         fprintf(param->csvfpt, "%.2f,", frameStats->ipCostRatio);<br>diff --git a/source/encoder/dpb.cpp b/source/encoder/dpb.cpp<br>index bfe6f2290..24d3cd202 100644<br>--- a/source/encoder/dpb.cpp<br>+++ b/source/encoder/dpb.cpp<br>@@ -150,12 +150,13 @@ void DPB::prepareEncode(Frame *newFrame)<br>     {<br>         newFrame->m_encData->m_bHasReferences = false;<br> <br>+        newFrame->m_tempLayer = (newFrame->m_param->bEnableTemporalSubLayers && !m_bTemporalSublayer) ? 1 : newFrame->m_tempLayer;<br>         // Adjust NAL type for unreferenced B frames (change from _R "referenced"<br>         // to _N "non-referenced" NAL unit type)<br>         switch (slice->m_nalUnitType)<br>         {<br>         case NAL_UNIT_CODED_SLICE_TRAIL_R:<br>-            slice->m_nalUnitType = m_bTemporalSublayer ? NAL_UNIT_CODED_SLICE_TSA_N : NAL_UNIT_CODED_SLICE_TRAIL_N;<br>+            slice->m_nalUnitType = newFrame->m_param->bEnableTemporalSubLayers ? NAL_UNIT_CODED_SLICE_TSA_N : NAL_UNIT_CODED_SLICE_TRAIL_N;<br>             break;<br>         case NAL_UNIT_CODED_SLICE_RADL_R:<br>             slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_RADL_N;<br>@@ -176,13 +177,94 @@ void DPB::prepareEncode(Frame *newFrame)<br> <br>     m_picList.pushFront(*newFrame);<br> <br>+    if (m_bTemporalSublayer && getTemporalLayerNonReferenceFlag())<br>+    {<br>+        switch (slice->m_nalUnitType)<br>+        {<br>+        case NAL_UNIT_CODED_SLICE_TRAIL_R:<br>+            slice->m_nalUnitType =  NAL_UNIT_CODED_SLICE_TRAIL_N;<br>+            break;<br>+        case NAL_UNIT_CODED_SLICE_RADL_R:<br>+            slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_RADL_N;<br>+            break;<br>+        case NAL_UNIT_CODED_SLICE_RASL_R:<br>+            slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_RASL_N;<br>+            break;<br>+        default:<br>+            break;<br>+        }<br>+    }<br>     // Do decoding refresh marking if any<br>     decodingRefreshMarking(pocCurr, slice->m_nalUnitType);<br> <br>-    computeRPS(pocCurr, slice->isIRAP(), &slice->m_rps, slice->m_sps->maxDecPicBuffering[newFrame->m_tempLayer]);<br>-<br>+    computeRPS(pocCurr, newFrame->m_tempLayer, slice->isIRAP(), &slice->m_rps, slice->m_sps->maxDecPicBuffering[newFrame->m_tempLayer]);<br>+    bool isTSAPic = ((slice->m_nalUnitType == 2) || (slice->m_nalUnitType == 3)) ? true : false;<br>     // Mark pictures in m_piclist as unreferenced if they are not included in RPS<br>-    applyReferencePictureSet(&slice->m_rps, pocCurr);<br>+    applyReferencePictureSet(&slice->m_rps, pocCurr, newFrame->m_tempLayer, isTSAPic);<br>+<br>+<br>+    if (m_bTemporalSublayer && newFrame->m_tempLayer > 0<br>+        && !(slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL_N     // Check if not a leading picture<br>+            || slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL_R<br>+            || slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL_N<br>+            || slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL_R)<br>+        )<br>+    {<br>+        if (isTemporalLayerSwitchingPoint(pocCurr, newFrame->m_tempLayer) || (slice->m_sps->maxTempSubLayers == 1))<br>+        {<br>+            if (getTemporalLayerNonReferenceFlag())<br>+            {<br>+                slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_TSA_N;<br>+            }<br>+            else<br>+            {<br>+                slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_TSA_R;<br>+            }<br>+        }<br>+        else if (isStepwiseTemporalLayerSwitchingPoint(&slice->m_rps, pocCurr, newFrame->m_tempLayer))<br>+        {<br>+            bool isSTSA = true;<br>+            int id = newFrame->m_gopOffset % x265_gop_ra_length[newFrame->m_gopId];<br>+            for (int ii = id; (ii < x265_gop_ra_length[newFrame->m_gopId] && isSTSA == true); ii++)<br>+            {<br>+                int tempIdRef = x265_gop_ra[newFrame->m_gopId][ii].layer;<br>+                if (tempIdRef == newFrame->m_tempLayer)<br>+                {<br>+                    for (int jj = 0; jj < slice->m_rps.numberOfPositivePictures + slice->m_rps.numberOfNegativePictures; jj++)<br>+                    {<br>+                        if (slice->m_rps.bUsed[jj])<br>+                        {<br>+                            int refPoc = x265_gop_ra[newFrame->m_gopId][ii].poc_offset + slice->m_rps.deltaPOC[jj];<br>+                            int kk = 0;<br>+                            for (kk = 0; kk < x265_gop_ra_length[newFrame->m_gopId]; kk++)<br>+                            {<br>+                                if (x265_gop_ra[newFrame->m_gopId][kk].poc_offset == refPoc)<br>+                                {<br>+                                    break;<br>+                                }<br>+                            }<br>+                            if (x265_gop_ra[newFrame->m_gopId][kk].layer >= newFrame->m_tempLayer)<br>+                            {<br>+                                isSTSA = false;<br>+                                break;<br>+                            }<br>+                        }<br>+                    }<br>+                }<br>+            }<br>+            if (isSTSA == true)<br>+            {<br>+                if (getTemporalLayerNonReferenceFlag())<br>+                {<br>+                    slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_STSA_N;<br>+                }<br>+                else<br>+                {<br>+                    slice->m_nalUnitType = NAL_UNIT_CODED_SLICE_STSA_R;<br>+                }<br>+            }<br>+        }<br>+    }<br> <br>     if (slice->m_sliceType != I_SLICE)<br>         slice->m_numRefIdx[0] = x265_clip3(1, newFrame->m_param->maxNumReferences, slice->m_rps.numberOfNegativePictures);<br>@@ -226,7 +308,7 @@ void DPB::prepareEncode(Frame *newFrame)<br>     }<br> }<br> <br>-void DPB::computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer)<br>+void DPB::computeRPS(int curPoc, int tempId, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer)<br> {<br>     unsigned int poci = 0, numNeg = 0, numPos = 0;<br> <br>@@ -236,7 +318,7 @@ void DPB::computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBu<br>     {<br>         if ((iterPic->m_poc != curPoc) && iterPic->m_encData->m_bHasReferences)<br>         {<br>-            if ((m_lastIDR >= curPoc) || (m_lastIDR <= iterPic->m_poc))<br>+            if ((!m_bTemporalSublayer || (iterPic->m_tempLayer <= tempId)) && ((m_lastIDR >= curPoc) || (m_lastIDR <= iterPic->m_poc)))<br>             {<br>                     rps->poc[poci] = iterPic->m_poc;<br>                     rps->deltaPOC[poci] = rps->poc[poci] - curPoc;<br>@@ -255,6 +337,18 @@ void DPB::computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBu<br>     rps->sortDeltaPOC();<br> }<br> <br>+bool DPB::getTemporalLayerNonReferenceFlag()<br>+{<br>+    Frame* curFrame = m_picList.first();<br>+    if (curFrame->m_encData->m_bHasReferences)<br>+    {<br>+        curFrame->m_sameLayerRefPic = true;<br>+        return false;<br>+    }<br>+    else<br>+        return true;<br>+}<br>+<br> /* Marking reference pictures when an IDR/CRA is encountered. */<br> void DPB::decodingRefreshMarking(int pocCurr, NalUnitType nalUnitType)<br> {<br>@@ -304,7 +398,7 @@ void DPB::decodingRefreshMarking(int pocCurr, NalUnitType nalUnitType)<br> }<br> <br> /** Function for applying picture marking based on the Reference Picture Set */<br>-void DPB::applyReferencePictureSet(RPS *rps, int curPoc)<br>+void DPB::applyReferencePictureSet(RPS *rps, int curPoc, int tempId, bool isTSAPicture)<br> {<br>     // loop through all pictures in the reference picture buffer<br>     Frame* iterFrame = m_picList.first();<br>@@ -325,9 +419,68 @@ void DPB::applyReferencePictureSet(RPS *rps, int curPoc)<br>             }<br>             if (!referenced)<br>                 iterFrame->m_encData->m_bHasReferences = false;<br>+<br>+            if (m_bTemporalSublayer)<br>+            {<br>+                //check that pictures of higher temporal layers are not used<br>+                assert(referenced == 0 || iterFrame->m_encData->m_bHasReferences == false || iterFrame->m_tempLayer <= tempId);<br>+<br>+                //check that pictures of higher or equal temporal layer are not in the RPS if the current picture is a TSA picture<br>+                if (isTSAPicture)<br>+                {<br>+                    assert(referenced == 0 || iterFrame->m_tempLayer < tempId);<br>+                }<br>+                //check that pictures marked as temporal layer non-reference pictures are not used for reference<br>+                if (iterFrame->m_tempLayer == tempId)<br>+                {<br>+                    assert(referenced == 0 || iterFrame->m_sameLayerRefPic == true);<br>+                }<br>+            }<br>+        }<br>+        iterFrame = iterFrame->m_next;<br>+    }<br>+}<br>+<br>+bool DPB::isTemporalLayerSwitchingPoint(int curPoc, int tempId)<br>+{<br>+    // loop through all pictures in the reference picture buffer<br>+    Frame* iterFrame = m_picList.first();<br>+    while (iterFrame)<br>+    {<br>+        if (iterFrame->m_poc != curPoc && iterFrame->m_encData->m_bHasReferences)<br>+        {<br>+            if (iterFrame->m_tempLayer >= tempId)<br>+            {<br>+                return false;<br>+            }<br>+        }<br>+        iterFrame = iterFrame->m_next;<br>+    }<br>+    return true;<br>+}<br>+<br>+bool DPB::isStepwiseTemporalLayerSwitchingPoint(RPS *rps, int curPoc, int tempId)<br>+{<br>+    // loop through all pictures in the reference picture buffer<br>+    Frame* iterFrame = m_picList.first();<br>+    while (iterFrame)<br>+    {<br>+        if (iterFrame->m_poc != curPoc && iterFrame->m_encData->m_bHasReferences)<br>+        {<br>+            for (int i = 0; i < rps->numberOfPositivePictures + rps->numberOfNegativePictures; i++)<br>+            {<br>+                if ((iterFrame->m_poc == curPoc + rps->deltaPOC[i]) && rps->bUsed[i])<br>+                {<br>+                    if (iterFrame->m_tempLayer >= tempId)<br>+                    {<br>+                        return false;<br>+                    }<br>+                }<br>+            }<br>         }<br>         iterFrame = iterFrame->m_next;<br>     }<br>+    return true;<br> }<br> <br> /* deciding the nal_unit_type */<br>diff --git a/source/encoder/dpb.h b/source/encoder/dpb.h<br>index e47d54d61..2cc7df778 100644<br>--- a/source/encoder/dpb.h<br>+++ b/source/encoder/dpb.h<br>@@ -66,7 +66,7 @@ public:<br>         m_bRefreshPending = false;<br>         m_frameDataFreeList = NULL;<br>         m_bOpenGOP = param->bOpenGOP;<br>-        m_bTemporalSublayer = !!param->bEnableTemporalSubLayers;<br>+        m_bTemporalSublayer = (param->bEnableTemporalSubLayers > 2);<br>     }<br> <br>     ~DPB();<br>@@ -77,10 +77,13 @@ public:<br> <br> protected:<br> <br>-    void computeRPS(int curPoc, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer);<br>+    void computeRPS(int curPoc,int tempId, bool isRAP, RPS * rps, unsigned int maxDecPicBuffer);<br> <br>-    void applyReferencePictureSet(RPS *rps, int curPoc);<br>+    void applyReferencePictureSet(RPS *rps, int curPoc, int tempId, bool isTSAPicture);<br>+    bool getTemporalLayerNonReferenceFlag();<br>     void decodingRefreshMarking(int pocCurr, NalUnitType nalUnitType);<br>+    bool isTemporalLayerSwitchingPoint(int curPoc, int tempId);<br>+    bool isStepwiseTemporalLayerSwitchingPoint(RPS *rps, int curPoc, int tempId);<br> <br>     NalUnitType getNalUnitType(int curPoc, bool bIsKeyFrame);<br> };<br>diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp<br>index 51068a875..64a4e231c 100644<br>--- a/source/encoder/encoder.cpp<br>+++ b/source/encoder/encoder.cpp<br>@@ -1637,6 +1637,8 @@ int Encoder::encode(const x265_picture* pic_in, x265_picture* pic_out)<br>             inFrame->m_lowres.satdCost = (int64_t)-1;<br>             inFrame->m_lowresInit = false;<br>             inFrame->m_isInsideWindow = 0;<br>+            inFrame->m_tempLayer = 0;<br>+            inFrame->m_sameLayerRefPic = 0;<br>         }<br> <br>         /* Copy input picture into a Frame and PicYuv, send to lookahead */<br>diff --git a/source/x265.h b/source/x265.h<br>index 86d324d10..81df146e8 100644<br>--- a/source/x265.h<br>+++ b/source/x265.h<br>@@ -60,7 +60,7 @@ typedef enum<br>     NAL_UNIT_CODED_SLICE_TRAIL_N = 0,<br>     NAL_UNIT_CODED_SLICE_TRAIL_R,<br>     NAL_UNIT_CODED_SLICE_TSA_N,<br>-    NAL_UNIT_CODED_SLICE_TLA_R,<br>+    NAL_UNIT_CODED_SLICE_TSA_R,<br>     NAL_UNIT_CODED_SLICE_STSA_N,<br>     NAL_UNIT_CODED_SLICE_STSA_R,<br>     NAL_UNIT_CODED_SLICE_RADL_N,<br>-- <br>2.28.0.windows.1<br><br><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><i>Thanks,</i><div><i>Kirithika</i></div></div></div></div></div>