[x265] [PATCH MV-HEVC 10/10] Decouple mv-hevc code from alpha and latest-tip

Tue Aug 6 10:50:25 UTC 2024

From 6990541ee2e5b63c818b813a0b766192dbd21f53 Mon Sep 17 00:00:00 2001
From: AnusuyaKumarasamy <anusuya.kumarasamy at multicorewareinc.com>
Date: Mon, 15 Jul 2024 23:11:51 +0530
Subject: [PATCH] Decouple mv-hevc code from alpha and latest-tip

---
 source/abrEncApp.cpp            |  5 +++--
 source/abrEncApp.h              |  1 +
 source/common/cudata.cpp        |  5 +++--
 source/common/param.cpp         |  5 ++++-
 source/common/slice.h           |  7 ++-----
 source/encoder/dpb.cpp          |  8 ++++++--
 source/encoder/encoder.cpp      | 30 +++++++++++++++++-------------
 source/encoder/entropy.cpp      | 14 ++++----------
 source/encoder/frameencoder.cpp |  4 +++-
 source/x265.h                   |  2 ++
 source/x265cli.cpp              |  2 +-
 11 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/source/abrEncApp.cpp b/source/abrEncApp.cpp
index 3460dfe78..0ae7e5800 100644
--- a/source/abrEncApp.cpp
+++ b/source/abrEncApp.cpp
@@ -63,6 +63,7 @@ namespace X265_NS {
             m_passEnc[i]->init(ret);
         }

+        m_numInputViews = m_passEnc[0]->m_param->numViews;
         if (!allocBuffers())
         {
             x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for
buffers\n");
@@ -96,7 +97,7 @@ namespace X265_NS {
 #if ENABLE_MULTIVIEW
         if (m_passEnc[0]->m_param->numViews > 1)
         {
-            for (uint8_t pass = 0; pass < m_passEnc[0]->m_param->numViews;
pass++)
+            for (uint8_t pass = 0; pass < m_numInputViews; pass++)
             {
                 m_inputPicBuffer[pass] = X265_MALLOC(x265_picture*,
m_queueSize);
                 for (uint32_t idx = 0; idx < m_queueSize; idx++)
@@ -144,7 +145,7 @@ namespace X265_NS {
     {
         x265_cleanup(); /* Free library singletons */
 #if ENABLE_MULTIVIEW
-        for (uint8_t pass = 0; pass < MAX_VIEWS; pass++)
+        for (uint8_t pass = 0; pass < m_numInputViews; pass++)
         {
             for (uint32_t index = 0; index < m_queueSize; index++)
             {
diff --git a/source/abrEncApp.h b/source/abrEncApp.h
index 43b202cc5..fab694656 100644
--- a/source/abrEncApp.h
+++ b/source/abrEncApp.h
@@ -42,6 +42,7 @@ namespace X265_NS {
     {
     public:
         uint8_t           m_numEncodes;
+        uint8_t           m_numInputViews; // Number of inputs for
multiview-extension
         PassEncoder        **m_passEnc;
         uint32_t           m_queueSize;
         ThreadSafeInteger  m_numActiveEncodes;
diff --git a/source/common/cudata.cpp b/source/common/cudata.cpp
index baa9be476..c0d68bc5f 100644
--- a/source/common/cudata.cpp
+++ b/source/common/cudata.cpp
@@ -2053,11 +2053,12 @@ bool CUData::getIndirectPMV(MV& outMV,
InterNeighbourMV *neighbours, uint32_t pi
                     outMV = mvp;
                 if (!(curRefPOC == curPOC))
                     outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC,
neibPOC, neibRefPOC);
+                return true;
+            }
 #else
             outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC,
neibRefPOC);
-#endif
             return true;
-            }
+#endif
         }
     }
     return false;
diff --git a/source/common/param.cpp b/source/common/param.cpp
index c310a7b4c..084743ee9 100755
--- a/source/common/param.cpp
+++ b/source/common/param.cpp
@@ -406,7 +406,7 @@ void x265_param_default(x265_param* param)

     /* Multi-View Encoding*/
     param->numViews = 1;
-    param->format = 1;
+    param->format = 0;

     param->numLayers = 1;
 }
@@ -1461,6 +1461,7 @@ int x265_param_parse(x265_param* p, const char* name,
const char* value)
             {
                 p->bEnableAlpha = 1;
                 p->numScalableLayers = 2;
+                p->numLayers = 2;
             }
         }
 #endif
@@ -1952,6 +1953,8 @@ int x265_check_params(x265_param* param)
 #endif
 #if ENABLE_MULTIVIEW
     CHECK((param->numViews > 2), "Multi-View Encoding currently support
only 2 views");
+    CHECK((param->numViews > 1) && (param->internalBitDepth != 8),
"BitDepthConstraint must be 8 for Multiview main profile");
+    CHECK((param->numViews > 1 && param->rc.rateControlMode !=
X265_RC_CQP), "Multiview encode supported only with CQP mode");
 #endif
     return check_failed;
 }
diff --git a/source/common/slice.h b/source/common/slice.h
index 90009ae7f..0d6807952 100644
--- a/source/common/slice.h
+++ b/source/common/slice.h
@@ -164,6 +164,7 @@ struct VPS
     uint32_t         maxLatencyIncrease[MAX_T_LAYERS];
     int              m_numLayers;
     int              m_numViews;
+    bool             vps_extension_flag;

 #if (ENABLE_ALPHA || ENABLE_MULTIVIEW)
     bool             splitting_flag;
@@ -176,7 +177,6 @@ struct VPS
     uint8_t          m_layerIdInVps[MAX_VPS_LAYER_ID_PLUS1];
     int              m_viewIdLen;
     int              m_vpsNumLayerSetsMinus1;
-    bool             vps_extension_flag;
 #endif

 #if ENABLE_MULTIVIEW
@@ -276,9 +276,9 @@ struct SPS

     Window   conformanceWindow;
     VUI      vuiParameters;
+    bool     sps_extension_flag;

 #if ENABLE_MULTIVIEW
-    bool     sps_extension_flag;
     int      setSpsExtOrMaxSubLayersMinus1;
     int      maxViews;
     bool     vui_parameters_present_flag;
@@ -322,11 +322,8 @@ struct PPS
     int      numRefIdxDefault[2];
     bool     pps_slice_chroma_qp_offsets_present_flag;

-#if ENABLE_MULTIVIEW
     bool     pps_extension_flag;
     int      maxViews;
-#endif
-
 };

 struct WeightParam
diff --git a/source/encoder/dpb.cpp b/source/encoder/dpb.cpp
index a187e6657..eafc9c91e 100644
--- a/source/encoder/dpb.cpp
+++ b/source/encoder/dpb.cpp
@@ -95,10 +95,12 @@ void DPB::recycleUnreferenced()

             // iterator is invalidated by remove, restart scan
             m_picList.remove(*curFrame);
-            if (!curFrame->m_viewId && m_picList.getPOC(curFrame->m_poc,
1) && curFrame == m_picList.getPOC(curFrame->m_poc,
1)->refPicSetInterLayer0.getPOC(curFrame->m_poc, curFrame->m_viewId))
+#if ENABLE_MULTIVIEW
+            if (curFrame->m_param->numViews > 1 && !curFrame->m_viewId &&
m_picList.getPOC(curFrame->m_poc, 1) && curFrame ==
m_picList.getPOC(curFrame->m_poc,
1)->refPicSetInterLayer0.getPOC(curFrame->m_poc, curFrame->m_viewId))
             {
                 m_picList.getPOC(curFrame->m_poc,
1)->refPicSetInterLayer0.removeSubDPB(*curFrame);
             }
+#endif
             iterFrame = m_picList.first();

             m_freeList.pushBack(*curFrame);
@@ -271,8 +273,10 @@ void DPB::prepareEncode(Frame *newFrame)
         }
     }

+#if ENABLE_MULTIVIEW
     if (newFrame->m_viewId)
         slice->createInterLayerReferencePictureSet(m_picList,
newFrame->refPicSetInterLayer0, newFrame->refPicSetInterLayer1);
+#endif
     if (slice->m_sliceType != I_SLICE)
         slice->m_numRefIdx[0] = x265_clip3(1,
newFrame->m_param->maxNumReferences, slice->m_rps.numberOfNegativePictures
+ newFrame->refPicSetInterLayer0.size() +
newFrame->refPicSetInterLayer1.size());
     else
@@ -349,7 +353,7 @@ void DPB::computeRPS(int curPoc, int tempId, bool
isRAP, RPS * rps, unsigned int
             if ((!m_bTemporalSublayer || (iterPic->m_tempLayer <= tempId))
&& ((m_lastIDR >= curPoc) || (m_lastIDR <= iterPic->m_poc)))
             {
 #if ENABLE_MULTIVIEW
-                    if (layer && numNeg ==
iterPic->m_param->maxNumReferences - 1 && (iterPic->m_poc - curPoc) < 0)
+                    if (iterPic->m_param->numViews > 1 && layer && numNeg
== iterPic->m_param->maxNumReferences - 1 && (iterPic->m_poc - curPoc) < 0)
                     {
                         iterPic = iterPic->m_next;
                         continue;
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index 5fb822c8a..ff8f53883 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -1595,12 +1595,13 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture** pic_out)
                 inFrame[layer] = new Frame;
                 inFrame[layer]->m_encodeStartTime = x265_mdate();
 #if ENABLE_MULTIVIEW
-                inFrame[layer]->m_viewId = layer;
-#else
-                inFrame[layer]->m_sLayerId = layer;
+                inFrame[layer]->m_viewId = m_param->numViews > 1 ? layer :
0;
+#endif
+#if ENABLE_ALPHA
+                inFrame[layer]->m_sLayerId = m_param->numScalableLayers >
1 ? layer : 0;
 #endif
                 inFrame[layer]->m_valid = false;
-                if (inFrame[layer]->create(p,
inputPic[layer]->quantOffsets))
+                if (inFrame[layer]->create(p, inputPic[!m_param->format ?
(m_param->numScalableLayers > 1) ? 0 : layer : 0]->quantOffsets))
                 {
                     /* the first PicYuv created is asked to generate the
CU and block unit offset
                      * arrays which are then shared with all subsequent
PicYuv (orig and recon)
@@ -1663,9 +1664,10 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture** pic_out)
                 inFrame[layer]->m_tempLayer = 0;
                 inFrame[layer]->m_sameLayerRefPic = 0;
 #if ENABLE_MULTIVIEW
-                inFrame[layer]->m_viewId = layer;
-#else
-                inFrame[layer]->m_sLayerId = layer;
+                inFrame[layer]->m_viewId = m_param->numViews > 1 ? layer :
0;
+#endif
+#if ENABLE_ALPHA
+                inFrame[layer]->m_sLayerId = m_param->numScalableLayers >
1 ? layer : 0;
 #endif
                 inFrame[layer]->m_valid = false;
                 inFrame[layer]->m_lowres.bKeyframe = false;
@@ -1701,7 +1703,7 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture** pic_out)
             }

             /* Copy input picture into a Frame and PicYuv, send to
lookahead */
-
 inFrame[layer]->m_fencPic->copyFromPicture(*inputPic[!m_param->format ?
layer : 0], *m_param, m_sps.conformanceWindow.rightOffset,
m_sps.conformanceWindow.bottomOffset, !layer);
+
 inFrame[layer]->m_fencPic->copyFromPicture(*inputPic[!m_param->format ?
(m_param->numScalableLayers > 1) ? 0 : layer : 0], *m_param,
m_sps.conformanceWindow.rightOffset, m_sps.conformanceWindow.bottomOffset,
!layer);

             inFrame[layer]->m_poc = (!layer) ? (++m_pocLast) : m_pocLast;
             inFrame[layer]->m_userData = inputPic[0]->userData;
@@ -2211,12 +2213,11 @@ int Encoder::encode(const x265_picture* pic_in,
x265_picture** pic_out)
             {
                 Frame* currentFrame =
m_dpb->m_picList.getPOC(frameEnc[0]->m_poc, layer);
                 frameEnc[layer] =
m_dpb->m_picList.removeFrame(*currentFrame);
-#if ENABLE_ALPHA
-                frameEnc[layer]->m_lowres.sliceType =
frameEnc[0]->m_lowres.sliceType;
-#else
                 int baseViewType = frameEnc[0]->m_lowres.sliceType;
-                frameEnc[layer]->m_lowres.sliceType =
IS_X265_TYPE_I(baseViewType) ? X265_TYPE_P : baseViewType;
-#endif
+                if (m_param->numScalableLayers > 1)
+                    frameEnc[layer]->m_lowres.sliceType = baseViewType;
+                else if(m_param->numViews > 1)
+                    frameEnc[layer]->m_lowres.sliceType =
IS_X265_TYPE_I(baseViewType) ? X265_TYPE_P : baseViewType;
             }
 #endif

@@ -3644,6 +3645,7 @@ void Encoder::initSPS(SPS *sps)

     vui.timingInfo.numUnitsInTick = m_param->fpsDenom;
     vui.timingInfo.timeScale = m_param->fpsNum;
+    sps->sps_extension_flag = false;

 #if ENABLE_MULTIVIEW
     if (m_param->numViews > 1)
@@ -3698,6 +3700,8 @@ void Encoder::initPPS(PPS *pps)

     pps->numRefIdxDefault[0] = 1;
     pps->numRefIdxDefault[1] = 1;
+    pps->pps_extension_flag = false;
+    pps->maxViews = 1;

 #if ENABLE_MULTIVIEW
     if (m_param->numViews > 1)
diff --git a/source/encoder/entropy.cpp b/source/encoder/entropy.cpp
index 1fe3d5895..0e45b4976 100644
--- a/source/encoder/entropy.cpp
+++ b/source/encoder/entropy.cpp
@@ -487,11 +487,13 @@ void Entropy::codeVPS(const VPS& vps, const SPS& sps)
 void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList,
const ProfileTierLevel& ptl, int layer)
 {
     WRITE_CODE(0, 4, "sps_video_parameter_set_id");
-    WRITE_CODE(!layer ? sps.maxTempSubLayers - 1 :
sps.setSpsExtOrMaxSubLayersMinus1, 3, "sps_ext_or_max_sub_layers_minus1");
 #if ENABLE_MULTIVIEW
+    if(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7)
+        WRITE_CODE(sps.setSpsExtOrMaxSubLayersMinus1, 3,
"sps_ext_or_max_sub_layers_minus1");
     if (!(layer != 0 && sps.setSpsExtOrMaxSubLayersMinus1 == 7))
 #endif
     {
+        WRITE_CODE(sps.maxTempSubLayers - 1, 3,
"sps_max_sub_layers_minus1");
         WRITE_FLAG(sps.maxTempSubLayers == 1,
"sps_temporal_id_nesting_flag");
         codeProfileTier(ptl, sps.maxTempSubLayers, layer);
     }
@@ -924,21 +926,13 @@ void Entropy::codeSliceHeader(const Slice& slice,
FrameData& encData, uint32_t s

     WRITE_UVLC(slice.m_sliceType, "slice_type");

-    if (layer > 0 || !slice.getIdrPicFlag())
+    if ((slice.m_param->numViews > 1 && layer > 0) ||
!slice.getIdrPicFlag())
     {
         int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 <<
slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
         WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb,
"pic_order_cnt_lsb");
     }
     if (!slice.getIdrPicFlag())
     {
-#if ENABLE_MULTIVIEW
-        if (!(slice.m_param->numViews > 1))
-#endif
-        {
-            int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 <<
slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
-            WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb,
"pic_order_cnt_lsb");
-        }
-
 #if _DEBUG || CHECKED_BUILD
         // check for bitstream restriction stating that:
         // If the current picture is a BLA or CRA picture, the value of
NumPocTotalCurr shall be equal to 0.
diff --git a/source/encoder/frameencoder.cpp
b/source/encoder/frameencoder.cpp
index 19d61ec35..c5175f538 100644
--- a/source/encoder/frameencoder.cpp
+++ b/source/encoder/frameencoder.cpp
@@ -1159,8 +1159,10 @@ void FrameEncoder::compressFrame(int layer)
     /* rateControlEnd may also block for earlier frames to call
rateControlUpdateStats */
     if (!layer && m_top->m_rateControl->rateControlEnd(m_frame[layer],
m_accessUnitBits[layer], &m_rce, &filler) < 0)
         m_top->m_aborted = true;
-    if (!layer)
+#if ENABLE_ALPHA || ENABLE_MULTIVIEW
+    if (!layer && m_frame[layer+1])
         m_frame[1]->m_encData->m_avgQpAq =
m_frame[layer]->m_encData->m_avgQpAq;
+#endif

     if (filler > 0)
     {
diff --git a/source/x265.h b/source/x265.h
index fb06372af..7b5144ec2 100644
--- a/source/x265.h
+++ b/source/x265.h
@@ -650,6 +650,8 @@ typedef enum

 #if ENABLE_ALPHA || ENABLE_MULTIVIEW
 #define MAX_LAYERS              2
+#else
+#define MAX_LAYERS              1
 #endif

 #define X265_IPRATIO_STRENGTH   1.43
diff --git a/source/x265cli.cpp b/source/x265cli.cpp
index c0c70b78b..a4ce6d272 100755
--- a/source/x265cli.cpp
+++ b/source/x265cli.cpp
@@ -983,7 +983,7 @@ namespace X265_NS {
                 }
             }
 #endif
-            for (int i = 0; i < param->numLayers - !!param->format; i++)
+            for (int i = 0; i < param->numLayers; i++)
             {
                 this->recon[i] = ReconFile::open(reconfn[i],
param->sourceWidth, param->sourceHeight, reconFileBitDepth,
                     param->fpsNum, param->fpsDenom, param->internalCsp,
param->sourceBitDepth);
-- 
2.36.0.windows.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240806/cb64483d/attachment-0001.htm>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0010-Decouple-mv-hevc-code-from-alpha-and-latest-tip.patch
Type: application/x-patch
Size: 15357 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20240806/cb64483d/attachment-0001.bin>