Thanks, I like initAqQPs since it gets rid of the messy section, but I was also hoping we could avoid passing qp around for the compress* functions.

Since cuGeom now has cuIdx, the analysis functions can just look up the appropriate QP in m_aqQP? In fact, with some arithmetic on cuGeom->absPartIdx and depth, we don't even need cuIdx at all? There's a rough sketch of what I mean below the quoted patch.

On Sun, Apr 26, 2015 at 10:51 PM, Steve Borho <steve@borho.org> wrote:

# HG changeset patch
# User Steve Borho <steve@borho.org>
# Date 1429909512 18000
# Fri Apr 24 16:05:12 2015 -0500
# Node ID 5644bbd23e71996651f4ed558e0260201a91f70d
# Parent bfd57a0c0875e219d902ff3af6f4a0ddaa16b125
analysis: keep per-CU AQ QPs in cuGeom index order, simplify arguments

diff -r bfd57a0c0875 -r 5644bbd23e71 source/common/cudata.cpp
--- a/source/common/cudata.cpp Fri Apr 24 15:14:54 2015 -0500
+++ b/source/common/cudata.cpp Fri Apr 24 16:05:12 2015 -0500
@@ -2027,6 +2027,7 @@
uint32_t blockSize = 1 << log2CUSize;
uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize);
int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize];
+
for (uint32_t sbY = 0; sbY < sbWidth; sbY++)
{
for (uint32_t sbX = 0; sbX < sbWidth; sbX++)
@@ -2049,7 +2050,8 @@
cu->childOffset = childIdx - cuIdx;
cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4;
cu->numPartitions = (NUM_4x4_PARTITIONS >> ((g_maxLog2CUSize - cu->log2CUSize) * 2));
- cu->depth = g_log2Size[maxCUSize] - log2CUSize;
+ cu->depth = (uint16_t)(g_log2Size[maxCUSize] - log2CUSize);
+ cu->index = (uint16_t)cuIdx;

cu->flags = 0;
CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
diff -r bfd57a0c0875 -r 5644bbd23e71 source/common/cudata.h
--- a/source/common/cudata.h Fri Apr 24 15:14:54 2015 -0500
+++ b/source/common/cudata.h Fri Apr 24 16:05:12 2015 -0500
@@ -85,8 +85,8 @@
uint32_t childOffset; // offset of the first child CU from current CU
uint32_t absPartIdx; // Part index of this CU in terms of 4x4 blocks.
uint32_t numPartitions; // Number of 4x4 blocks in the CU
- uint32_t depth; // depth of this CU relative from CTU
uint32_t flags; // CU flags.
+ uint16_t depth, index; // depth of this CU relative from CTU, absolute index
};

struct MVField
diff -r bfd57a0c0875 -r 5644bbd23e71 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Apr 24 15:14:54 2015 -0500
+++ b/source/encoder/analysis.cpp Fri Apr 24 16:05:12 2015 -0500
@@ -75,8 +75,6 @@
m_reuseInterDataCTU = NULL;
m_reuseRef = NULL;
m_reuseBestMergeCand = NULL;
- for (int i = 0; i < NUM_CU_DEPTH; i++)
- m_aqQP[i] = NULL;
}

bool Analysis::create(ThreadLocalData *tld)
@@ -103,12 +101,9 @@
ok &= md.pred[j].reconYuv.create(cuSize, csp);
md.pred[j].fencYuv = &md.fencYuv;
}
- CHECKED_MALLOC(m_aqQP[depth], int, (size_t)1 << (depth << 1));
}

return ok;
-fail:
- return false;
}

void Analysis::destroy()
@@ -123,7 +118,17 @@
m_modeDepth[i].pred[j].predYuv.destroy();
m_modeDepth[i].pred[j].reconYuv.destroy();
}
- X265_FREE(m_aqQP[i]);
+ }
+}
+
+void Analysis::initAqQPs(uint32_t depth, const CUData& ctu, const CUGeom* rootGeom)
+{
+ for (int d0 = 0; d0 < 4; d0++)
+ {
+ m_aqQP[rootGeom->index + d0] = calculateQpforCuSize(ctu, rootGeom[d0]);
+
+ if (m_slice->m_pps->maxCuDQPDepth > depth)
+ initAqQPs(depth + 1, ctu, &rootGeom[d0] + rootGeom[d0].childOffset);
}
}

@@ -141,32 +146,16 @@

if (m_slice->m_pps->bUseDQP)
{
- /* TODO: In future, we could extend this to 8x8 QGs as well, since that's the minimum size
- * allowed by the HEVC standard. The AQ offset calculation will need to be at 8x8 granularity.
- * And this messy section will need to be reworked */
- m_aqQP[0][0] = calculateQpforCuSize(ctu, cuGeom);
+ m_aqQP[0] = calculateQpforCuSize(ctu, cuGeom);
+ setLambdaFromQP(*m_slice, m_aqQP[0]);
+ m_aqQP[0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0]);
+ ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);

- const CUGeom* rootGeom = &cuGeom + 1;
- if (m_slice->m_pps->maxCuDQPDepth >= 1)
- {
- for (int d0 = 0; d0 < 4; d0++)
- {
- m_aqQP[1][d0] = calculateQpforCuSize(ctu, rootGeom[d0]);
- if (m_slice->m_pps->maxCuDQPDepth == 2)
- {
- const CUGeom* curGeom = &rootGeom[d0] + rootGeom[d0].childOffset;
- for (int d1 = 0; d1 < 4; d1++)
- m_aqQP[2][d0 * 4 + d1] = calculateQpforCuSize(ctu, curGeom[d1]);
- }
- }
- }
-
- setLambdaFromQP(*m_slice, m_aqQP[0][0]);
- m_aqQP[0][0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0][0]);
- ctu.setQPSubParts((int8_t)m_aqQP[0][0], 0, 0);
+ if (m_slice->m_pps->maxCuDQPDepth)
+ initAqQPs(1, ctu, &cuGeom + 1);
}
else
- m_aqQP[0][0] = m_slice->m_sliceQp;
+ m_aqQP[0] = m_slice->m_sliceQp;

m_quant.setQPforQuant(ctu);
m_rqt[0].cur.load(initialContext);
@@ -191,7 +180,7 @@
uint32_t zOrder = 0;
if (m_slice->m_sliceType == I_SLICE)
{
- compressIntraCU(ctu, cuGeom, zOrder, m_aqQP[0][0], 0);
+ compressIntraCU(ctu, cuGeom, zOrder, m_aqQP[0]);
if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.intraData)
{
CUData* bestCU = &m_modeDepth[0].bestMode->cu;
@@ -209,18 +198,18 @@
* they are available for intra predictions */
m_modeDepth[0].fencYuv.copyToPicYuv(*m_frame->m_reconPic, ctu.m_cuAddr, 0);

- compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0][0], 0);
+ compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0]);

/* generate residual for entire CTU at once and copy to reconPic */
encodeResidue(ctu, cuGeom);
}
else if (m_param->bDistributeModeAnalysis && m_param->rdLevel >= 2)
- compressInterCU_dist(ctu, cuGeom, m_aqQP[0][0], 0);
+ compressInterCU_dist(ctu, cuGeom, m_aqQP[0]);
else if (m_param->rdLevel <= 4)
- compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0][0], 0);
+ compressInterCU_rd0_4(ctu, cuGeom, m_aqQP[0]);
else
{
- compressInterCU_rd5_6(ctu, cuGeom, zOrder, m_aqQP[0][0], 0);
+ compressInterCU_rd5_6(ctu, cuGeom, zOrder, m_aqQP[0]);
if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_frame->m_analysisData.interData)
{
CUData* bestCU = &m_modeDepth[0].bestMode->cu;
@@ -259,7 +248,7 @@
}
}

-void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t& zOrder, int32_t qp, uint32_t partIdx)
+void Analysis::compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t& zOrder, int32_t qp)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
@@ -270,7 +259,6 @@

if (m_slice->m_pps->bUseDQP && depth && depth <= m_slice->m_pps->maxCuDQPDepth)
{
- qp = m_aqQP[depth][partIdx];
setLambdaFromQP(*m_slice, qp);
qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
}
@@ -342,7 +330,10 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressIntraCU(parentCTU, childGeom, zOrder, qp, partIdx * 4 + subPartIdx);
+
+ int32_t nextQP = m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth ?
+ m_aqQP[childGeom.index] : qp;
+ compressIntraCU(parentCTU, childGeom, zOrder, nextQP);

// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -530,7 +521,7 @@
while (task >= 0);
}

-void Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, uint32_t partIdx)
+void Analysis::compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
{
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -545,7 +536,6 @@

if (m_slice->m_pps->bUseDQP && depth && depth <= m_slice->m_pps->maxCuDQPDepth)
{
- qp = m_aqQP[depth][partIdx];
setLambdaFromQP(*m_slice, qp);
qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
}
@@ -749,7 +739,9 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_dist(parentCTU, childGeom, qp, partIdx * 4 + subPartIdx);
+ int32_t nextQP = m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth ?
+ m_aqQP[childGeom.index] : qp;
+ compressInterCU_dist(parentCTU, childGeom, nextQP);

// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -788,7 +780,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
}

-void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, uint32_t partIdx)
+void Analysis::compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp)
{
uint32_t depth = cuGeom.depth;
uint32_t cuAddr = parentCTU.m_cuAddr;
@@ -801,7 +793,6 @@

if (m_slice->m_pps->bUseDQP && depth && depth <= m_slice->m_pps->maxCuDQPDepth)
{
- qp = m_aqQP[depth][partIdx];
setLambdaFromQP(*m_slice, qp);
qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
}
@@ -1028,7 +1019,9 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd0_4(parentCTU, childGeom, qp, partIdx * 4 + subPartIdx);
+ int32_t nextQP = m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth ?
+ m_aqQP[childGeom.index] : qp;
+ compressInterCU_rd0_4(parentCTU, childGeom, nextQP);

// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
@@ -1079,7 +1072,7 @@
md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, cuAddr, cuGeom.absPartIdx);
}

-void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp, uint32_t partIdx)
+void Analysis::compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp)
{
uint32_t depth = cuGeom.depth;
ModeDepth& md = m_modeDepth[depth];
@@ -1090,7 +1083,6 @@

if (m_slice->m_pps->bUseDQP && depth && depth <= m_slice->m_pps->maxCuDQPDepth)
{
- qp = m_aqQP[depth][partIdx];
setLambdaFromQP(*m_slice, qp);
qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
}
@@ -1234,7 +1226,9 @@
{
m_modeDepth[0].fencYuv.copyPartToYuv(nd.fencYuv, childGeom.absPartIdx);
m_rqt[nextDepth].cur.load(*nextContext);
- compressInterCU_rd5_6(parentCTU, childGeom, zOrder, qp, partIdx * 4 + subPartIdx);
+ int32_t nextQP = m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth ?
+ m_aqQP[childGeom.index] : qp;
+ compressInterCU_rd5_6(parentCTU, childGeom, zOrder, nextQP);

// Save best CU and pred data for this sub CU
splitCU->copyPartFrom(nd.bestMode->cu, childGeom, subPartIdx);
diff -r bfd57a0c0875 -r 5644bbd23e71 source/encoder/analysis.h
--- a/source/encoder/analysis.h Fri Apr 24 15:14:54 2015 -0500
+++ b/source/encoder/analysis.h Fri Apr 24 16:05:12 2015 -0500
@@ -90,7 +90,7 @@
void processPmode(PMODE& pmode, Analysis& slave);

ModeDepth m_modeDepth[NUM_CU_DEPTH];
- int* m_aqQP[NUM_CU_DEPTH];
+ int m_aqQP[CUGeom::MAX_GEOMS];
bool m_bTryLossless;
bool m_bChromaSa8d;

@@ -109,13 +109,15 @@
int32_t* m_reuseRef;
uint32_t* m_reuseBestMergeCand;

+ void initAqQPs(uint32_t depth, const CUData& ctu, const CUGeom* rootGeom);
+
/* full analysis for an I-slice CU */
- void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qpDepth, uint32_t partIdx);
+ void compressIntraCU(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);

/* full analysis for a P or B slice CU */
- void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qpDepth, uint32_t partIdx);
- void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qpDepth, uint32_t partIdx);
- void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qpDepth, uint32_t partIdx);
+ void compressInterCU_dist(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
+ void compressInterCU_rd0_4(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp);
+ void compressInterCU_rd5_6(const CUData& parentCTU, const CUGeom& cuGeom, uint32_t &zOrder, int32_t qp);

/* measure merge and skip */
void checkMerge2Nx2N_rd0_4(Mode& skip, Mode& merge, const CUGeom& cuGeom);
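
To make the suggestion above concrete, here is a rough, untested sketch of the kind of lookup I have in mind. aqQpIdx() is a hypothetical helper (the shift by 8 assumes a 64x64 CTU, i.e. 256 4x4 partitions), and since m_aqQP is our own side array, its layout only has to match whatever initAqQPs() writes; it needn't be CUGeom::index at all:

/* Hypothetical helper (not part of this patch): map a CU's depth and
 * absPartIdx to a slot in a flat per-CTU QP array. Assumes a 64x64 CTU
 * (256 4x4 partitions). The level base is 1 + 4 + ... + 4^(depth-1)
 * = (4^depth - 1) / 3; the offset within the level is the CU's z-order
 * position at that depth. */
static inline uint32_t aqQpIdx(uint32_t depth, uint32_t absPartIdx)
{
    uint32_t levelBase  = ((1u << (2 * depth)) - 1) / 3; /* 0, 1, 5, 21 */
    uint32_t posInLevel = absPartIdx >> (8 - 2 * depth);
    return levelBase + posInLevel;
}

/* Then inside compressIntraCU() / compressInterCU_*(), instead of taking qp
 * as an argument: clamp to the quantization group depth so CUs deeper than
 * maxCuDQPDepth pick up their ancestor's QP. Assumes m_aqQP[] was filled by
 * initAqQPs() using the same formula. */
int32_t qp = m_slice->m_sliceQp;
if (m_slice->m_pps->bUseDQP)
{
    uint32_t qgDepth = X265_MIN((uint32_t)cuGeom.depth, m_slice->m_pps->maxCuDQPDepth);
    qp = m_aqQP[aqQpIdx(qgDepth, cuGeom.absPartIdx)];
}

Note the z-order layout within a level isn't the same ordering as CUGeom::index (which looks to be raster within each level), so going this route initAqQPs() would just store through the same helper. If we instead index directly with cuGeom.index, CUs deeper than maxCuDQPDepth would still need some way to find their quantization-group ancestor's slot, which is why the depth/absPartIdx arithmetic might end up being the simpler route.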
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel