[x265] [PATCH 2 of 4] Reuse analysis info of low resolution for full resolution encoding

kavitha at multicorewareinc.com kavitha at multicorewareinc.com
Fri Jun 2 19:28:57 CEST 2017


# HG changeset patch
# User Kavitha Sampath <kavitha at multicorewareinc.com>
# Date 1495615463 -19800
#      Wed May 24 14:14:23 2017 +0530
# Node ID 802bb7ead1e0151a604bc293451c735990656162
# Parent  404253434d33e99955aac29480ee16b8e939a64c
Reuse analysis info of low resolution for full resolution encoding.

New CLI option --scale-factor is added. This option should be coupled
with analysis-mode, --refine-level 10. It specifies the factor
by which input video of save mode should be scaled down.
Currently only scale-factor 2 is supported.

diff -r 404253434d33 -r 802bb7ead1e0 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Wed May 24 08:39:45 2017 +0530
+++ b/source/common/cudata.cpp	Wed May 24 14:14:23 2017 +0530
@@ -28,6 +28,7 @@
 #include "picyuv.h"
 #include "mv.h"
 #include "cudata.h"
+#define MAX_MV 1 << 14
 
 using namespace X265_NS;
 
@@ -1623,6 +1624,11 @@
                 dir |= (1 << list);
                 candMvField[count][list].mv = colmv;
                 candMvField[count][list].refIdx = refIdx;
+                if (m_encData->m_param->scaleFactor && m_encData->m_param->analysisMode == X265_ANALYSIS_SAVE && m_log2CUSize[0] < 4)
+                {
+                    MV dist(MAX_MV, MAX_MV);
+                    candMvField[count][list].mv = dist;
+                }
             }
         }
 
@@ -1783,7 +1789,13 @@
             int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
             int curPOC = m_slice->m_poc;
 
-            pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC);
+            if (m_encData->m_param->scaleFactor && m_encData->m_param->analysisMode == X265_ANALYSIS_SAVE && (m_log2CUSize[0] < 4))
+            {
+                MV dist(MAX_MV, MAX_MV);
+                pmv[numMvc++] = amvpCand[num++] = dist;
+            }
+            else
+                pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC);
         }
     }
 
diff -r 404253434d33 -r 802bb7ead1e0 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Wed May 24 08:39:45 2017 +0530
+++ b/source/encoder/analysis.cpp	Wed May 24 14:14:23 2017 +0530
@@ -2263,6 +2263,19 @@
                             mode.cu.m_mvd[list][pu.puAbsPartIdx] = mode.cu.m_mv[list][pu.puAbsPartIdx] - mvp;
                         }
                     }
+                    else if(m_param->scaleFactor)
+                    {
+                        MVField candMvField[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
+                        uint8_t candDir[MRG_MAX_NUM_CANDS];
+                        mode.cu.getInterMergeCandidates(pu.puAbsPartIdx, part, candMvField, candDir);
+                        mode.cu.m_mvpIdx[0][pu.puAbsPartIdx] = interDataCTU->mvpIdx[0][cuIdx + part];
+                        uint8_t mvpIdx = mode.cu.m_mvpIdx[0][pu.puAbsPartIdx];
+                        mode.cu.setPUInterDir(candDir[mvpIdx], pu.puAbsPartIdx, part);
+                        mode.cu.setPUMv(0, candMvField[mvpIdx][0].mv, pu.puAbsPartIdx, part);
+                        mode.cu.setPUMv(1, candMvField[mvpIdx][1].mv, pu.puAbsPartIdx, part);
+                        mode.cu.setPURefIdx(0, (int8_t)candMvField[mvpIdx][0].refIdx, pu.puAbsPartIdx, part);
+                        mode.cu.setPURefIdx(1, (int8_t)candMvField[mvpIdx][1].refIdx, pu.puAbsPartIdx, part);
+                    }
                 }
                 motionCompensation(mode.cu, pu, mode.predYuv, true, (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400));
             }
diff -r 404253434d33 -r 802bb7ead1e0 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp	Wed May 24 08:39:45 2017 +0530
+++ b/source/encoder/encoder.cpp	Wed May 24 14:14:23 2017 +0530
@@ -2839,6 +2839,11 @@
     X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFile);
     X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFile);
     X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFile);
+    int scaledNumPartition = analysis->numPartitions;
+    int factor = 1 << m_param->scaleFactor;
+
+    if (m_param->scaleFactor)
+        analysis->numPartitions *= factor;
 
     /* Memory is allocated for inter and intra analysis data based on the slicetype */
     allocAnalysis(analysis);
@@ -2862,12 +2867,31 @@
         for (uint32_t d = 0; d < depthBytes; d++)
         {
             int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
+            if (m_param->scaleFactor)
+            {
+                if (depthBuf[d] == 0)
+                    depthBuf[d] = 1;
+                if (partSizes[d] == SIZE_NxN)
+                    partSizes[d] = SIZE_2Nx2N;
+            }
             memset(&((analysis_intra_data *)analysis->intraData)->depth[count], depthBuf[d], bytes);
             memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], modeBuf[d], bytes);
             memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count], partSizes[d], bytes);
             count += bytes;
         }
-        X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+
+        if (!m_param->scaleFactor)
+        {
+            X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+        }
+        else
+        {
+            uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition);
+            X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFile);
+            for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor)
+                memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
+            X265_FREE(tempLumaBuf);
+        }
         X265_FREE(tempBuf);
         consumedBytes += frameRecordSize;
     }
@@ -2923,12 +2947,16 @@
         for (uint32_t d = 0; d < depthBytes; d++)
         {
             int bytes = analysis->numPartitions >> (depthBuf[d] * 2);
+            if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA && depthBuf[d] == 0)
+                 depthBuf[d] = 1;
             memset(&((analysis_inter_data *)analysis->interData)->depth[count], depthBuf[d], bytes);
             memset(&((analysis_inter_data *)analysis->interData)->modes[count], modeBuf[d], bytes);
             if (m_param->analysisRefineLevel > 4)
             {
+                if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA && partSize[d] == SIZE_NxN)
+                     partSize[d] = SIZE_2Nx2N;
                 memset(&((analysis_inter_data *)analysis->interData)->partSize[count], partSize[d], bytes);
-                int numPU = nbPartsTable[(int)partSize[d]];
+                int numPU = (modeBuf[d] == MODE_INTRA) ? 1 : nbPartsTable[(int)partSize[d]];
                 for (int pu = 0; pu < numPU; pu++)
                 {
                     if (pu) d++;
@@ -2940,6 +2968,11 @@
                         {
                             ((analysis_inter_data *)analysis->interData)->mvpIdx[i][count + pu] = mvpIdx[i][d];
                             ((analysis_inter_data *)analysis->interData)->refIdx[i][count + pu] = refIdx[i][d];
+                            if (m_param->scaleFactor)
+                            {
+                                mv[i][d].x *= (int16_t)m_param->scaleFactor;
+                                mv[i][d].y *= (int16_t)m_param->scaleFactor;
+                            }
                             memcpy(&((analysis_inter_data *)analysis->interData)->mv[i][count + pu], &mv[i][d], sizeof(MV));
                         }
                     }
@@ -2961,7 +2994,20 @@
                 X265_FREE(mv[i]);
             }
             if (bIntraInInter)
-                X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+            {
+                if (!m_param->scaleFactor)
+                {
+                    X265_FREAD(((analysis_intra_data *)analysis->intraData)->modes, sizeof(uint8_t), analysis->numCUsInFrame * analysis->numPartitions, m_analysisFile);
+                }
+                else
+                {
+                    uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, analysis->numCUsInFrame * scaledNumPartition);
+                    X265_FREAD(tempLumaBuf, sizeof(uint8_t), analysis->numCUsInFrame * scaledNumPartition, m_analysisFile);
+                    for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor)
+                        memset(&((analysis_intra_data *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor);
+                    X265_FREE(tempLumaBuf);
+                }
+            }
         }
         else
             X265_FREAD(((analysis_inter_data *)analysis->interData)->ref, sizeof(int32_t), analysis->numCUsInFrame * X265_MAX_PRED_MODE_PER_CTU * numDir, m_analysisFile);
@@ -3183,7 +3229,7 @@
                         interDataCTU->partSize[depthBytes] = partSize;
 
                         /* Store per PU data */
-                        uint32_t numPU = nbPartsTable[(int)partSize];
+                        uint32_t numPU = (predMode == MODE_INTRA) ? 1 : nbPartsTable[(int)partSize];
                         for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
                         {
                             uint32_t puabsPartIdx = ctu->getPUOffset(puIdx, absPartIdx) + absPartIdx;
diff -r 404253434d33 -r 802bb7ead1e0 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Wed May 24 08:39:45 2017 +0530
+++ b/source/encoder/slicetype.cpp	Wed May 24 14:14:23 2017 +0530
@@ -893,7 +893,7 @@
     if (m_param->rc.cuTree && !m_param->rc.bStatRead)
         /* update row satds based on cutree offsets */
         curFrame->m_lowres.satdCost = frameCostRecalculate(frames, p0, p1, b);
-    else if (m_param->analysisMode != X265_ANALYSIS_LOAD)
+    else if (m_param->analysisMode != X265_ANALYSIS_LOAD || m_param->scaleFactor)
     {
         if (m_param->rc.aqMode)
             curFrame->m_lowres.satdCost = curFrame->m_lowres.costEstAq[b - p0][p1 - b];


More information about the x265-devel mailing list