[x265-commits] [x265] Backed out changeset: 940cec3bf0b4

Thu Sep 25 18:20:20 CEST 2014

details:   http://hg.videolan.org/x265/rev/0d330611fa97
branches:  
changeset: 8137:0d330611fa97
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Thu Sep 25 13:12:43 2014 +0530
description:
Backed out changeset: 940cec3bf0b4

This commit consistently causes hash mismatches in vc11 x86_64 Release
mode when lft is enabled. Stack/heap corruption is the likely cause.
Subject: [x265] encoder: rename cuCoder to analysis for better clarity

details:   http://hg.videolan.org/x265/rev/a3f952bcada5
branches:  
changeset: 8138:a3f952bcada5
user:      Steve Borho <steve at borho.org>
date:      Wed Sep 24 16:21:42 2014 -0500
description:
encoder: rename cuCoder to analysis for better clarity

The data type of cuCoder changed from TEncCu to Analysis weeks ago.
Subject: [x265] predict: inline single call of predInterBi()

details:   http://hg.videolan.org/x265/rev/b6c9a51d9201
branches:  
changeset: 8139:b6c9a51d9201
user:      Steve Borho <steve at borho.org>
date:      Wed Sep 24 16:38:05 2014 -0500
description:
predict: inline single call of predInterBi()
Subject: [x265] predict: inline predInterUni(), getWpScaling() and simplify motionCompensation()

details:   http://hg.videolan.org/x265/rev/a961728f906c
branches:  
changeset: 8140:a961728f906c
user:      Steve Borho <steve at borho.org>
date:      Wed Sep 24 17:41:55 2014 -0500
description:
predict: inline predInterUni(), getWpScaling() and simplify motionCompensation()

After this refactor, motionCompensation no longer needs the cu parameter. It
was only used to pass to another member function to gain access to cu->m_slice
which is now a member variable.

This refactor removed a number of arguments to addWeightBi and addWeightUni
which were always member variables.
Subject: [x265] predict: remove check for reallocations, comment nits

details:   http://hg.videolan.org/x265/rev/7c88fb6128cf
branches:  
changeset: 8141:7c88fb6128cf
user:      Steve Borho <steve at borho.org>
date:      Wed Sep 24 17:46:57 2014 -0500
description:
predict: remove check for reallocations, comment nits

We don't do this anywhere else; there would be huge leaks if the Search object
were initialized multiple times. There's no reason to check here.
Subject: [x265] predict: combine and check allocations and return failures

details:   http://hg.videolan.org/x265/rev/982040e91112
branches:  
changeset: 8142:982040e91112
user:      Steve Borho <steve at borho.org>
date:      Wed Sep 24 17:54:39 2014 -0500
description:
predict: combine and check allocations and return failures
Subject: [x265] predict: split weighted prediction values from WeightParam

details:   http://hg.videolan.org/x265/rev/7dccbbed0349
branches:  
changeset: 8143:7dccbbed0349
user:      Steve Borho <steve at borho.org>
date:      Wed Sep 24 18:26:45 2014 -0500
description:
predict: split weighted prediction values from WeightParam

The arguments passed to addWeightBi() and addWeightUni() are just the
"w, o, offset, shift, round" integers. They don't need the fields which were
signaled in the slice header, or vice versa.

diffstat:

 source/Lib/TLibCommon/TComPicYuv.h |    2 -
 source/common/deblock.cpp          |  317 ++++++++++++++++-------------
 source/common/deblock.h            |    2 +-
 source/common/slice.h              |   10 +-
 source/encoder/analysis.cpp        |   15 +-
 source/encoder/encoder.cpp         |    6 +-
 source/encoder/frameencoder.cpp    |   40 +-
 source/encoder/frameencoder.h      |    4 +-
 source/encoder/predict.cpp         |  390 ++++++++++++++++--------------------
 source/encoder/predict.h           |   31 +-
 source/encoder/search.cpp          |    8 +-
 11 files changed, 406 insertions(+), 419 deletions(-)

diffs (truncated from 1409 to 300 lines):

diff -r e47e127da779 -r 7dccbbed0349 source/Lib/TLibCommon/TComPicYuv.h

--- a/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 21:51:12 2014 -0500
+++ b/source/Lib/TLibCommon/TComPicYuv.h	Wed Sep 24 18:26:45 2014 -0500
@@ -155,8 +155,6 @@ public:
 
     pixel*  getChromaAddr(uint32_t chromaId, int cuAddr, int absZOrderIdx) { return m_picOrg[chromaId] + m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
 
-    int32_t getChromaAddrOffset(int cuAddr, int absZOrderIdx) { return m_cuOffsetC[cuAddr] + m_buOffsetC[absZOrderIdx]; }
-
     uint32_t getCUHeight(int rowNum);
 
     void  copyFromPicture(const x265_picture&, int padx, int pady);
diff -r e47e127da779 -r 7dccbbed0349 source/common/deblock.cpp
--- a/source/common/deblock.cpp	Wed Sep 24 21:51:12 2014 -0500
+++ b/source/common/deblock.cpp	Wed Sep 24 18:26:45 2014 -0500
@@ -48,7 +48,7 @@ void Deblock::deblockCU(TComDataCU* cu, 
         return;
 
     Frame* pic = cu->m_pic;
-    uint32_t curNumParts = m_numPartitions >> (depth * 2);
+    uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1);
 
     if (cu->getDepth(absZOrderIdx) > depth)
     {
@@ -56,34 +56,35 @@ void Deblock::deblockCU(TComDataCU* cu, 
         uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples  - cu->getCUPelX();
         uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - cu->getCUPelY();
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
+        {
             if (g_zscanToPelX[absZOrderIdx] < xmax && g_zscanToPelY[absZOrderIdx] < ymax)
                 deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
+        }
         return;
     }
 
     Param params;
     setLoopfilterParam(cu, absZOrderIdx, &params);
-    setEdgefilterTU(cu, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
+    setEdgefilterTU(cu, absZOrderIdx, absZOrderIdx, depth, dir, edgeFilter, blockingStrength);
     setEdgefilterPU(cu, absZOrderIdx, dir, &params, edgeFilter, blockingStrength);
 
     for (uint32_t partIdx = absZOrderIdx; partIdx < absZOrderIdx + curNumParts; partIdx++)
     {
-        uint32_t bsCheck = !(partIdx & (1 << dir));
+        uint32_t bsCheck = (dir == EDGE_VER ? !(partIdx & 1) : !(partIdx & 2));
 
-        if (bsCheck && edgeFilter[partIdx])
+        if (edgeFilter[partIdx] && bsCheck)
             getBoundaryStrengthSingle(cu, dir, partIdx, blockingStrength);
     }
 
-    const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
+    uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
     uint32_t sizeInPU = pic->getNumPartInCUSize() >> depth;
     uint32_t shiftFactor = (dir == EDGE_VER) ? cu->getHorzChromaShift() : cu->getVertChromaShift();
-    uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
-    uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absZOrderIdx] : g_zscanToPelY[absZOrderIdx]) >> LOG2_UNIT_SIZE;
-        
+    const bool alwaysDoChroma = cu->getChromaFormat() == X265_CSP_I444;
+
     for (uint32_t e = 0; e < sizeInPU; e += partIdxIncr)
     {
         edgeFilterLuma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
-        if (!((e0 + e) & chromaMask))
+        if (alwaysDoChroma || !(e % ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE)))
             edgeFilterChroma(cu, absZOrderIdx, depth, dir, e, blockingStrength);
     }
 }
@@ -114,60 +115,66 @@ void Deblock::setEdgefilterMultiple(TCom
     }
 }
 
-void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
+void Deblock::setEdgefilterTU(TComDataCU* cu, uint32_t absTUPartIdx, uint32_t absZOrderIdx, uint32_t depth, int32_t dir, bool edgeFilter[], uint8_t blockingStrength[])
 {
     if (cu->getTransformIdx(absZOrderIdx) + cu->getDepth(absZOrderIdx) > (uint8_t)depth)
     {
-        const uint32_t curNumParts = m_numPartitions >> (depth * 2);
+        const uint32_t curNumParts = cu->m_pic->getNumPartInCU() >> (depth << 1);
         const uint32_t qNumParts   = curNumParts >> 2;
 
         for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx += qNumParts)
-            setEdgefilterTU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
+        {
+            uint32_t nsAddr = absZOrderIdx;
+            setEdgefilterTU(cu, nsAddr, absZOrderIdx, depth + 1, dir, edgeFilter, blockingStrength);
+        }
         return;
     }
 
     uint32_t widthInBaseUnits  = 1 << (cu->getLog2CUSize(absZOrderIdx) - cu->getTransformIdx(absZOrderIdx) - LOG2_UNIT_SIZE);
-    setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
+    setEdgefilterMultiple(cu, absTUPartIdx, depth, dir, 0, true, edgeFilter, blockingStrength, widthInBaseUnits);
 }
 
 void Deblock::setEdgefilterPU(TComDataCU* cu, uint32_t absZOrderIdx, int32_t dir, Param *params, bool edgeFilter[], uint8_t blockingStrength[])
 {
     const uint32_t depth = cu->getDepth(absZOrderIdx);
     const uint32_t widthInBaseUnits  = cu->m_pic->getNumPartInCUSize() >> depth;
-    const uint32_t hWidthInBaseUnits = widthInBaseUnits >> 1;
-    const uint32_t qWidthInBaseUnits = widthInBaseUnits >> 2;
+    const uint32_t hWidthInBaseUnits  = widthInBaseUnits  >> 1;
+    const uint32_t qWidthInBaseUnits  = widthInBaseUnits  >> 2;
 
     setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, 0, (dir == EDGE_VER ? params->leftEdge : params->topEdge), edgeFilter, blockingStrength);
 
-    switch (cu->getPartitionSize(absZOrderIdx))
+    int32_t mode = cu->getPartitionSize(absZOrderIdx);
+    switch (mode)
     {
     case SIZE_2NxN:
-        if (EDGE_HOR == dir)
+    case SIZE_Nx2N:
+        {
+            const int32_t realDir = (mode == SIZE_2NxN ? EDGE_HOR : EDGE_VER);
+            if (realDir == dir)
+                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
+            break;
+        }
+    case SIZE_NxN:
+        {
             setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
-        break;
-    case SIZE_Nx2N:
-        if (EDGE_VER == dir)
-            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
-        break;
-    case SIZE_NxN:
-        setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, hWidthInBaseUnits, true, edgeFilter, blockingStrength);
-        break;
+            break;
+        }
     case SIZE_2NxnU:
-        if (EDGE_HOR == dir)
-            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
-        break;
     case SIZE_nLx2N:
-        if (EDGE_VER == dir)
-            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
-        break;
+        {
+            const int32_t realDir = (mode == SIZE_2NxnU ? EDGE_HOR : EDGE_VER);
+            if (realDir == dir)
+                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+            break;
+        }
     case SIZE_2NxnD:
-        if (EDGE_HOR == dir)
-            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
-        break;
     case SIZE_nRx2N:
-        if (EDGE_VER == dir)
-            setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
-        break;
+        {
+            const int32_t realDir = (mode == SIZE_2NxnD ? EDGE_HOR : EDGE_VER);
+            if (realDir == dir)
+                setEdgefilterMultiple(cu, absZOrderIdx, depth, dir, widthInBaseUnits - qWidthInBaseUnits, true, edgeFilter, blockingStrength);
+            break;
+        }
 
     case SIZE_2Nx2N:
     default:
@@ -331,15 +338,15 @@ static inline int32_t calcDQ(pixel* src,
     return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]);
 }
 
-static inline bool useStrongFiltering(int32_t offset, int32_t beta, int32_t tc, pixel* src)
+static inline bool useStrongFiltering(int32_t offset, int32_t d, int32_t beta, int32_t tc, pixel* src)
 {
+    int16_t m4     = (int16_t)src[0];
+    int16_t m3     = (int16_t)src[-offset];
+    int16_t m7     = (int16_t)src[offset * 3];
     int16_t m0     = (int16_t)src[-offset * 4];
-    int16_t m3     = (int16_t)src[-offset];
-    int16_t m4     = (int16_t)src[0];
-    int16_t m7     = (int16_t)src[offset * 3];
     int32_t strong = abs(m0 - m3) + abs(m7 - m4);
 
-    return (strong < (beta >> 3)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
+    return (strong < (beta >> 3)) && (d < (beta >> 2)) && (abs(m3 - m4) < ((tc * 5 + 1) >> 1));
 }
 
 /* Deblocking for the luminance component with strong or weak filter
@@ -355,61 +362,63 @@ static inline bool useStrongFiltering(in
 static inline void pelFilterLuma(pixel* src, int32_t offset, int32_t tc, bool sw, bool partPNoFilter, bool partQNoFilter,
                                  int32_t thrCut, bool filterSecondP, bool filterSecondQ)
 {
+    int32_t delta;
+
+    int16_t m4  = (int16_t)src[0];
+    int16_t m3  = (int16_t)src[-offset];
+    int16_t m5  = (int16_t)src[offset];
+    int16_t m2  = (int16_t)src[-offset * 2];
+    int16_t m6  = (int16_t)src[offset * 2];
     int16_t m1  = (int16_t)src[-offset * 3];
-    int16_t m2  = (int16_t)src[-offset * 2];
-    int16_t m3  = (int16_t)src[-offset];
-    int16_t m4  = (int16_t)src[0];
-    int16_t m5  = (int16_t)src[offset];
-    int16_t m6  = (int16_t)src[offset * 2];
+    int16_t m7  = (int16_t)src[offset * 3];
+    int16_t m0  = (int16_t)src[-offset * 4];
 
     if (sw)
     {
-        int16_t m0  = (int16_t)src[-offset * 4];
-        int16_t m7  = (int16_t)src[offset * 3];
-        int32_t tc2 = 2 * tc;
-        if (!partPNoFilter)
-        {
-            src[-offset * 3] = (pixel)(Clip3(-tc2, tc2, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
-            src[-offset * 2] = (pixel)(Clip3(-tc2, tc2, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
-            src[-offset]     = (pixel)(Clip3(-tc2, tc2, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
-        }
-        if (!partQNoFilter)
-        {
-            src[0]           = (pixel)(Clip3(-tc2, tc2, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
-            src[offset]      = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
-            src[offset * 2]  = (pixel)(Clip3(-tc2, tc2, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
-        }
+        src[-offset]     = (pixel)Clip3(m3 - 2 * tc, m3 + 2 * tc, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3));
+        src[0]           = (pixel)Clip3(m4 - 2 * tc, m4 + 2 * tc, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3));
+        src[-offset * 2] = (pixel)Clip3(m2 - 2 * tc, m2 + 2 * tc, ((m1 + m2 + m3 + m4 + 2) >> 2));
+        src[offset]      = (pixel)Clip3(m5 - 2 * tc, m5 + 2 * tc, ((m3 + m4 + m5 + m6 + 2) >> 2));
+        src[-offset * 3] = (pixel)Clip3(m1 - 2 * tc, m1 + 2 * tc, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3));
+        src[offset * 2]  = (pixel)Clip3(m6 - 2 * tc, m6 + 2 * tc, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3));
     }
     else
     {
         /* Weak filter */
-        int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
+        delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
 
         if (abs(delta) < thrCut)
         {
             delta = Clip3(-tc, tc, delta);
+            src[-offset] = Clip(m3 + delta);
+            src[0] = Clip(m4 - delta);
 
             int32_t tc2 = tc >> 1;
-            if (!partPNoFilter)
+            if (filterSecondP)
             {
-                src[-offset] = Clip(m3 + delta);
-                if (filterSecondP)
-                {
-                    int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
-                    src[-offset * 2] = Clip(m2 + delta1);
-                }
+                int32_t delta1 = Clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
+                src[-offset * 2] = Clip(m2 + delta1);
             }
-            if (!partQNoFilter)
+            if (filterSecondQ)
             {
-                src[0] = Clip(m4 - delta);
-                if (filterSecondQ)
-                {
-                    int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
-                    src[offset] = Clip(m5 + delta2);
-                }
+                int32_t delta2 = Clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
+                src[offset] = Clip(m5 + delta2);
             }
         }
     }
+
+    if (partPNoFilter)
+    {
+        src[-offset] = (pixel)m3;
+        src[-offset * 2] = (pixel)m2;
+        src[-offset * 3] = (pixel)m1;
+    }
+    if (partQNoFilter)
+    {
+        src[0] = (pixel)m4;
+        src[offset] = (pixel)m5;
+        src[offset * 2] = (pixel)m6;
+    }
 }
 
 /* Deblocking of one line/column for the chrominance component
@@ -420,26 +429,34 @@ static inline void pelFilterLuma(pixel* 
  * \param partQNoFilter  indicator to disable filtering on partQ */
 static inline void pelFilterChroma(pixel* src, int32_t offset, int32_t tc, bool partPNoFilter, bool partQNoFilter)
 {
+    int32_t delta;
+
+    int16_t m4  = (int16_t)src[0];
+    int16_t m3  = (int16_t)src[-offset];
+    int16_t m5  = (int16_t)src[offset];
     int16_t m2  = (int16_t)src[-offset * 2];
-    int16_t m3  = (int16_t)src[-offset];
-    int16_t m4  = (int16_t)src[0];
-    int16_t m5  = (int16_t)src[offset];
 
-    int32_t delta = Clip3(-tc, tc, ((((m4 - m3) << 2) + m2 - m5 + 4) >> 3));
-    if (!partPNoFilter)
-        src[-offset] = Clip(m3 + delta);