[x265] [PATCH 13 of 15] sao: avoid thread conflict on offsetEo and offsetBo

Min Chen chenm003 at 163.com
Wed Dec 2 18:28:36 CET 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1449076377 21600
# Node ID a3a9660c91b8eeb8f70869fc4022f939c01023f0
# Parent  b1c261378db29a1988d8e27c5eabe1a76821f83d
sao: avoid thread conflict on offsetEo and offsetBo
---
 source/encoder/framefilter.cpp |   12 +++++-------
 source/encoder/sao.cpp         |   38 ++++++++++++++++++++------------------
 source/encoder/sao.h           |    4 ++--
 3 files changed, 27 insertions(+), 27 deletions(-)

diff -r b1c261378db2 -r a3a9660c91b8 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Wed Dec 02 11:12:54 2015 -0600
+++ b/source/encoder/framefilter.cpp	Wed Dec 02 11:12:57 2015 -0600
@@ -543,18 +543,16 @@
     SAOParam* saoParam = encData.m_saoParam;
     uint32_t numCols = encData.m_slice->m_sps->numCuInWidth;
 
-    if (saoParam->bSaoFlag[0])
+    for(uint32_t col = 0; col < numCols; col++)
     {
-        for(uint32_t col = 0; col < numCols; col++)
+        if (saoParam->bSaoFlag[0])
             m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[0], row, col, 0);
-    }
 
-    if (saoParam->bSaoFlag[1])
-    {
-        for(uint32_t col = 0; col < numCols; col++)
+        if (saoParam->bSaoFlag[1])
+        {
             m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[1], row, col, 1);
-        for(uint32_t col = 0; col < numCols; col++)
             m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[2], row, col, 2);
+        }
     }
 
     if (encData.m_slice->m_pps->bTransquantBypassEnabled)
diff -r b1c261378db2 -r a3a9660c91b8 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Wed Dec 02 11:12:54 2015 -0600
+++ b/source/encoder/sao.cpp	Wed Dec 02 11:12:57 2015 -0600
@@ -325,6 +325,8 @@
     tmpL = m_tmpL1[plane];
     tmpU = &(m_tmpU[plane][lpelx]);
 
+    int8_t* offsetEo = m_offsetEo[plane];
+
     switch (typeIdx)
     {
     case SAO_EO_0: // dir: -
@@ -343,7 +345,7 @@
                     int edgeType = signRight + signLeft + 2;
                     signLeft = -signRight;
 
-                    rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
+                    rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]];
                 }
 
                 rec += stride;
@@ -368,7 +370,7 @@
                     row1LastPxl = rec[stride + ctuWidth - 1];
                 }
 
-                primitives.saoCuOrgE0(rec, m_offsetEo, ctuWidth, signLeft1, stride);
+                primitives.saoCuOrgE0(rec, offsetEo, ctuWidth, signLeft1, stride);
 
                 if (!lpelx)
                 {
@@ -407,7 +409,7 @@
                     int edgeType = signDown + upBuff1[x] + 2;
                     upBuff1[x] = -signDown;
 
-                    rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
+                    rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]];
                 }
 
                 rec += stride;
@@ -420,11 +422,11 @@
             int diff = (endY - startY) % 2;
             for (y = startY; y < endY - diff; y += 2)
             {
-                primitives.saoCuOrgE1_2Rows(rec, upBuff1, m_offsetEo, stride, ctuWidth);
+                primitives.saoCuOrgE1_2Rows(rec, upBuff1, offsetEo, stride, ctuWidth);
                 rec += 2 * stride;
             }
             if (diff & 1)
-                primitives.saoCuOrgE1(rec, upBuff1, m_offsetEo, stride, ctuWidth);
+                primitives.saoCuOrgE1(rec, upBuff1, offsetEo, stride, ctuWidth);
         }
 
         break;
@@ -474,7 +476,7 @@
                      int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
                      int edgeType = signDown + upBuff1[x] + 2;
                      upBufft[x + 1] = -signDown;
-                     rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
+                     rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]];
                  }
 
                  std::swap(upBuff1, upBufft);
@@ -488,7 +490,7 @@
             {
                 int8_t iSignDown2 = signOf(rec[stride + startX] - tmpL[y]);
 
-                primitives.saoCuOrgE2[endX > 16](rec + startX, upBufft + startX, upBuff1 + startX, m_offsetEo, endX - startX, stride);
+                primitives.saoCuOrgE2[endX > 16](rec + startX, upBufft + startX, upBuff1 + startX, offsetEo, endX - startX, stride);
 
                 upBufft[startX] = iSignDown2;
 
@@ -520,14 +522,14 @@
                 int8_t signDown = signOf(rec[x] - tmpL[y + 1]);
                 int edgeType = signDown + upBuff1[x] + 2;
                 upBuff1[x - 1] = -signDown;
-                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
+                rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]];
 
                 for (x = startX + 1; x < endX; x++)
                 {
                     signDown = signOf(rec[x] - rec[x + stride - 1]);
                     edgeType = signDown + upBuff1[x] + 2;
                     upBuff1[x - 1] = -signDown;
-                    rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
+                    rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]];
                 }
 
                 upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
@@ -557,9 +559,9 @@
                 int8_t signDown = signOf(rec[x] - tmpL[y + 1]);
                 int edgeType = signDown + upBuff1[x] + 2;
                 upBuff1[x - 1] = -signDown;
-                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
+                rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]];
 
-                primitives.saoCuOrgE3[endX > 16](rec, upBuff1, m_offsetEo, stride - 1, startX, endX);
+                primitives.saoCuOrgE3[endX > 16](rec, upBuff1, offsetEo, stride - 1, startX, endX);
 
                 upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
 
@@ -571,7 +573,7 @@
     }
     case SAO_BO:
     {
-        const int8_t* offsetBo = m_offsetBo;
+        const int8_t* offsetBo = m_offsetBo[plane];
 
         if (ctuWidth & 15)
         {
@@ -649,10 +651,10 @@
             {
                 if (typeIdx == SAO_BO)
                 {
-                    memset(m_offsetBo, 0, sizeof(m_offsetBo));
+                    memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
 
                     for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                        m_offsetBo[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
+                        m_offsetBo[plane][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
                 }
                 else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
                 {
@@ -662,7 +664,7 @@
                         offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
 
                     for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
-                        m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+                        m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
                 }
             }
             processSaoCu(addr, typeIdx, plane);
@@ -718,10 +720,10 @@
         {
             if (typeIdx == SAO_BO)
             {
-                memset(m_offsetBo, 0, sizeof(m_offsetBo));
+                memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0]));
 
                 for (int i = 0; i < SAO_NUM_OFFSET; i++)
-                    m_offsetBo[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
+                    m_offsetBo[plane][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC);
             }
             else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
             {
@@ -731,7 +733,7 @@
                     offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC;
 
                 for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
-                    m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
+                    m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]];
             }
         }
         processSaoCu(addr, typeIdx, plane);
diff -r b1c261378db2 -r a3a9660c91b8 source/encoder/sao.h
--- a/source/encoder/sao.h	Wed Dec 02 11:12:54 2015 -0600
+++ b/source/encoder/sao.h	Wed Dec 02 11:12:57 2015 -0600
@@ -80,8 +80,8 @@
     PerPlane*   m_offsetOrgPreDblk;
 
     double      m_depthSaoRate[2][4];
-    int8_t      m_offsetBo[SAO_NUM_BO_CLASSES];
-    int8_t      m_offsetEo[NUM_EDGETYPE];
+    int8_t      m_offsetBo[NUM_PLANE][SAO_NUM_BO_CLASSES];
+    int8_t      m_offsetEo[NUM_PLANE][NUM_EDGETYPE];
 
     int         m_chromaFormat;
     int         m_numCuInWidth;



More information about the x265-devel mailing list