[x265] [PATCH 1 of 4] improve calcSaoStatsCu: accumulate into local temporary buffers to reduce array indexing operations

Min Chen chenm003 at 163.com
Thu May 14 01:53:17 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1431561161 25200
# Node ID 2cb7386376a40786ca6cbd02d9f19bc304942251
# Parent  0ffdd0f379adcbf6b3e49956580301211a01c203
improve calcSaoStatsCu: accumulate into local temporary buffers to reduce array indexing operations
---
 source/encoder/sao.cpp |   91 ++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 69 insertions(+), 22 deletions(-)

diff -r 0ffdd0f379ad -r 2cb7386376a4 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Wed May 13 18:12:50 2015 +0530
+++ b/source/encoder/sao.cpp	Wed May 13 16:52:41 2015 -0700
@@ -696,6 +696,11 @@
     int8_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
     int8_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
 
+    // Dynamic range: 64x64 samples (2^12) x 14-bit sample differences => magnitude below 2^26, which fits in int32_t
+    int32_t tmp_stats[NUM_EDGETYPE];
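+    // TODO: presumably the counters could be packed into one uint64_t, but the variable shift that requires is only cheap with SHLX (BMI2, Haswell and later)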
+    uint16_t tmp_count[NUM_EDGETYPE];
+
     // SAO_BO:
     {
         const int boShift = X265_DEPTH - SAO_BO_BITS;
@@ -736,30 +741,41 @@
                 skipB = plane ? 1 : 3;
                 skipR = plane ? 3 : 5;
             }
-            stats = m_offsetOrg[plane][SAO_EO_0];
-            count = m_count[plane][SAO_EO_0];
 
             fenc = fenc0;
             rec  = rec0;
 
             startX = !lpelx;
             endX   = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
+
+            memset(tmp_stats, 0, sizeof(tmp_stats));
+            memset(tmp_count, 0, sizeof(tmp_count));
+
             for (y = 0; y < ctuHeight - skipB; y++)
             {
                 int signLeft = signOf(rec[startX] - rec[startX - 1]);
                 for (x = startX; x < endX; x++)
                 {
                     int signRight = signOf(rec[x] - rec[x + 1]);
-                    int edgeType = signRight + signLeft + 2;
+                    uint32_t edgeType = signRight + signLeft + 2;
                     signLeft = -signRight;
 
-                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
-                    count[s_eoTable[edgeType]]++;
+                    X265_CHECK((edgeType >= 0) && (edgeType <= 4), "edgeType check failure\n");
+                    tmp_stats[edgeType] += (fenc[x] - rec[x]);
+                    tmp_count[edgeType]++;
                 }
 
                 fenc += stride;
                 rec += stride;
             }
+
+            stats = m_offsetOrg[plane][SAO_EO_0];
+            count = m_count[plane][SAO_EO_0];
+            for (x = 0; x < NUM_EDGETYPE; x++)
+            {
+                stats[s_eoTable[x]] += tmp_stats[x];
+                count[s_eoTable[x]] += tmp_count[x];
+            }
         }
 
         // SAO_EO_1: // dir: |
@@ -769,8 +785,6 @@
                 skipB = plane ? 2 : 4;
                 skipR = plane ? 2 : 4;
             }
-            stats = m_offsetOrg[plane][SAO_EO_1];
-            count = m_count[plane][SAO_EO_1];
 
             fenc = fenc0;
             rec  = rec0;
@@ -786,21 +800,32 @@
 
             primitives.sign(upBuff1, rec, &rec[- stride], ctuWidth);
 
+            memset(tmp_stats, 0, sizeof(tmp_stats));
+            memset(tmp_count, 0, sizeof(tmp_count));
+
             for (y = startY; y < endY; y++)
             {
                 for (x = 0; x < endX; x++)
                 {
-                    int8_t signDown = signOf(rec[x] - rec[x + stride]);
-                    int edgeType = signDown + upBuff1[x] + 2;
-                    upBuff1[x] = -signDown;
+                    int signDown = signOf(rec[x] - rec[x + stride]);
+                    uint32_t edgeType = signDown + upBuff1[x] + 2;
+                    upBuff1[x] = (int8_t)(-signDown);
 
-                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
-                    count[s_eoTable[edgeType]]++;
+                    tmp_stats[edgeType] += (fenc[x] - rec[x]);
+                    tmp_count[edgeType]++;
                 }
 
                 fenc += stride;
                 rec += stride;
             }
+
+            stats = m_offsetOrg[plane][SAO_EO_1];
+            count = m_count[plane][SAO_EO_1];
+            for (x = 0; x < NUM_EDGETYPE; x++)
+            {
+                stats[s_eoTable[x]] += tmp_stats[x];
+                count[s_eoTable[x]] += tmp_count[x];
+            }
         }
 
         // SAO_EO_2: // dir: 135
@@ -829,16 +854,19 @@
 
             primitives.sign(&upBuff1[startX], &rec[startX], &rec[startX - stride - 1], (endX - startX));
 
+            memset(tmp_stats, 0, sizeof(tmp_stats));
+            memset(tmp_count, 0, sizeof(tmp_count));
+
             for (y = startY; y < endY; y++)
             {
                 upBufft[startX] = signOf(rec[startX + stride] - rec[startX - 1]);
                 for (x = startX; x < endX; x++)
                 {
-                    int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
-                    int edgeType = signDown + upBuff1[x] + 2;
-                    upBufft[x + 1] = -signDown;
-                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
-                    count[s_eoTable[edgeType]]++;
+                    int signDown = signOf(rec[x] - rec[x + stride + 1]);
+                    uint32_t edgeType = signDown + upBuff1[x] + 2;
+                    upBufft[x + 1] = (int8_t)(-signDown);
+                    tmp_stats[edgeType] += (fenc[x] - rec[x]);
+                    tmp_count[edgeType]++;
                 }
 
                 std::swap(upBuff1, upBufft);
@@ -846,6 +874,14 @@
                 rec += stride;
                 fenc += stride;
             }
+
+            stats = m_offsetOrg[plane][SAO_EO_2];
+            count = m_count[plane][SAO_EO_2];
+            for (x = 0; x < NUM_EDGETYPE; x++)
+            {
+                stats[s_eoTable[x]] += tmp_stats[x];
+                count[s_eoTable[x]] += tmp_count[x];
+            }
         }
 
         // SAO_EO_3: // dir: 45
@@ -875,15 +911,18 @@
 
             primitives.sign(&upBuff1[startX - 1], &rec[startX - 1], &rec[startX - 1 - stride + 1], (endX - startX + 1));
 
+            memset(tmp_stats, 0, sizeof(tmp_stats));
+            memset(tmp_count, 0, sizeof(tmp_count));
+
             for (y = startY; y < endY; y++)
             {
                 for (x = startX; x < endX; x++)
                 {
-                    int8_t signDown = signOf(rec[x] - rec[x + stride - 1]);
-                    int edgeType = signDown + upBuff1[x] + 2;
-                    upBuff1[x - 1] = -signDown;
-                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
-                    count[s_eoTable[edgeType]]++;
+                    int signDown = signOf(rec[x] - rec[x + stride - 1]);
+                    uint32_t edgeType = signDown + upBuff1[x] + 2;
+                    upBuff1[x - 1] = (int8_t)(-signDown);
+                    tmp_stats[edgeType] += (fenc[x] - rec[x]);
+                    tmp_count[edgeType]++;
                 }
 
                 upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
@@ -891,6 +930,14 @@
                 rec += stride;
                 fenc += stride;
             }
+
+            stats = m_offsetOrg[plane][SAO_EO_3];
+            count = m_count[plane][SAO_EO_3];
+            for (x = 0; x < NUM_EDGETYPE; x++)
+            {
+                stats[s_eoTable[x]] += tmp_stats[x];
+                count[s_eoTable[x]] += tmp_count[x];
+            }
         }
     }
 }



More information about the x265-devel mailing list