[x265] [PATCH 1 of 4] improve calcSaoStatsCu, use local temporary buffer to reduce array index operators

Steve Borho steve at borho.org
Thu May 14 00:32:49 CEST 2015


On 05/13, Min Chen wrote:
> # HG changeset patch
> # User Min Chen <chenm003 at 163.com>
> # Date 1431561161 25200
> # Node ID 2cb7386376a40786ca6cbd02d9f19bc304942251
> # Parent  0ffdd0f379adcbf6b3e49956580301211a01c203
> improve calcSaoStatsCu, use local temporary buffer to reduce array index operators
> ---
>  source/encoder/sao.cpp |   91 ++++++++++++++++++++++++++++++++++++-----------
>  1 files changed, 69 insertions(+), 22 deletions(-)
> 
> diff -r 0ffdd0f379ad -r 2cb7386376a4 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp	Wed May 13 18:12:50 2015 +0530
> +++ b/source/encoder/sao.cpp	Wed May 13 16:52:41 2015 -0700
> @@ -696,6 +696,11 @@
>      int8_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
>      int8_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
>  
> +    // Dynamic Range: 64x64 samples (12 bits) x 14bpp = 26 bits, fits in int32_t
> +    int32_t tmp_stats[NUM_EDGETYPE];
> +    // TODO: improve by uint64_t, but need Haswell SHLX
> +    uint16_t tmp_count[NUM_EDGETYPE];
> +
>      // SAO_BO:
>      {
>          const int boShift = X265_DEPTH - SAO_BO_BITS;
> @@ -736,30 +741,41 @@
>                  skipB = plane ? 1 : 3;
>                  skipR = plane ? 3 : 5;
>              }
> -            stats = m_offsetOrg[plane][SAO_EO_0];
> -            count = m_count[plane][SAO_EO_0];
>  
>              fenc = fenc0;
>              rec  = rec0;
>  
>              startX = !lpelx;
>              endX   = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
> +
> +            memset(tmp_stats, 0, sizeof(tmp_stats));
> +            memset(tmp_count, 0, sizeof(tmp_count));
> +
>              for (y = 0; y < ctuHeight - skipB; y++)
>              {
>                  int signLeft = signOf(rec[startX] - rec[startX - 1]);
>                  for (x = startX; x < endX; x++)
>                  {
>                      int signRight = signOf(rec[x] - rec[x + 1]);
> -                    int edgeType = signRight + signLeft + 2;
> +                    uint32_t edgeType = signRight + signLeft + 2;
>                      signLeft = -signRight;
>  
> -                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> -                    count[s_eoTable[edgeType]]++;
> +                    X265_CHECK((edgeType >= 0) && (edgeType <= 4), "edgeType check failure\n");

I can see before building this that it will trigger the same warning on
GCC: edgeType is a uint32_t, so the (edgeType >= 0) half of the check is
always true. I'll fix this before testing.

> +                    tmp_stats[edgeType] += (fenc[x] - rec[x]);
> +                    tmp_count[edgeType]++;
>                  }
>  
>                  fenc += stride;
>                  rec += stride;
>              }
> +
> +            stats = m_offsetOrg[plane][SAO_EO_0];
> +            count = m_count[plane][SAO_EO_0];
> +            for (x = 0; x < NUM_EDGETYPE; x++)
> +            {
> +                stats[s_eoTable[x]] += tmp_stats[x];
> +                count[s_eoTable[x]] += tmp_count[x];
> +            }
>          }
>  
>          // SAO_EO_1: // dir: |
> @@ -769,8 +785,6 @@
>                  skipB = plane ? 2 : 4;
>                  skipR = plane ? 2 : 4;
>              }
> -            stats = m_offsetOrg[plane][SAO_EO_1];
> -            count = m_count[plane][SAO_EO_1];
>  
>              fenc = fenc0;
>              rec  = rec0;
> @@ -786,21 +800,32 @@
>  
>              primitives.sign(upBuff1, rec, &rec[- stride], ctuWidth);
>  
> +            memset(tmp_stats, 0, sizeof(tmp_stats));
> +            memset(tmp_count, 0, sizeof(tmp_count));
> +
>              for (y = startY; y < endY; y++)
>              {
>                  for (x = 0; x < endX; x++)
>                  {
> -                    int8_t signDown = signOf(rec[x] - rec[x + stride]);
> -                    int edgeType = signDown + upBuff1[x] + 2;
> -                    upBuff1[x] = -signDown;
> +                    int signDown = signOf(rec[x] - rec[x + stride]);
> +                    uint32_t edgeType = signDown + upBuff1[x] + 2;
> +                    upBuff1[x] = (int8_t)(-signDown);
>  
> -                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> -                    count[s_eoTable[edgeType]]++;
> +                    tmp_stats[edgeType] += (fenc[x] - rec[x]);
> +                    tmp_count[edgeType]++;
>                  }
>  
>                  fenc += stride;
>                  rec += stride;
>              }
> +
> +            stats = m_offsetOrg[plane][SAO_EO_1];
> +            count = m_count[plane][SAO_EO_1];
> +            for (x = 0; x < NUM_EDGETYPE; x++)
> +            {
> +                stats[s_eoTable[x]] += tmp_stats[x];
> +                count[s_eoTable[x]] += tmp_count[x];
> +            }
>          }
>  
>          // SAO_EO_2: // dir: 135
> @@ -829,16 +854,19 @@
>  
>              primitives.sign(&upBuff1[startX], &rec[startX], &rec[startX - stride - 1], (endX - startX));
>  
> +            memset(tmp_stats, 0, sizeof(tmp_stats));
> +            memset(tmp_count, 0, sizeof(tmp_count));
> +
>              for (y = startY; y < endY; y++)
>              {
>                  upBufft[startX] = signOf(rec[startX + stride] - rec[startX - 1]);
>                  for (x = startX; x < endX; x++)
>                  {
> -                    int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
> -                    int edgeType = signDown + upBuff1[x] + 2;
> -                    upBufft[x + 1] = -signDown;
> -                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> -                    count[s_eoTable[edgeType]]++;
> +                    int signDown = signOf(rec[x] - rec[x + stride + 1]);
> +                    uint32_t edgeType = signDown + upBuff1[x] + 2;
> +                    upBufft[x + 1] = (int8_t)(-signDown);
> +                    tmp_stats[edgeType] += (fenc[x] - rec[x]);
> +                    tmp_count[edgeType]++;
>                  }
>  
>                  std::swap(upBuff1, upBufft);
> @@ -846,6 +874,14 @@
>                  rec += stride;
>                  fenc += stride;
>              }
> +
> +            stats = m_offsetOrg[plane][SAO_EO_2];
> +            count = m_count[plane][SAO_EO_2];
> +            for (x = 0; x < NUM_EDGETYPE; x++)
> +            {
> +                stats[s_eoTable[x]] += tmp_stats[x];
> +                count[s_eoTable[x]] += tmp_count[x];
> +            }
>          }
>  
>          // SAO_EO_3: // dir: 45
> @@ -875,15 +911,18 @@
>  
>              primitives.sign(&upBuff1[startX - 1], &rec[startX - 1], &rec[startX - 1 - stride + 1], (endX - startX + 1));
>  
> +            memset(tmp_stats, 0, sizeof(tmp_stats));
> +            memset(tmp_count, 0, sizeof(tmp_count));
> +
>              for (y = startY; y < endY; y++)
>              {
>                  for (x = startX; x < endX; x++)
>                  {
> -                    int8_t signDown = signOf(rec[x] - rec[x + stride - 1]);
> -                    int edgeType = signDown + upBuff1[x] + 2;
> -                    upBuff1[x - 1] = -signDown;
> -                    stats[s_eoTable[edgeType]] += (fenc[x] - rec[x]);
> -                    count[s_eoTable[edgeType]]++;
> +                    int signDown = signOf(rec[x] - rec[x + stride - 1]);
> +                    uint32_t edgeType = signDown + upBuff1[x] + 2;
> +                    upBuff1[x - 1] = (int8_t)(-signDown);
> +                    tmp_stats[edgeType] += (fenc[x] - rec[x]);
> +                    tmp_count[edgeType]++;
>                  }
>  
>                  upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
> @@ -891,6 +930,14 @@
>                  rec += stride;
>                  fenc += stride;
>              }
> +
> +            stats = m_offsetOrg[plane][SAO_EO_3];
> +            count = m_count[plane][SAO_EO_3];
> +            for (x = 0; x < NUM_EDGETYPE; x++)
> +            {
> +                stats[s_eoTable[x]] += tmp_stats[x];
> +                count[s_eoTable[x]] += tmp_count[x];
> +            }
>          }
>      }
>  }
> 
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho


More information about the x265-devel mailing list