[x265] [PATCH] sao: add saoCuOrgE3_2Rows function to process 2 rows

Divya Manivannan divya at multicorewareinc.com
Tue Apr 14 06:58:09 CEST 2015


# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1428986909 -19800
#      Tue Apr 14 10:18:29 2015 +0530
# Node ID e3017cf9bff50cdd6e89efdd8cf37c89ca0c62e3
# Parent  4cccf22b00ee188a72c8dc3896d7dc1613d855ad
sao: add saoCuOrgE3_2Rows function to process 2 rows

diff -r 4cccf22b00ee -r e3017cf9bff5 source/common/loopfilter.cpp
--- a/source/common/loopfilter.cpp	Fri Apr 10 18:15:38 2015 -0500
+++ b/source/common/loopfilter.cpp	Tue Apr 14 10:18:29 2015 +0530
@@ -122,6 +122,29 @@
     }
 }
 
+void processSaoCUE3_2Rows(pixel *rec, int8_t *upBuff1, int8_t *offsetEo, intptr_t stride, int startX, int endX, int8_t* signDown) // applies offsetEo to columns [startX, endX) of two rows; signDown[y] is the caller-supplied sign for column startX of row y
+{
+    int8_t signDown1; // sign of rec[x] - rec[x + stride] for the current column
+    int8_t edgeType;  // index into offsetEo: current sign + upBuff1[x] + 2
+
+    for (int y = 0; y < 2; y++) // exactly two rows per call
+    {
+        edgeType = signDown[y] + upBuff1[startX] + 2; // first column uses the sign passed in by the caller
+        upBuff1[startX - 1] = -signDown[y]; // writes index startX - 1, so upBuff1 must be writable one element before startX
+        rec[startX] = x265_clip(rec[startX] + offsetEo[edgeType]); // add the selected offset, then clip
+
+        for (int x = startX + 1; x < endX; x++) // remaining columns derive the sign from the pixels themselves
+        {
+            signDown1 = signOf(rec[x] - rec[x + stride]); // compare with the pixel `stride` elements ahead
+            edgeType = signDown1 + upBuff1[x] + 2;
+            upBuff1[x - 1] = -signDown1; // negated sign is stored one slot to the left
+            rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
+        }
+        upBuff1[endX - 1] = signOf(rec[endX - 1 + stride + 1] - rec[endX]); // last slot: sign of rec[endX + stride] - rec[endX]
+        rec += stride + 1; // sao.cpp in this patch passes (row stride - 1), so this advances by that caller's full row stride
+    }
+}
+
 void processSaoCUB0(pixel* rec, const int8_t* offset, int ctuWidth, int ctuHeight, intptr_t stride)
 {
     #define SAO_BO_BITS 5
@@ -146,6 +169,7 @@
     p.saoCuOrgE1_2Rows = processSaoCUE1_2Rows;
     p.saoCuOrgE2 = processSaoCUE2;
     p.saoCuOrgE3 = processSaoCUE3;
+    p.saoCuOrgE3_2Rows = processSaoCUE3_2Rows;
     p.saoCuOrgB0 = processSaoCUB0;
     p.sign = calSign;
 }
diff -r 4cccf22b00ee -r e3017cf9bff5 source/common/primitives.h
--- a/source/common/primitives.h	Fri Apr 10 18:15:38 2015 -0500
+++ b/source/common/primitives.h	Tue Apr 14 10:18:29 2015 +0530
@@ -172,6 +172,7 @@
 typedef void (*saoCuOrgE1_t)(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width);
 typedef void (*saoCuOrgE2_t)(pixel* rec, int8_t* pBufft, int8_t* pBuff1, int8_t* offsetEo, int lcuWidth, intptr_t stride);
 typedef void (*saoCuOrgE3_t)(pixel* rec, int8_t* upBuff1, int8_t* m_offsetEo, intptr_t stride, int startX, int endX);
+typedef void (*saoCuOrgE3_2Rows_t)(pixel* rec, int8_t* upBuff1, int8_t* m_offsetEo, intptr_t stride, int startX, int endX, int8_t* signDown);
 typedef void (*saoCuOrgB0_t)(pixel* rec, const int8_t* offsetBo, int ctuWidth, int ctuHeight, intptr_t stride);
 typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
 typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
@@ -277,6 +278,7 @@
     saoCuOrgE1_t          saoCuOrgE1, saoCuOrgE1_2Rows;
     saoCuOrgE2_t          saoCuOrgE2;
     saoCuOrgE3_t          saoCuOrgE3;
+    saoCuOrgE3_2Rows_t    saoCuOrgE3_2Rows;
     saoCuOrgB0_t          saoCuOrgB0;
 
     downscale_t           frameInitLowres;
diff -r 4cccf22b00ee -r e3017cf9bff5 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Fri Apr 10 18:15:38 2015 -0500
+++ b/source/encoder/sao.cpp	Tue Apr 14 10:18:29 2015 +0530
@@ -516,20 +516,28 @@
             if (rpelx == picWidth)
                 upBuff1[ctuWidth - 1] = lastSign;
 
-            for (y = startY; y < endY; y++)
-            {
-                x = startX;
-                int8_t signDown = signOf(rec[x] - tmpL[y + 1]);
-                int edgeType = signDown + upBuff1[x] + 2;
-                upBuff1[x - 1] = -signDown;
-                rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
-
-                primitives.saoCuOrgE3(rec, upBuff1, m_offsetEo, stride - 1, startX, endX);
-
-                upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
-
-                rec += stride;
-            }
+            int diff = endY - startY;
+            for (y = 0; y < diff / 2; y++)
+            {
+                int8_t signDown[2];
+                signDown[0] = signOf(rec[startX] - tmpL[y * 2 + 1 + startY]);
+                signDown[1] = signOf(rec[startX + stride] - tmpL[y * 2 + 2 + startY]);
+
+                primitives.saoCuOrgE3_2Rows(rec, upBuff1, m_offsetEo, stride - 1, startX, endX, signDown);
+
+                rec += 2 * stride;
+            }
+            if (diff & 1)
+            {
+                int8_t signDown1 = signOf(rec[startX] - tmpL[y * 2 + 1 + startY]);
+                int edgeType = signDown1 + upBuff1[startX] + 2;
+                upBuff1[startX - 1] = -signDown1;
+                rec[startX] = m_clipTable[rec[startX] + m_offsetEo[edgeType]];
+
+                primitives.saoCuOrgE3(rec, upBuff1, m_offsetEo, stride - 1, startX, endX);
+
+                upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]);
+            }
         }
 
         break;
diff -r 4cccf22b00ee -r e3017cf9bff5 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Fri Apr 10 18:15:38 2015 -0500
+++ b/source/test/pixelharness.cpp	Tue Apr 14 10:18:29 2015 +0530
@@ -66,7 +66,7 @@
         sbuf2[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX));
         ibuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1;
         psbuf1[i] = psbuf4[i] = (rand() % 65) - 32;                   // range is between -32 to 32
-        psbuf2[i] = psbuf5[i] = (rand() % 3) - 1;                     // possible values {-1,0,1}
+        psbuf2[i] = psbuf5[i] = psbuf6[i] = psbuf7[i] = (rand() % 3) - 1; // possible values {-1,0,1}
         psbuf3[i] = (rand() % 129) - 128;
         sbuf3[i] = rand() % PIXEL_MAX; // for blockcopy only
     }
@@ -1011,6 +1011,35 @@
     return true;
 }
 
+bool PixelHarness::check_saoCuOrgE3_2Rows_t(saoCuOrgE3_2Rows_t ref, saoCuOrgE3_2Rows_t opt) // returns false as soon as ref and opt outputs differ, true after ITERS matching runs
+{
+    ALIGN_VAR_16(pixel, ref_dest[64 * 64]); // pixel buffer modified by ref
+    ALIGN_VAR_16(pixel, opt_dest[64 * 64]); // pixel buffer modified by opt
+
+    memset(ref_dest, 0xCD, sizeof(ref_dest)); // same byte fill in both buffers so the runs start from identical pixels
+    memset(opt_dest, 0xCD, sizeof(opt_dest));
+
+    int j = 0; // offset into the psbuf* arrays, advanced by INCR after each iteration
+
+    for (int i = 0; i < ITERS; i++)
+    {
+        int stride = 16 * (rand() % 4 + 1); // 16, 32, 48 or 64
+        int start = rand() % 2; // 0 or 1
+        int end = (16 * (rand() % 4 + 1)) - rand() % 2; // a multiple of 16 in [16, 64], minus 0 or 1
+
+        ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, start, end, psbuf6 + j); // NOTE(review): with start == 0 and j == 0 the primitive's upBuff1[startX - 1] write lands at psbuf2[-1] - confirm this is intended
+        checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, start, end, psbuf7 + j); // opt gets its own buffers; psbuf5/psbuf7 are filled with the same values as psbuf2/psbuf6
+
+        if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)) || memcmp(psbuf2, psbuf5, BUFFSIZE)) // both the pixels and the updated sign buffers must match
+            return false;
+
+        reportfail();
+        j += INCR;
+    }
+
+    return true;
+}
+
 bool PixelHarness::check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt)
 {
     ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
@@ -1721,6 +1750,15 @@
         }
     }
 
+    if (opt.saoCuOrgE3_2Rows)
+    {
+        if (!check_saoCuOrgE3_2Rows_t(ref.saoCuOrgE3_2Rows, opt.saoCuOrgE3_2Rows))
+        {
+            printf("SAO_EO_3_2Rows failed\n");
+            return false;
+        }
+    }
+
     if (opt.saoCuOrgB0)
     {
         if (!check_saoCuOrgB0_t(ref.saoCuOrgB0, opt.saoCuOrgB0))
@@ -2104,6 +2142,12 @@
         REPORT_SPEEDUP(opt.saoCuOrgE3, ref.saoCuOrgE3, pbuf1, psbuf2, psbuf1, 64, 0, 64);
     }
 
+    if (opt.saoCuOrgE3_2Rows)
+    {
+        HEADER0("SAO_EO_3_2Rows");
+        REPORT_SPEEDUP(opt.saoCuOrgE3_2Rows, ref.saoCuOrgE3_2Rows, pbuf1, psbuf2, psbuf1, 64, 0, 64, psbuf6);
+    }
+
     if (opt.saoCuOrgB0)
     {
         HEADER0("SAO_BO_0");
diff -r 4cccf22b00ee -r e3017cf9bff5 source/test/pixelharness.h
--- a/source/test/pixelharness.h	Fri Apr 10 18:15:38 2015 -0500
+++ b/source/test/pixelharness.h	Tue Apr 14 10:18:29 2015 +0530
@@ -51,6 +51,8 @@
     int8_t   psbuf3[BUFFSIZE];
     int8_t   psbuf4[BUFFSIZE];
     int8_t   psbuf5[BUFFSIZE];
+    int8_t   psbuf6[BUFFSIZE];
+    int8_t   psbuf7[BUFFSIZE];
 
     int16_t  sbuf1[BUFFSIZE];
     int16_t  sbuf2[BUFFSIZE];
@@ -98,6 +100,7 @@
     bool check_saoCuOrgE1_t(saoCuOrgE1_t ref, saoCuOrgE1_t opt);
     bool check_saoCuOrgE2_t(saoCuOrgE2_t ref, saoCuOrgE2_t opt);
     bool check_saoCuOrgE3_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt);
+    bool check_saoCuOrgE3_2Rows_t(saoCuOrgE3_2Rows_t ref, saoCuOrgE3_2Rows_t opt);
     bool check_saoCuOrgB0_t(saoCuOrgB0_t ref, saoCuOrgB0_t opt);
     bool check_planecopy_sp(planecopy_sp_t ref, planecopy_sp_t opt);
     bool check_planecopy_cp(planecopy_cp_t ref, planecopy_cp_t opt);


More information about the x265-devel mailing list