[x265] [PATCH] sao: modify saoCuOrgE2 primitive to handle width=16 separately

Divya Manivannan divya at multicorewareinc.com
Thu Apr 23 15:03:33 CEST 2015


# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1429792993 -19800
#      Thu Apr 23 18:13:13 2015 +0530
# Node ID 861ffbedeaefd45eb6431d8ce6d5a3b4789f9a2c
# Parent  cec68d3e37ef15c571cfa7f2784a12e944a2e2a7
sao: modify saoCuOrgE2 primitive to handle width=16 separately

diff -r cec68d3e37ef -r 861ffbedeaef source/common/loopfilter.cpp
--- a/source/common/loopfilter.cpp	Wed Apr 22 21:35:55 2015 -0500
+++ b/source/common/loopfilter.cpp	Thu Apr 23 18:13:13 2015 +0530
@@ -144,7 +144,8 @@
     p.saoCuOrgE0 = processSaoCUE0;
     p.saoCuOrgE1 = processSaoCUE1;
     p.saoCuOrgE1_2Rows = processSaoCUE1_2Rows;
-    p.saoCuOrgE2 = processSaoCUE2;
+    p.saoCuOrgE2[0] = processSaoCUE2;
+    p.saoCuOrgE2[1] = processSaoCUE2;
     p.saoCuOrgE3[0] = processSaoCUE3;
     p.saoCuOrgE3[1] = processSaoCUE3;
     p.saoCuOrgB0 = processSaoCUB0;
diff -r cec68d3e37ef -r 861ffbedeaef source/common/primitives.h
--- a/source/common/primitives.h	Wed Apr 22 21:35:55 2015 -0500
+++ b/source/common/primitives.h	Thu Apr 23 18:13:13 2015 +0530
@@ -276,7 +276,7 @@
     sign_t                sign;
     saoCuOrgE0_t          saoCuOrgE0;
     saoCuOrgE1_t          saoCuOrgE1, saoCuOrgE1_2Rows;
-    saoCuOrgE2_t          saoCuOrgE2;
+    saoCuOrgE2_t          saoCuOrgE2[2];
     saoCuOrgE3_t          saoCuOrgE3[2];
     saoCuOrgB0_t          saoCuOrgB0;
 
diff -r cec68d3e37ef -r 861ffbedeaef source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Apr 22 21:35:55 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp	Thu Apr 23 18:13:13 2015 +0530
@@ -1496,7 +1496,8 @@
         p.saoCuOrgE0 = x265_saoCuOrgE0_sse4;
         p.saoCuOrgE1 = x265_saoCuOrgE1_sse4;
         p.saoCuOrgE1_2Rows = x265_saoCuOrgE1_2Rows_sse4;
-        p.saoCuOrgE2 = x265_saoCuOrgE2_sse4;
+        p.saoCuOrgE2[0] = x265_saoCuOrgE2_sse4;
+        p.saoCuOrgE2[1] = x265_saoCuOrgE2_sse4;
         p.saoCuOrgE3[0] = x265_saoCuOrgE3_sse4;
         p.saoCuOrgE3[1] = x265_saoCuOrgE3_sse4;
         p.saoCuOrgB0 = x265_saoCuOrgB0_sse4;
diff -r cec68d3e37ef -r 861ffbedeaef source/encoder/sao.cpp
--- a/source/encoder/sao.cpp	Wed Apr 22 21:35:55 2015 -0500
+++ b/source/encoder/sao.cpp	Thu Apr 23 18:13:13 2015 +0530
@@ -442,8 +442,8 @@
                 int8_t one = upBufft[1];
                 int8_t two = upBufft[endX + 1];
 
-                primitives.saoCuOrgE2(rec, upBufft, upBuff1, m_offsetEo, ctuWidth, stride);
-                if (!lpelx)
+                primitives.saoCuOrgE2[ctuWidth > 16](rec, upBufft, upBuff1, m_offsetEo, ctuWidth, stride);
+                if (startX)
                 {
                     rec[0] = firstPxl;
                     upBufft[1] = one;
diff -r cec68d3e37ef -r 861ffbedeaef source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Wed Apr 22 21:35:55 2015 -0500
+++ b/source/test/pixelharness.cpp	Thu Apr 23 18:13:13 2015 +0530
@@ -951,7 +951,7 @@
     return true;
 }
 
-bool PixelHarness::check_saoCuOrgE2_t(saoCuOrgE2_t ref, saoCuOrgE2_t opt)
+bool PixelHarness::check_saoCuOrgE2_t(saoCuOrgE2_t ref[2], saoCuOrgE2_t opt[2])
 {
     ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
     ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
@@ -959,24 +959,29 @@
     memset(ref_dest, 0xCD, sizeof(ref_dest));
     memset(opt_dest, 0xCD, sizeof(opt_dest));
 
-    int j = 0;
+    for (int id = 0; id < 2; id++)
+    {
+        int j = 0;
+        if (opt[id])
+        {
+            for (int i = 0; i < ITERS; i++)
+            {
+                int width = 16 * (1 << (id * (rand() % 2 + 1)));
+                int stride = width + 1;
 
-    for (int i = 0; i < ITERS; i++)
-    {
-        int width = 16 * (rand() % 4 + 1);
-        int stride = width + 1;
+                ref[width > 16](ref_dest, psbuf1 + j, psbuf2 + j, psbuf3 + j, width, stride);
+                checked(opt[width > 16], opt_dest, psbuf4 + j, psbuf2 + j, psbuf3 + j, width, stride);
 
-        ref(ref_dest, psbuf1 + j, psbuf2 + j, psbuf3 + j, width, stride);
-        checked(opt, opt_dest, psbuf4 + j, psbuf2 + j, psbuf3 + j, width, stride);
+                if (memcmp(psbuf1 + j, psbuf4 + j, width * sizeof(int8_t)))
+                    return false;
 
-        if (memcmp(psbuf1 + j, psbuf4 + j, width * sizeof(int8_t)))
-            return false;
+                if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
+                    return false;
 
-        if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
-            return false;
-
-        reportfail();
-        j += INCR;
+                reportfail();
+                j += INCR;
+            }
+        }
     }
 
     return true;
@@ -996,7 +1001,7 @@
     {
         int stride = 16 * (rand() % 4 + 1);
         int start = rand() % 2;
-        int end = (16 * (rand() % 4 + 1)) - rand() % 2;
+        int end = 16 - rand() % 2;
 
         ref(ref_dest, psbuf2 + j, psbuf1 + j, stride, start, end);
         checked(opt, opt_dest, psbuf5 + j, psbuf1 + j, stride, start, end);
@@ -1799,14 +1804,17 @@
         }
     }
 
-    if (opt.saoCuOrgE2)
-    {
-        if (!check_saoCuOrgE2_t(ref.saoCuOrgE2, opt.saoCuOrgE2))
-        {
-            printf("SAO_EO_2 failed\n");
-            return false;
-        }
-    }
+    if (opt.saoCuOrgE2[0] || opt.saoCuOrgE2[1])
+    {
+        saoCuOrgE2_t ref1[] = { ref.saoCuOrgE2[0], ref.saoCuOrgE2[1] };
+        saoCuOrgE2_t opt1[] = { opt.saoCuOrgE2[0], opt.saoCuOrgE2[1] };
+
+        if (!check_saoCuOrgE2_t(ref1, opt1))
+        {
+            printf("SAO_EO_2[0] && SAO_EO_2[1] failed\n");
+            return false;
+        }
+    }
 
     if (opt.saoCuOrgE3[0])
     {
@@ -2206,16 +2214,22 @@
         REPORT_SPEEDUP(opt.saoCuOrgE1_2Rows, ref.saoCuOrgE1_2Rows, pbuf1, psbuf2, psbuf1, 64, 64);
     }
 
-    if (opt.saoCuOrgE2)
+    if (opt.saoCuOrgE2[0])
     {
-        HEADER0("SAO_EO_2");
-        REPORT_SPEEDUP(opt.saoCuOrgE2, ref.saoCuOrgE2, pbuf1, psbuf1, psbuf2, psbuf3, 64, 64);
+        HEADER0("SAO_EO_2[0]");
+        REPORT_SPEEDUP(opt.saoCuOrgE2[0], ref.saoCuOrgE2[0], pbuf1, psbuf1, psbuf2, psbuf3, 16, 64);
+    }
+
+    if (opt.saoCuOrgE2[1])
+    {
+        HEADER0("SAO_EO_2[1]");
+        REPORT_SPEEDUP(opt.saoCuOrgE2[1], ref.saoCuOrgE2[1], pbuf1, psbuf1, psbuf2, psbuf3, 64, 64);
     }
 
     if (opt.saoCuOrgE3[0])
     {
         HEADER0("SAO_EO_3[0]");
-        REPORT_SPEEDUP(opt.saoCuOrgE3[0], ref.saoCuOrgE3[0], pbuf1, psbuf2, psbuf1, 64, 0, 64);
+        REPORT_SPEEDUP(opt.saoCuOrgE3[0], ref.saoCuOrgE3[0], pbuf1, psbuf2, psbuf1, 64, 0, 16);
     }
 
     if (opt.saoCuOrgE3[1])
diff -r cec68d3e37ef -r 861ffbedeaef source/test/pixelharness.h
--- a/source/test/pixelharness.h	Wed Apr 22 21:35:55 2015 -0500
+++ b/source/test/pixelharness.h	Thu Apr 23 18:13:13 2015 +0530
@@ -96,7 +96,7 @@
     bool check_addAvg(addAvg_t, addAvg_t);
     bool check_saoCuOrgE0_t(saoCuOrgE0_t ref, saoCuOrgE0_t opt);
     bool check_saoCuOrgE1_t(saoCuOrgE1_t ref, saoCuOrgE1_t opt);
-    bool check_saoCuOrgE2_t(saoCuOrgE2_t ref, saoCuOrgE2_t opt);
+    bool check_saoCuOrgE2_t(saoCuOrgE2_t ref[], saoCuOrgE2_t opt[]);
     bool check_saoCuOrgE3_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt);
     bool check_saoCuOrgE3_32_t(saoCuOrgE3_t ref, saoCuOrgE3_t opt);
     bool check_saoCuOrgB0_t(saoCuOrgB0_t ref, saoCuOrgB0_t opt);


More information about the x265-devel mailing list