[x265] [PATCH 2 of 2] asm: Testbench for planeClipAndMax and enable it

Min Chen chenm003 at 163.com
Wed Aug 19 20:29:11 CEST 2015


# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1440007775 25200
# Node ID 0ed7994745c62eaa5af3204c671e77378c64fe37
# Parent  1f3102f601abc0c730f471ae7762870b784fe507
asm: Testbench for planeClipAndMax and enable it
---
 source/common/x86/asm-primitives.cpp |    1 +
 source/common/x86/pixel.h            |    1 +
 source/test/pixelharness.cpp         |   43 ++++++++++++++++++++++++++++++++++
 source/test/pixelharness.h           |    1 +
 4 files changed, 46 insertions(+), 0 deletions(-)

diff -r 1f3102f601ab -r 0ed7994745c6 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Aug 19 11:09:33 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp	Wed Aug 19 11:09:35 2015 -0700
@@ -3660,6 +3660,7 @@
         p.chroma[X265_CSP_I420].cu[CHROMA_420_32x32].copy_ps = PFX(blockcopy_ps_32x32_avx2);
         p.chroma[X265_CSP_I422].cu[CHROMA_422_32x64].copy_ps = PFX(blockcopy_ps_32x64_avx2);
         p.cu[BLOCK_64x64].copy_ps = PFX(blockcopy_ps_64x64_avx2);
+        p.planeClipAndMax = PFX(planeClipAndMax_avx2);
 
         /* The following primitives have been disabled since performance compared to SSE is negligible/negative */
 #if 0
diff -r 1f3102f601ab -r 0ed7994745c6 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Wed Aug 19 11:09:33 2015 -0700
+++ b/source/common/x86/pixel.h	Wed Aug 19 11:09:35 2015 -0700
@@ -34,6 +34,7 @@
 void PFX(upShift_16_avx2)(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
 void PFX(upShift_8_sse4)(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
 void PFX(upShift_8_avx2)(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
+pixel PFX(planeClipAndMax_avx2)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);
 
 #define DECL_PIXELS(cpu) \
     FUNCDEF_PU(uint32_t, pixel_ssd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
diff -r 1f3102f601ab -r 0ed7994745c6 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Wed Aug 19 11:09:33 2015 -0700
+++ b/source/test/pixelharness.cpp	Wed Aug 19 11:09:35 2015 -0700
@@ -1751,6 +1751,34 @@
     return true;
 }
 
+bool PixelHarness::check_planeClipAndMax(planeClipAndMax_t ref, planeClipAndMax_t opt)
+{
+    for (int i = 0; i < ITERS; i++)
+    {
+        intptr_t rand_stride = rand() % STRIDE;
+        int rand_width = (rand() % (STRIDE * 2)) + 1;
+        const int rand_height = (rand() % MAX_HEIGHT) + 1;
+        const pixel rand_min = rand() % 32;
+        const pixel rand_max = PIXEL_MAX - (rand() % 32);
+        uint64_t ref_sum, opt_sum;
+
+        // video width must be more than or equal to 32
+        if (rand_width < 32)
+            rand_width = 32;
+
+        // stride must be more than or equal to width
+        if (rand_stride < rand_width)
+            rand_stride = rand_width;
+
+        pixel ref_max = ref(pbuf1, rand_stride, rand_width, rand_height, &ref_sum, rand_min, rand_max);
+        pixel opt_max = (pixel)checked(opt, pbuf1, rand_stride, rand_width, rand_height, &opt_sum, rand_min, rand_max);
+
+        if (ref_max != opt_max)
+            return false;
+    }
+    return true;
+}
+
 bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
 {
     if (opt.pu[part].satd)
@@ -2379,6 +2407,15 @@
         }
     }
 
+    if (opt.planeClipAndMax)
+    {
+        if (!check_planeClipAndMax(ref.planeClipAndMax, opt.planeClipAndMax))
+        {
+            printf("planeClipAndMax failed!\n");
+            return false;
+        }
+    }
+
     return true;
 }
 
@@ -2849,4 +2886,10 @@
         memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t));
         REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain, abscoefBuf, 16, 3);
     }
+    if (opt.planeClipAndMax)
+    {
+        HEADER0("planeClipAndMax");
+        uint64_t dummy;
+        REPORT_SPEEDUP(opt.planeClipAndMax, ref.planeClipAndMax, pbuf1, 128, 63, 62, &dummy, 1, PIXEL_MAX - 1);
+    }
 }
diff -r 1f3102f601ab -r 0ed7994745c6 source/test/pixelharness.h
--- a/source/test/pixelharness.h	Wed Aug 19 11:09:33 2015 -0700
+++ b/source/test/pixelharness.h	Wed Aug 19 11:09:35 2015 -0700
@@ -116,6 +116,7 @@
     bool check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt);
     bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);
     bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt);
+    bool check_planeClipAndMax(planeClipAndMax_t ref, planeClipAndMax_t opt);
 
 public:
 



More information about the x265-devel mailing list