[x265] [PATCH 2 of 2] asm: Testbench for planeClipAndMax and enable it
Min Chen
chenm003 at 163.com
Wed Aug 19 20:29:11 CEST 2015
# HG changeset patch
# User Min Chen <chenm003 at 163.com>
# Date 1440007775 25200
# Node ID 0ed7994745c62eaa5af3204c671e77378c64fe37
# Parent 1f3102f601abc0c730f471ae7762870b784fe507
asm: Testbench for planeClipAndMax and enable it
---
source/common/x86/asm-primitives.cpp | 1 +
source/common/x86/pixel.h | 1 +
source/test/pixelharness.cpp | 43 ++++++++++++++++++++++++++++++++++
source/test/pixelharness.h | 1 +
4 files changed, 46 insertions(+), 0 deletions(-)
diff -r 1f3102f601ab -r 0ed7994745c6 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Aug 19 11:09:33 2015 -0700
+++ b/source/common/x86/asm-primitives.cpp Wed Aug 19 11:09:35 2015 -0700
@@ -3660,6 +3660,7 @@
p.chroma[X265_CSP_I420].cu[CHROMA_420_32x32].copy_ps = PFX(blockcopy_ps_32x32_avx2);
p.chroma[X265_CSP_I422].cu[CHROMA_422_32x64].copy_ps = PFX(blockcopy_ps_32x64_avx2);
p.cu[BLOCK_64x64].copy_ps = PFX(blockcopy_ps_64x64_avx2);
+ p.planeClipAndMax = PFX(planeClipAndMax_avx2);
/* The following primitives have been disabled since performance compared to SSE is negligible/negative */
#if 0
diff -r 1f3102f601ab -r 0ed7994745c6 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Wed Aug 19 11:09:33 2015 -0700
+++ b/source/common/x86/pixel.h Wed Aug 19 11:09:35 2015 -0700
@@ -34,6 +34,7 @@
void PFX(upShift_16_avx2)(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
void PFX(upShift_8_sse4)(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
void PFX(upShift_8_avx2)(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
+pixel PFX(planeClipAndMax_avx2)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);
#define DECL_PIXELS(cpu) \
FUNCDEF_PU(uint32_t, pixel_ssd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
diff -r 1f3102f601ab -r 0ed7994745c6 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Wed Aug 19 11:09:33 2015 -0700
+++ b/source/test/pixelharness.cpp Wed Aug 19 11:09:35 2015 -0700
@@ -1751,6 +1751,34 @@
return true;
}
+bool PixelHarness::check_planeClipAndMax(planeClipAndMax_t ref, planeClipAndMax_t opt)
+{
+ for (int i = 0; i < ITERS; i++)
+ {
+ intptr_t rand_stride = rand() % STRIDE;
+ int rand_width = (rand() % (STRIDE * 2)) + 1;
+ const int rand_height = (rand() % MAX_HEIGHT) + 1;
+ const pixel rand_min = rand() % 32;
+ const pixel rand_max = PIXEL_MAX - (rand() % 32);
+ uint64_t ref_sum, opt_sum;
+
+ // video width must be more than or equal to 32
+ if (rand_width < 32)
+ rand_width = 32;
+
+ // stride must be more than or equal to width
+ if (rand_stride < rand_width)
+ rand_stride = rand_width;
+
+ pixel ref_max = ref(pbuf1, rand_stride, rand_width, rand_height, &ref_sum, rand_min, rand_max);
+ pixel opt_max = (pixel)checked(opt, pbuf1, rand_stride, rand_width, rand_height, &opt_sum, rand_min, rand_max);
+
+ if (ref_max != opt_max)
+ return false;
+ }
+ return true;
+}
+
bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
{
if (opt.pu[part].satd)
@@ -2379,6 +2407,15 @@
}
}
+ if (opt.planeClipAndMax)
+ {
+ if (!check_planeClipAndMax(ref.planeClipAndMax, opt.planeClipAndMax))
+ {
+ printf("planeClipAndMax failed!\n");
+ return false;
+ }
+ }
+
return true;
}
@@ -2849,4 +2886,10 @@
memset(abscoefBuf + 32 * 31, 1, 32 * sizeof(uint16_t));
REPORT_SPEEDUP(opt.costCoeffRemain, ref.costCoeffRemain, abscoefBuf, 16, 3);
}
+ if (opt.planeClipAndMax)
+ {
+ HEADER0("planeClipAndMax");
+ uint64_t dummy;
+ REPORT_SPEEDUP(opt.planeClipAndMax, ref.planeClipAndMax, pbuf1, 128, 63, 62, &dummy, 1, PIXEL_MAX - 1);
+ }
}
diff -r 1f3102f601ab -r 0ed7994745c6 source/test/pixelharness.h
--- a/source/test/pixelharness.h Wed Aug 19 11:09:33 2015 -0700
+++ b/source/test/pixelharness.h Wed Aug 19 11:09:35 2015 -0700
@@ -116,6 +116,7 @@
bool check_findPosFirstLast(findPosFirstLast_t ref, findPosFirstLast_t opt);
bool check_costCoeffNxN(costCoeffNxN_t ref, costCoeffNxN_t opt);
bool check_costCoeffRemain(costCoeffRemain_t ref, costCoeffRemain_t opt);
+ bool check_planeClipAndMax(planeClipAndMax_t ref, planeClipAndMax_t opt);
public:
More information about the x265-devel
mailing list