<div dir="ltr"><div class="gmail_extra"><br><div class="gmail_quote">On Wed, May 10, 2017 at 3:40 PM, Jayashri Murugan <span dir="ltr">&lt;<a href="mailto:jayashri@multicorewareinc.com" target="_blank">jayashri@multicorewareinc.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Vignesh Vijayakumar<br>
# Date 1493790986 -19800<br>
# Wed May 03 11:26:26 2017 +0530<br>
# Node ID 47e0de01255d81f41076d4b720c26d<wbr>5115eb2e9c<br>
# Parent bc0e9bd7c08f5ddc10be5e7a82c167<wbr>0667862dab<br>
SEA Motion Search: AVX2 framework of integral functions<br></blockquote><div><br></div><div>Pushed along with AVX2 kernels for vertical functions to default branch.</div><div>Apologies for the delay!</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/common/CMakeLists.txt<br>
--- a/source/common/CMakeLists.txt Wed May 10 10:40:16 2017 +0530<br>
+++ b/source/common/CMakeLists.txt Wed May 03 11:26:26 2017 +0530<br>
@@ -57,10 +57,10 @@<br>
set(VEC_PRIMITIVES vec/vec-primitives.cpp ${PRIMITIVES})<br>
source_group(Intrinsics FILES ${VEC_PRIMITIVES})<br>
<br>
- set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)<br>
+ set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h seaintegral.h)<br>
set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm ssd-a.asm mc-a.asm<br>
mc-a2.asm pixel-util8.asm blockcopy8.asm<br>
- pixeladd8.asm dct8.asm)<br>
+ pixeladd8.asm dct8.asm seaintegral.asm)<br>
if(HIGH_BIT_DEPTH)<br>
set(A_SRCS ${A_SRCS} sad16-a.asm intrapred16.asm ipfilter16.asm loopfilter.asm)<br>
else()<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/common/primitives.cpp<br>
--- a/source/common/primitives.cpp Wed May 10 10:40:16 2017 +0530<br>
+++ b/source/common/primitives.cpp Wed May 03 11:26:26 2017 +0530<br>
@@ -57,6 +57,7 @@<br>
void setupIntraPrimitives_c(<wbr>EncoderPrimitives &p);<br>
void setupLoopFilterPrimitives_c(<wbr>EncoderPrimitives &p);<br>
void setupSaoPrimitives_c(<wbr>EncoderPrimitives &p);<br>
+void setupSeaIntegralPrimitives_c(<wbr>EncoderPrimitives &p);<br>
<br>
void setupCPrimitives(<wbr>EncoderPrimitives &p)<br>
{<br>
@@ -66,6 +67,7 @@<br>
setupIntraPrimitives_c(p); // intrapred.cpp<br>
setupLoopFilterPrimitives_c(p)<wbr>; // loopfilter.cpp<br>
setupSaoPrimitives_c(p); // sao.cpp<br>
+ setupSeaIntegralPrimitives_c(<wbr>p); // framefilter.cpp<br>
}<br>
<br>
void setupAliasPrimitives(<wbr>EncoderPrimitives &p)<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/common/primitives.h<br>
--- a/source/common/primitives.h Wed May 10 10:40:16 2017 +0530<br>
+++ b/source/common/primitives.h Wed May 03 11:26:26 2017 +0530<br>
@@ -110,6 +110,17 @@<br>
BLOCK_422_32x64<br>
};<br>
<br>
+enum IntegralSize<br>
+{<br>
+ INTEGRAL_4,<br>
+ INTEGRAL_8,<br>
+ INTEGRAL_12,<br>
+ INTEGRAL_16,<br>
+ INTEGRAL_24,<br>
+ INTEGRAL_32,<br>
+ NUM_INTEGRAL_SIZE<br>
+};<br>
+<br>
typedef int (*pixelcmp_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned<br>
typedef int (*pixelcmp_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);<br>
typedef sse_t (*pixel_sse_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned<br>
@@ -203,6 +214,9 @@<br>
typedef void (*pelFilterLumaStrong_t)(<wbr>pixel* src, intptr_t srcStep, intptr_t offset, int32_t tcP, int32_t tcQ);<br>
typedef void (*pelFilterChroma_t)(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ);<br>
<br>
+typedef void (*integralv_t)(uint32_t *sum, intptr_t stride);<br>
+typedef void (*integralh_t)(uint32_t *sum, pixel *pix, intptr_t stride);<br>
+<br>
/* Function pointers to optimized encoder primitives. Each pointer can reference<br>
* either an assembly routine, a SIMD intrinsic primitive, or a C function */<br>
struct EncoderPrimitives<br>
@@ -342,6 +356,9 @@<br>
pelFilterLumaStrong_t pelFilterLumaStrong[2]; // EDGE_VER = 0, EDGE_HOR = 1<br>
pelFilterChroma_t pelFilterChroma[2]; // EDGE_VER = 0, EDGE_HOR = 1<br>
<br>
+ integralv_t integral_initv[NUM_INTEGRAL_<wbr>SIZE];<br>
+ integralh_t integral_inith[NUM_INTEGRAL_<wbr>SIZE];<br>
+<br>
/* There is one set of chroma primitives per color space. An encoder will<br>
* have just a single color space and thus it will only ever use one entry<br>
* in this array. However we always fill all entries in the array in case<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/common/x86/asm-<wbr>primitives.cpp<br>
--- a/source/common/x86/asm-<wbr>primitives.cpp Wed May 10 10:40:16 2017 +0530<br>
+++ b/source/common/x86/asm-<wbr>primitives.cpp Wed May 03 11:26:26 2017 +0530<br>
@@ -114,6 +114,7 @@<br>
#include "blockcopy8.h"<br>
#include "intrapred.h"<br>
#include "dct8.h"<br>
+#include "seaintegral.h"<br>
}<br>
<br>
#define ALL_LUMA_CU_TYPED(prim, fncdef, fname, cpu) \<br>
@@ -2157,6 +2158,13 @@<br>
p.fix8Unpack = PFX(cutree_fix8_unpack_avx2);<br>
p.fix8Pack = PFX(cutree_fix8_pack_avx2);<br>
<br>
+ p.integral_initv[INTEGRAL_4] = PFX(integral4v_avx2);<br>
+ p.integral_initv[INTEGRAL_8] = PFX(integral8v_avx2);<br>
+ p.integral_initv[INTEGRAL_12] = PFX(integral12v_avx2);<br>
+ p.integral_initv[INTEGRAL_16] = PFX(integral16v_avx2);<br>
+ p.integral_initv[INTEGRAL_24] = PFX(integral24v_avx2);<br>
+ p.integral_initv[INTEGRAL_32] = PFX(integral32v_avx2);<br>
+<br>
/* TODO: This kernel needs to be modified to work with HIGH_BIT_DEPTH only<br>
p.planeClipAndMax = PFX(planeClipAndMax_avx2); */<br>
<br>
@@ -3695,6 +3703,13 @@<br>
p.fix8Unpack = PFX(cutree_fix8_unpack_avx2);<br>
p.fix8Pack = PFX(cutree_fix8_pack_avx2);<br>
<br>
+ p.integral_initv[INTEGRAL_4] = PFX(integral4v_avx2);<br>
+ p.integral_initv[INTEGRAL_8] = PFX(integral8v_avx2);<br>
+ p.integral_initv[INTEGRAL_12] = PFX(integral12v_avx2);<br>
+ p.integral_initv[INTEGRAL_16] = PFX(integral16v_avx2);<br>
+ p.integral_initv[INTEGRAL_24] = PFX(integral24v_avx2);<br>
+ p.integral_initv[INTEGRAL_32] = PFX(integral32v_avx2);<br>
+<br>
}<br>
#endif<br>
}<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/common/x86/seaintegral.<wbr>asm<br>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000<br>
+++ b/source/common/x86/<wbr>seaintegral.asm Wed May 03 11:26:26 2017 +0530<br>
@@ -0,0 +1,125 @@<br>
+;****************************<wbr>******************************<wbr>*******************<br>
+;* Copyright (C) 2013-2017 MulticoreWare, Inc<br>
+;*<br>
+;* Authors: Jayashri Murugan <<a href="mailto:jayashri@multicorewareinc.com">jayashri@multicorewareinc.com</a><wbr>><br>
+;* Vignesh V Menon <<a href="mailto:vignesh@multicorewareinc.com">vignesh@multicorewareinc.com</a>><br>
+;* Praveen Tiwari <<a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a>><br>
+;*<br>
+;* This program is free software; you can redistribute it and/or modify<br>
+;* it under the terms of the GNU General Public License as published by<br>
+;* the Free Software Foundation; either version 2 of the License, or<br>
+;* (at your option) any later version.<br>
+;*<br>
+;* This program is distributed in the hope that it will be useful,<br>
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the<br>
+;* GNU General Public License for more details.<br>
+;*<br>
+;* You should have received a copy of the GNU General Public License<br>
+;* along with this program; if not, write to the Free Software<br>
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.<br>
+;*<br>
+;* This program is also available under a commercial proprietary license.<br>
+;* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
+;****************************<wbr>******************************<wbr>*******************/<br>
+<br>
+%include "x86inc.asm"<br>
+%include "x86util.asm"<br>
+<br>
+SECTION .text<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;void integral_init4v_c(uint32_t *sum4, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral4v, 2, 2, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;void integral_init8v_c(uint32_t *sum8, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral8v, 2, 2, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;void integral_init12v_c(uint32_t *sum12, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral12v, 2, 2, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;void integral_init16v_c(uint32_t *sum16, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral16v, 2, 2, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;void integral_init24v_c(uint32_t *sum24, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral24v, 2, 2, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;void integral_init32v_c(uint32_t *sum32, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral32v, 2, 2, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;static void integral_init4h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral4h, 3, 3, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;static void integral_init8h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral8h, 3, 3, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;static void integral_init12h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral12h, 3, 3, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;static void integral_init16h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral16h, 3, 3, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;static void integral_init24h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral24h, 3, 3, 0<br>
+<br>
+ RET<br>
+<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+;static void integral_init32h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
+;----------------------------<wbr>------------------------------<wbr>-------------------<br>
+INIT_YMM avx2<br>
+cglobal integral32h, 3, 3, 0<br>
+<br>
+ RET<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/common/x86/seaintegral.<wbr>h<br>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000<br>
+++ b/source/common/x86/<wbr>seaintegral.h Wed May 03 11:26:26 2017 +0530<br>
@@ -0,0 +1,42 @@<br>
+/****************************<wbr>******************************<wbr>*******************<br>
+* Copyright (C) 2013-2017 MulticoreWare, Inc<br>
+*<br>
+* Authors: Vignesh V Menon <<a href="mailto:vignesh@multicorewareinc.com">vignesh@multicorewareinc.com</a>><br>
+* Jayashri Murugan <<a href="mailto:jayashri@multicorewareinc.com">jayashri@multicorewareinc.com</a><wbr>><br>
+* Praveen Tiwari <<a href="mailto:praveen@multicorewareinc.com">praveen@multicorewareinc.com</a>><br>
+*<br>
+* This program is free software; you can redistribute it and/or modify<br>
+* it under the terms of the GNU General Public License as published by<br>
+* the Free Software Foundation; either version 2 of the License, or<br>
+* (at your option) any later version.<br>
+*<br>
+* This program is distributed in the hope that it will be useful,<br>
+* but WITHOUT ANY WARRANTY; without even the implied warranty of<br>
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the<br>
+* GNU General Public License for more details.<br>
+*<br>
+* You should have received a copy of the GNU General Public License<br>
+* along with this program; if not, write to the Free Software<br>
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.<br>
+*<br>
+* This program is also available under a commercial proprietary license.<br>
+* For more information, contact us at license @ <a href="http://x265.com" rel="noreferrer" target="_blank">x265.com</a>.<br>
+*****************************<wbr>******************************<wbr>******************/<br>
+<br>
+#ifndef X265_SEAINTEGRAL_H<br>
+#define X265_SEAINTEGRAL_H<br>
+<br>
+void PFX(integral4v_avx2)(uint32_t *sum, intptr_t stride);<br>
+void PFX(integral8v_avx2)(uint32_t *sum, intptr_t stride);<br>
+void PFX(integral12v_avx2)(uint32_t *sum, intptr_t stride);<br>
+void PFX(integral16v_avx2)(uint32_t *sum, intptr_t stride);<br>
+void PFX(integral24v_avx2)(uint32_t *sum, intptr_t stride);<br>
+void PFX(integral32v_avx2)(uint32_t *sum, intptr_t stride);<br>
+void PFX(integral4h_avx2)(uint32_t *sum, pixel *pix, intptr_t stride);<br>
+void PFX(integral8h_avx2)(uint32_t *sum, pixel *pix, intptr_t stride);<br>
+void PFX(integral12h_avx2)(uint32_t *sum, pixel *pix, intptr_t stride);<br>
+void PFX(integral16h_avx2)(uint32_t *sum, pixel *pix, intptr_t stride);<br>
+void PFX(integral24h_avx2)(uint32_t *sum, pixel *pix, intptr_t stride);<br>
+void PFX(integral32h_avx2)(uint32_t *sum, pixel *pix, intptr_t stride);<br>
+<br>
+#endif //X265_SEAINTEGRAL_H<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/encoder/framefilter.cpp<br>
--- a/source/encoder/framefilter.<wbr>cpp Wed May 10 10:40:16 2017 +0530<br>
+++ b/source/encoder/framefilter.<wbr>cpp Wed May 03 11:26:26 2017 +0530<br>
@@ -35,107 +35,126 @@<br>
static uint64_t computeSSD(pixel *fenc, pixel *rec, intptr_t stride, uint32_t width, uint32_t height);<br>
static float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, uint32_t width, uint32_t height, void *buf, uint32_t& cnt);<br>
<br>
-static void integral_init4h(uint32_t *sum, pixel *pix, intptr_t stride)<br>
+namespace X265_NS<br>
{<br>
- int32_t v = pix[0] + pix[1] + pix[2] + pix[3];<br>
- for (int16_t x = 0; x < stride - 4; x++)<br>
+ static void integral_init4h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
{<br>
- sum[x] = v + sum[x - stride];<br>
- v += pix[x + 4] - pix[x];<br>
+ int32_t v = pix[0] + pix[1] + pix[2] + pix[3];<br>
+ for (int16_t x = 0; x < stride - 4; x++)<br>
+ {<br>
+ sum[x] = v + sum[x - stride];<br>
+ v += pix[x + 4] - pix[x];<br>
+ }<br>
}<br>
-}<br>
<br>
-static void integral_init8h(uint32_t *sum, pixel *pix, intptr_t stride)<br>
-{<br>
- int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7];<br>
- for (int16_t x = 0; x < stride - 8; x++)<br>
+ static void integral_init8h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
{<br>
- sum[x] = v + sum[x - stride];<br>
- v += pix[x + 8] - pix[x];<br>
+ int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7];<br>
+ for (int16_t x = 0; x < stride - 8; x++)<br>
+ {<br>
+ sum[x] = v + sum[x - stride];<br>
+ v += pix[x + 8] - pix[x];<br>
+ }<br>
}<br>
-}<br>
<br>
-static void integral_init12h(uint32_t *sum, pixel *pix, intptr_t stride)<br>
-{<br>
- int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +<br>
- pix[8] + pix[9] + pix[10] + pix[11];<br>
- for (int16_t x = 0; x < stride - 12; x++)<br>
+ static void integral_init12h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
{<br>
- sum[x] = v + sum[x - stride];<br>
- v += pix[x + 12] - pix[x];<br>
+ int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +<br>
+ pix[8] + pix[9] + pix[10] + pix[11];<br>
+ for (int16_t x = 0; x < stride - 12; x++)<br>
+ {<br>
+ sum[x] = v + sum[x - stride];<br>
+ v += pix[x + 12] - pix[x];<br>
+ }<br>
}<br>
-}<br>
<br>
-static void integral_init16h(uint32_t *sum, pixel *pix, intptr_t stride)<br>
-{<br>
- int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +<br>
- pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15];<br>
- for (int16_t x = 0; x < stride - 16; x++)<br>
+ static void integral_init16h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
{<br>
- sum[x] = v + sum[x - stride];<br>
- v += pix[x + 16] - pix[x];<br>
+ int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +<br>
+ pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15];<br>
+ for (int16_t x = 0; x < stride - 16; x++)<br>
+ {<br>
+ sum[x] = v + sum[x - stride];<br>
+ v += pix[x + 16] - pix[x];<br>
+ }<br>
}<br>
-}<br>
<br>
-static void integral_init24h(uint32_t *sum, pixel *pix, intptr_t stride)<br>
-{<br>
- int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +<br>
- pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15] +<br>
- pix[16] + pix[17] + pix[18] + pix[19] + pix[20] + pix[21] + pix[22] + pix[23];<br>
- for (int16_t x = 0; x < stride - 24; x++)<br>
+ static void integral_init24h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
{<br>
- sum[x] = v + sum[x - stride];<br>
- v += pix[x + 24] - pix[x];<br>
+ int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +<br>
+ pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15] +<br>
+ pix[16] + pix[17] + pix[18] + pix[19] + pix[20] + pix[21] + pix[22] + pix[23];<br>
+ for (int16_t x = 0; x < stride - 24; x++)<br>
+ {<br>
+ sum[x] = v + sum[x - stride];<br>
+ v += pix[x + 24] - pix[x];<br>
+ }<br>
}<br>
-}<br>
<br>
-static void integral_init32h(uint32_t *sum, pixel *pix, intptr_t stride)<br>
-{<br>
- int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +<br>
- pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15] +<br>
- pix[16] + pix[17] + pix[18] + pix[19] + pix[20] + pix[21] + pix[22] + pix[23] +<br>
- pix[24] + pix[25] + pix[26] + pix[27] + pix[28] + pix[29] + pix[30] + pix[31];<br>
- for (int16_t x = 0; x < stride - 32; x++)<br>
+ static void integral_init32h_c(uint32_t *sum, pixel *pix, intptr_t stride)<br>
{<br>
- sum[x] = v + sum[x - stride];<br>
- v += pix[x + 32] - pix[x];<br>
+ int32_t v = pix[0] + pix[1] + pix[2] + pix[3] + pix[4] + pix[5] + pix[6] + pix[7] +<br>
+ pix[8] + pix[9] + pix[10] + pix[11] + pix[12] + pix[13] + pix[14] + pix[15] +<br>
+ pix[16] + pix[17] + pix[18] + pix[19] + pix[20] + pix[21] + pix[22] + pix[23] +<br>
+ pix[24] + pix[25] + pix[26] + pix[27] + pix[28] + pix[29] + pix[30] + pix[31];<br>
+ for (int16_t x = 0; x < stride - 32; x++)<br>
+ {<br>
+ sum[x] = v + sum[x - stride];<br>
+ v += pix[x + 32] - pix[x];<br>
+ }<br>
}<br>
-}<br>
<br>
-static void integral_init4v(uint32_t *sum4, intptr_t stride)<br>
-{<br>
- for (int x = 0; x < stride; x++)<br>
- sum4[x] = sum4[x + 4 * stride] - sum4[x];<br>
-}<br>
+ static void integral_init4v_c(uint32_t *sum4, intptr_t stride)<br>
+ {<br>
+ for (int x = 0; x < stride; x++)<br>
+ sum4[x] = sum4[x + 4 * stride] - sum4[x];<br>
+ }<br>
<br>
-static void integral_init8v(uint32_t *sum8, intptr_t stride)<br>
-{<br>
- for (int x = 0; x < stride; x++)<br>
- sum8[x] = sum8[x + 8 * stride] - sum8[x];<br>
-}<br>
+ static void integral_init8v_c(uint32_t *sum8, intptr_t stride)<br>
+ {<br>
+ for (int x = 0; x < stride; x++)<br>
+ sum8[x] = sum8[x + 8 * stride] - sum8[x];<br>
+ }<br>
<br>
-static void integral_init12v(uint32_t *sum12, intptr_t stride)<br>
-{<br>
- for (int x = 0; x < stride; x++)<br>
- sum12[x] = sum12[x + 12 * stride] - sum12[x];<br>
-}<br>
+ static void integral_init12v_c(uint32_t *sum12, intptr_t stride)<br>
+ {<br>
+ for (int x = 0; x < stride; x++)<br>
+ sum12[x] = sum12[x + 12 * stride] - sum12[x];<br>
+ }<br>
<br>
-static void integral_init16v(uint32_t *sum16, intptr_t stride)<br>
-{<br>
- for (int x = 0; x < stride; x++)<br>
- sum16[x] = sum16[x + 16 * stride] - sum16[x];<br>
-}<br>
+ static void integral_init16v_c(uint32_t *sum16, intptr_t stride)<br>
+ {<br>
+ for (int x = 0; x < stride; x++)<br>
+ sum16[x] = sum16[x + 16 * stride] - sum16[x];<br>
+ }<br>
<br>
-static void integral_init24v(uint32_t *sum24, intptr_t stride)<br>
-{<br>
- for (int x = 0; x < stride; x++)<br>
- sum24[x] = sum24[x + 24 * stride] - sum24[x];<br>
-}<br>
+ static void integral_init24v_c(uint32_t *sum24, intptr_t stride)<br>
+ {<br>
+ for (int x = 0; x < stride; x++)<br>
+ sum24[x] = sum24[x + 24 * stride] - sum24[x];<br>
+ }<br>
<br>
-static void integral_init32v(uint32_t *sum32, intptr_t stride)<br>
-{<br>
- for (int x = 0; x < stride; x++)<br>
- sum32[x] = sum32[x + 32 * stride] - sum32[x];<br>
+ static void integral_init32v_c(uint32_t *sum32, intptr_t stride)<br>
+ {<br>
+ for (int x = 0; x < stride; x++)<br>
+ sum32[x] = sum32[x + 32 * stride] - sum32[x];<br>
+ }<br>
+<br>
+ void setupSeaIntegralPrimitives_c(<wbr>EncoderPrimitives &p)<br>
+ {<br>
+ p.integral_initv[INTEGRAL_4] = integral_init4v_c;<br>
+ p.integral_initv[INTEGRAL_8] = integral_init8v_c;<br>
+ p.integral_initv[INTEGRAL_12] = integral_init12v_c;<br>
+ p.integral_initv[INTEGRAL_16] = integral_init16v_c;<br>
+ p.integral_initv[INTEGRAL_24] = integral_init24v_c;<br>
+ p.integral_initv[INTEGRAL_32] = integral_init32v_c;<br>
+ p.integral_inith[INTEGRAL_4] = integral_init4h_c;<br>
+ p.integral_inith[INTEGRAL_8] = integral_init8h_c;<br>
+ p.integral_inith[INTEGRAL_12] = integral_init12h_c;<br>
+ p.integral_inith[INTEGRAL_16] = integral_init16h_c;<br>
+ p.integral_inith[INTEGRAL_24] = integral_init24h_c;<br>
+ p.integral_inith[INTEGRAL_32] = integral_init32h_c;<br>
+ }<br>
}<br>
<br>
void FrameFilter::destroy()<br>
@@ -833,47 +852,47 @@<br>
uint32_t *sum4x4 = m_frame->m_encData->m_<wbr>meIntegral[11] + (y + 1) * stride - padX;<br>
<br>
/*For width = 32 */<br>
- integral_init32h(sum32x32, pix, stride);<br>
+ integral_init32h_c(sum32x32, pix, stride);<br>
if (y >= 32 - padY)<br>
- integral_init32v(sum32x32 - 32 * stride, stride);<br>
- integral_init32h(sum32x24, pix, stride);<br>
+ integral_init32v_c(sum32x32 - 32 * stride, stride);<br>
+ integral_init32h_c(sum32x24, pix, stride);<br>
if (y >= 24 - padY)<br>
- integral_init24v(sum32x24 - 24 * stride, stride);<br>
- integral_init32h(sum32x8, pix, stride);<br>
+ integral_init24v_c(sum32x24 - 24 * stride, stride);<br>
+ integral_init32h_c(sum32x8, pix, stride);<br>
if (y >= 8 - padY)<br>
- integral_init8v(sum32x8 - 8 * stride, stride);<br>
+ integral_init8v_c(sum32x8 - 8 * stride, stride);<br>
/*For width = 24 */<br>
- integral_init24h(sum24x32, pix, stride);<br>
+ integral_init24h_c(sum24x32, pix, stride);<br>
if (y >= 32 - padY)<br>
- integral_init32v(sum24x32 - 32 * stride, stride);<br>
+ integral_init32v_c(sum24x32 - 32 * stride, stride);<br>
/*For width = 16 */<br>
- integral_init16h(sum16x16, pix, stride);<br>
+ integral_init16h_c(sum16x16, pix, stride);<br>
if (y >= 16 - padY)<br>
- integral_init16v(sum16x16 - 16 * stride, stride);<br>
- integral_init16h(sum16x12, pix, stride);<br>
+ integral_init16v_c(sum16x16 - 16 * stride, stride);<br>
+ integral_init16h_c(sum16x12, pix, stride);<br>
if (y >= 12 - padY)<br>
- integral_init12v(sum16x12 - 12 * stride, stride);<br>
- integral_init16h(sum16x4, pix, stride);<br>
+ integral_init12v_c(sum16x12 - 12 * stride, stride);<br>
+ integral_init16h_c(sum16x4, pix, stride);<br>
if (y >= 4 - padY)<br>
- integral_init4v(sum16x4 - 4 * stride, stride);<br>
+ integral_init4v_c(sum16x4 - 4 * stride, stride);<br>
/*For width = 12 */<br>
- integral_init12h(sum12x16, pix, stride);<br>
+ integral_init12h_c(sum12x16, pix, stride);<br>
if (y >= 16 - padY)<br>
- integral_init16v(sum12x16 - 16 * stride, stride);<br>
+ integral_init16v_c(sum12x16 - 16 * stride, stride);<br>
/*For width = 8 */<br>
- integral_init8h(sum8x32, pix, stride);<br>
+ integral_init8h_c(sum8x32, pix, stride);<br>
if (y >= 32 - padY)<br>
- integral_init32v(sum8x32 - 32 * stride, stride);<br>
- integral_init8h(sum8x8, pix, stride);<br>
+ integral_init32v_c(sum8x32 - 32 * stride, stride);<br>
+ integral_init8h_c(sum8x8, pix, stride);<br>
if (y >= 8 - padY)<br>
- integral_init8v(sum8x8 - 8 * stride, stride);<br>
+ integral_init8v_c(sum8x8 - 8 * stride, stride);<br>
/*For width = 4 */<br>
- integral_init4h(sum4x16, pix, stride);<br>
+ integral_init4h_c(sum4x16, pix, stride);<br>
if (y >= 16 - padY)<br>
- integral_init16v(sum4x16 - 16 * stride, stride);<br>
- integral_init4h(sum4x4, pix, stride);<br>
+ integral_init16v_c(sum4x16 - 16 * stride, stride);<br>
+ integral_init4h_c(sum4x4, pix, stride);<br>
if (y >= 4 - padY)<br>
- integral_init4v(sum4x4 - 4 * stride, stride);<br>
+ integral_init4v_c(sum4x4 - 4 * stride, stride);<br>
}<br>
m_parallelFilter[row].m_<wbr>frameFilter-><wbr>integralCompleted.set(1);<br>
}<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/test/pixelharness.cpp<br>
--- a/source/test/pixelharness.cpp Wed May 10 10:40:16 2017 +0530<br>
+++ b/source/test/pixelharness.cpp Wed May 03 11:26:26 2017 +0530<br>
@@ -2002,6 +2002,70 @@<br>
return true;<br>
}<br>
<br>
+bool PixelHarness::check_integral_<wbr>initv(integralv_t ref, integralv_t opt)<br>
+{<br>
+ intptr_t srcStep = 64;<br>
+ int j = 0;<br>
+ uint32_t dst_ref[BUFFSIZE] = { 0 };<br>
+ uint32_t dst_opt[BUFFSIZE] = { 0 };<br>
+<br>
+ for (int i = 0; i < 64; i++)<br>
+ {<br>
+ dst_ref[i] = pixel_test_buff[0][i];<br>
+ dst_opt[i] = pixel_test_buff[0][i];<br>
+ }<br>
+<br>
+ for (int i = 0, k = 0; i < BUFFSIZE; i++)<br>
+ {<br>
+ if (i % 64 == 0)<br>
+ k++;<br>
+ dst_ref[i] = dst_ref[i % 64] + k;<br>
+ dst_opt[i] = dst_opt[i % 64] + k;<br>
+ }<br>
+<br>
+ int padx = 4;<br>
+ int pady = 4;<br>
+ uint32_t *dst_ref_ptr = dst_ref + srcStep * pady + padx;<br>
+ uint32_t *dst_opt_ptr = dst_opt + srcStep * pady + padx;<br>
+ for (int i = 0; i < ITERS; i++)<br>
+ {<br>
+ ref(dst_ref_ptr, srcStep);<br>
+ checked(opt, dst_opt_ptr, srcStep);<br>
+<br>
+ if (memcmp(dst_ref, dst_opt, sizeof(uint32_t) * BUFFSIZE))<br>
+ return false;<br>
+<br>
+ reportfail()<br>
+ j += INCR;<br>
+ }<br>
+ return true;<br>
+}<br>
+<br>
+bool PixelHarness::check_integral_<wbr>inith(integralh_t ref, integralh_t opt)<br>
+{<br>
+ intptr_t srcStep = 64;<br>
+ int j = 0;<br>
+ uint32_t dst_ref[BUFFSIZE] = { 0 };<br>
+ uint32_t dst_opt[BUFFSIZE] = { 0 };<br>
+<br>
+ int padx = 4;<br>
+ int pady = 4;<br>
+ uint32_t *dst_ref_ptr = dst_ref + srcStep * pady + padx;<br>
+ uint32_t *dst_opt_ptr = dst_opt + srcStep * pady + padx;<br>
+ for (int k = 0; k < ITERS; k++)<br>
+ {<br>
+ ref(dst_ref_ptr, pixel_test_buff[0], srcStep);<br>
+ checked(opt, dst_opt_ptr, pixel_test_buff[0], srcStep);<br>
+<br>
+ if (memcmp(dst_ref, dst_opt, sizeof(uint32_t) * BUFFSIZE))<br>
+ return false;<br>
+<br>
+ reportfail()<br>
+ j += INCR;<br>
+ }<br>
+ return true;<br>
+}<br>
+<br>
bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)<br>
{<br>
if (opt.pu[part].satd)<br>
@@ -2687,6 +2751,64 @@<br>
}<br>
}<br>
<br>
+ for (int k = 0; k < NUM_INTEGRAL_SIZE; k++)<br>
+ {<br>
+ if (opt.integral_initv[k] && !check_integral_initv(ref.<wbr>integral_initv[k], opt.integral_initv[k]))<br>
+ {<br>
+ switch (k)<br>
+ {<br>
+ case 0:<br>
+ printf("Integral4v failed!\n");<br>
+ break;<br>
+ case 1:<br>
+ printf("Integral8v failed!\n");<br>
+ break;<br>
+ case 2:<br>
+ printf("Integral12v failed!\n");<br>
+ break;<br>
+ case 3:<br>
+ printf("Integral16v failed!\n");<br>
+ break;<br>
+ case 4:<br>
+ printf("Integral24v failed!\n");<br>
+ break;<br>
+ case 5:<br>
+ printf("Integral32v failed!\n");<br>
+ break;<br>
+ }<br>
+ return false;<br>
+ }<br>
+ }<br>
+<br>
+<br>
+ for (int k = 0; k < NUM_INTEGRAL_SIZE; k++)<br>
+ {<br>
+ if (opt.integral_inith[k] && !check_integral_inith(ref.<wbr>integral_inith[k], opt.integral_inith[k]))<br>
+ {<br>
+ switch (k)<br>
+ {<br>
+ case 0:<br>
+ printf("Integral4h failed!\n");<br>
+ break;<br>
+ case 1:<br>
+ printf("Integral8h failed!\n");<br>
+ break;<br>
+ case 2:<br>
+ printf("Integral12h failed!\n");<br>
+ break;<br>
+ case 3:<br>
+ printf("Integral16h failed!\n");<br>
+ break;<br>
+ case 4:<br>
+ printf("Integral24h failed!\n");<br>
+ break;<br>
+ case 5:<br>
+ printf("Integral32h failed!\n");<br>
+ break;<br>
+ }<br>
+ return false;<br>
+ }<br>
+ }<br>
return true;<br>
}<br>
<br>
@@ -3209,4 +3331,67 @@<br>
HEADER0("pelFilterChroma_<wbr>Horizontal");<br>
REPORT_SPEEDUP(opt.<wbr>pelFilterChroma[1], ref.pelFilterChroma[1], pbuf1, 1, STRIDE, tc, maskP, maskQ);<br>
}<br>
+<br>
+ for (int k = 0; k < NUM_INTEGRAL_SIZE; k++)<br>
+ {<br>
+ if (opt.integral_initv[k])<br>
+ {<br>
+ switch (k)<br>
+ {<br>
+ case 0:<br>
+ HEADER0("integral_init4v");<br>
+ break;<br>
+ case 1:<br>
+ HEADER0("integral_init8v");<br>
+ break;<br>
+ case 2:<br>
+ HEADER0("integral_init12v");<br>
+ break;<br>
+ case 3:<br>
+ HEADER0("integral_init16v");<br>
+ break;<br>
+ case 4:<br>
+ HEADER0("integral_init24v");<br>
+ break;<br>
+ case 5:<br>
+ HEADER0("integral_init32v");<br>
+ break;<br>
+ default:<br>
+ break;<br>
+ }<br>
+ REPORT_SPEEDUP(opt.integral_<wbr>initv[k], ref.integral_initv[k], (uint32_t*)pbuf1, STRIDE);<br>
+ }<br>
+ }<br>
+<br>
+ for (int k = 0; k < NUM_INTEGRAL_SIZE; k++)<br>
+ {<br>
+ if (opt.integral_inith[k])<br>
+ {<br>
+ uint32_t dst_buf[BUFFSIZE] = { 0 };<br>
+ switch (k)<br>
+ {<br>
+ case 0:<br>
+ HEADER0("integral_init4h");<br>
+ break;<br>
+ case 1:<br>
+ HEADER0("integral_init8h");<br>
+ break;<br>
+ case 2:<br>
+ HEADER0("integral_init12h");<br>
+ break;<br>
+ case 3:<br>
+ HEADER0("integral_init16h");<br>
+ break;<br>
+ case 4:<br>
+ HEADER0("integral_init24h");<br>
+ break;<br>
+ case 5:<br>
+ HEADER0("integral_init32h");<br>
+ break;<br>
+ default:<br>
+ break;<br>
+ }<br>
+ REPORT_SPEEDUP(opt.integral_<wbr>inith[k], ref.integral_inith[k], dst_buf, pbuf1, STRIDE);<br>
+ }<br>
+ }<br>
}<br>
diff -r bc0e9bd7c08f -r 47e0de01255d source/test/pixelharness.h<br>
--- a/source/test/pixelharness.h Wed May 10 10:40:16 2017 +0530<br>
+++ b/source/test/pixelharness.h Wed May 03 11:26:26 2017 +0530<br>
@@ -126,6 +126,8 @@<br>
bool check_pelFilterLumaStrong_H(<wbr>pelFilterLumaStrong_t ref, pelFilterLumaStrong_t opt);<br>
bool check_pelFilterChroma_V(<wbr>pelFilterChroma_t ref, pelFilterChroma_t opt);<br>
bool check_pelFilterChroma_H(<wbr>pelFilterChroma_t ref, pelFilterChroma_t opt);<br>
+ bool check_integral_initv(<wbr>integralv_t ref, integralv_t opt);<br>
+ bool check_integral_inith(<wbr>integralh_t ref, integralh_t opt);<br>
<br>
public:<br>
<br>
______________________________<wbr>_________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/<wbr>listinfo/x265-devel</a><br>
</blockquote></div><br></div></div>