<div dir="ltr"><div># HG changeset patch</div><div># User Akil Ayyappan&lt;<a href="mailto:akil@multicorewareinc.com" target="_blank">akil@multicorewareinc.com</a>&gt;</div><div># Date 1551693998 -19800</div><div># Mon Mar 04 15:36:38 2019 +0530</div><div># Node ID 19f27e0c8a6f8250af5e2f7c7984dc3b57bea7e4</div><div># Parent d12a4caf7963fd47d646040689ad5f02754ad879</div><div>x86: normFactor primitive</div><div><br></div><div>This patch adds AVX2 assembly for this primitive.</div><div><br></div><div>Pushed to default branch of x265 repo</div><div><br></div><div>Thanks &amp; Regards,</div><div>Dinesh</div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Mar 5, 2019 at 10:04 AM Akil &lt;<a href="mailto:akil@multicorewareinc.com">akil@multicorewareinc.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div dir="ltr"><div># HG changeset patch</div><div># User Akil Ayyappan&lt;<a href="mailto:akil@multicorewareinc.com" target="_blank">akil@multicorewareinc.com</a>&gt;</div><div># Date 1551693998 -19800</div><div># Mon Mar 04 15:36:38 2019 +0530</div><div># Node ID 19f27e0c8a6f8250af5e2f7c7984dc3b57bea7e4</div><div># Parent d12a4caf7963fd47d646040689ad5f02754ad879</div><div>x86: normFactor primitive</div><div><br></div><div>This patch adds AVX2 assembly for this primitive.</div><div><br></div><div>|---------|-----------|-----------------|-----------------|</div><div>| Size |Performance|AVX2 clock cycles|CPP clock cycles |</div><div>|---------|-----------|-----------------|-----------------|</div><div>| [8x8] | 7.65x | 312.90 | 2394.83 |</div><div>| [16x16] | 8.42x | 1157.14 | 9741.56 |</div><div>| [32x32] | 9.56x | 3942.18 | 37692.20 |</div><div>| [64x64] | 8.96x | 15388.24 | 137889.28 |</div><div>|---------|-----------|-----------------|-----------------|</div><div><br></div><div>diff -r d12a4caf7963 -r 19f27e0c8a6f source/common/pixel.cpp</div><div>--- 
a/source/common/pixel.cpp<span style="white-space:pre-wrap"> </span>Wed Feb 27 12:35:02 2019 +0530</div><div>+++ b/source/common/pixel.cpp<span style="white-space:pre-wrap"> </span>Mon Mar 04 15:36:38 2019 +0530</div><div>@@ -959,6 +959,19 @@</div><div> }</div><div> }</div><div> </div><div>+static void normFact_c(const pixel* src, uint32_t blockSize, int shift, uint64_t *z_k)</div><div>+{</div><div>+ *z_k = 0;</div><div>+ for (uint32_t block_yy = 0; block_yy < blockSize; block_yy += 1)</div><div>+ {</div><div>+ for (uint32_t block_xx = 0; block_xx < blockSize; block_xx += 1)</div><div>+ {</div><div>+ uint32_t temp = src[block_yy * blockSize + block_xx] >> shift;</div><div>+ *z_k += temp * temp;</div><div>+ }</div><div>+ }</div><div>+}</div><div>+</div><div> #if HIGH_BIT_DEPTH</div><div> static pixel planeClipAndMax_c(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, </div><div> const pixel minPix, const pixel maxPix)</div><div>@@ -1314,5 +1327,10 @@</div><div> <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_16x16].ssimDist = ssimDist_c<4>;</div><div> <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_32x32].ssimDist = ssimDist_c<5>;</div><div> <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_64x64].ssimDist = ssimDist_c<6>;</div><div>+</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_8x8].normFact = normFact_c;</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_16x16].normFact = normFact_c;</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_32x32].normFact = normFact_c;</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_64x64].normFact = normFact_c;</div><div> }</div><div> }</div><div>diff -r d12a4caf7963 -r 19f27e0c8a6f source/common/primitives.h</div><div>--- a/source/common/primitives.h<span style="white-space:pre-wrap"> </span>Wed Feb 27 12:35:02 2019 +0530</div><div>+++ b/source/common/primitives.h<span style="white-space:pre-wrap"> </span>Mon Mar 04 15:36:38 2019 
+0530</div><div>@@ -228,6 +228,7 @@</div><div> typedef void(*psyRdoQuant_t1)(int16_t *m_resiDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost,uint32_t blkPos);</div><div> typedef void(*psyRdoQuant_t2)(int16_t *m_resiDctCoeff, int16_t *m_fencDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost, int64_t *psyScale, uint32_t blkPos);</div><div> typedef void(*ssimDistortion_t)(const pixel *fenc, uint32_t fStride, const pixel *recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k);</div><div>+typedef void(*normFactor_t)(const pixel *src, uint32_t blockSize, int shift, uint64_t *z_k);</div><div> /* Function pointers to optimized encoder primitives. Each pointer can reference</div><div> * either an assembly routine, a SIMD intrinsic primitive, or a C function */</div><div> struct EncoderPrimitives</div><div>@@ -305,6 +306,7 @@</div><div> <span style="white-space:pre-wrap"> </span>psyRdoQuant_t1 psyRdoQuant_1p;</div><div> <span style="white-space:pre-wrap"> </span>psyRdoQuant_t2 psyRdoQuant_2p;</div><div> ssimDistortion_t ssimDist;</div><div>+ normFactor_t normFact;</div><div> }</div><div> cu[NUM_CU_SIZES];</div><div> /* These remaining primitives work on either fixed block sizes or take</div><div>diff -r d12a4caf7963 -r 19f27e0c8a6f source/common/x86/asm-primitives.cpp</div><div>--- a/source/common/x86/asm-primitives.cpp<span style="white-space:pre-wrap"> </span>Wed Feb 27 12:35:02 2019 +0530</div><div>+++ b/source/common/x86/asm-primitives.cpp<span style="white-space:pre-wrap"> </span>Mon Mar 04 15:36:38 2019 +0530</div><div>@@ -2325,6 +2325,11 @@</div><div> <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_32x32].ssimDist = PFX(ssimDist32_avx2);</div><div> <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_64x64].ssimDist = PFX(ssimDist64_avx2);</div><div> </div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_8x8].normFact = PFX(normFact8_avx2);</div><div>+ <a href="http://p.cu" 
target="_blank">p.cu</a>[BLOCK_16x16].normFact = PFX(normFact16_avx2);</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_32x32].normFact = PFX(normFact32_avx2);</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_64x64].normFact = PFX(normFact64_avx2);</div><div>+</div><div> /* TODO: This kernel needs to be modified to work with HIGH_BIT_DEPTH only </div><div> p.planeClipAndMax = PFX(planeClipAndMax_avx2); */</div><div> </div><div>@@ -4718,6 +4723,11 @@</div><div> <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_32x32].ssimDist = PFX(ssimDist32_avx2);</div><div> <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_64x64].ssimDist = PFX(ssimDist64_avx2);</div><div> </div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_8x8].normFact = PFX(normFact8_avx2);</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_16x16].normFact = PFX(normFact16_avx2);</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_32x32].normFact = PFX(normFact32_avx2);</div><div>+ <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_64x64].normFact = PFX(normFact64_avx2);</div><div>+</div><div> }</div><div> if (cpuMask & X265_CPU_AVX512)</div><div> {</div><div>diff -r d12a4caf7963 -r 19f27e0c8a6f source/common/x86/pixel-a.asm</div><div>--- a/source/common/x86/pixel-a.asm<span style="white-space:pre-wrap"> </span>Wed Feb 27 12:35:02 2019 +0530</div><div>+++ b/source/common/x86/pixel-a.asm<span style="white-space:pre-wrap"> </span>Mon Mar 04 15:36:38 2019 +0530</div><div>@@ -388,6 +388,16 @@</div><div> vpaddq m7, m6</div><div> %endmacro</div><div> </div><div>+%macro NORM_FACT_COL 1</div><div>+ vpsrld m1, m0, SSIMRD_SHIFT</div><div>+ vpmuldq m2, m1, m1</div><div>+ vpsrldq m1, m1, 4</div><div>+ vpmuldq m1, m1, m1</div><div>+</div><div>+ vpaddq m1, m2</div><div>+ vpaddq m3, m1</div><div>+%endmacro</div><div>+</div><div> ; FIXME avoid the spilling of regs to hold 3*stride.</div><div> ; for small blocks on x86_32, modify pixel pointer 
instead.</div><div> </div><div>@@ -16303,3 +16313,266 @@</div><div> movq [r4], xm4</div><div> movq [r6], xm7</div><div> RET</div><div>+</div><div>+</div><div>+;static void normFact_c(const pixel* src, uint32_t blockSize, int shift, uint64_t *z_k)</div><div>+;{</div><div>+; *z_k = 0;</div><div>+; for (uint32_t block_yy = 0; block_yy < blockSize; block_yy += 1)</div><div>+; {</div><div>+; for (uint32_t block_xx = 0; block_xx < blockSize; block_xx += 1)</div><div>+; {</div><div>+; uint32_t temp = src[block_yy * blockSize + block_xx] >> shift;</div><div>+; *z_k += temp * temp;</div><div>+; }</div><div>+; }</div><div>+;}</div><div>+;--------------------------------------------------------------------------------------</div><div>+; void normFact_c(const pixel* src, uint32_t blockSize, int shift, uint64_t *z_k)</div><div>+;--------------------------------------------------------------------------------------</div><div>+INIT_YMM avx2</div><div>+cglobal normFact8, 4, 5, 6</div><div>+ mov r4d, 8</div><div>+ vpxor m3, m3 ;z_k</div><div>+ vpxor m5, m5</div><div>+.row:</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+%if HIGH_BIT_DEPTH</div><div>+ lea r0, [r0 + 2 * r1]</div><div>+%else</div><div>+ lea r0, [r0 + r1]</div><div>+%endif</div><div>+ dec r4d</div><div>+ jnz .row</div><div>+ vextracti128 xm4, m3, 1</div><div>+ vpaddq xm3, xm4</div><div>+ punpckhqdq xm2, xm3, xm5</div><div>+ paddq xm3, xm2</div><div>+ movq [r3], xm3</div><div>+ RET</div><div>+</div><div>+</div><div>+INIT_YMM avx2</div><div>+cglobal normFact16, 4, 5, 6</div><div>+ mov r4d, 16</div><div>+ vpxor m3, m3 ;z_k</div><div>+ vpxor m5, m5</div><div>+.row:</div><div>+;Col 1-8</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd 
m0, [r0]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 9-16</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 16] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 8]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+%if HIGH_BIT_DEPTH</div><div>+ lea r0, [r0 + 2 * r1]</div><div>+%else</div><div>+ lea r0, [r0 + r1]</div><div>+%endif</div><div>+ dec r4d</div><div>+ jnz .row</div><div>+ vextracti128 xm4, m3, 1</div><div>+ vpaddq xm3, xm4</div><div>+ punpckhqdq xm2, xm3, xm5</div><div>+ paddq xm3, xm2</div><div>+ movq [r3], xm3</div><div>+ RET</div><div>+</div><div>+</div><div>+INIT_YMM avx2</div><div>+cglobal normFact32, 4, 5, 6 </div><div>+ mov r4d, 32</div><div>+ vpxor m3, m3 ;z_k</div><div>+ vpxor m5, m5</div><div>+.row:</div><div>+;Col 1-8</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 9-16</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 16] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 8]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 17-24</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 32] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 16]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 25-32</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 48] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 
24]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+%if HIGH_BIT_DEPTH</div><div>+ lea r0, [r0 + 2 * r1]</div><div>+%else</div><div>+ lea r0, [r0 + r1]</div><div>+%endif</div><div>+ dec r4d</div><div>+ jnz .row</div><div>+ vextracti128 xm4, m3, 1</div><div>+ vpaddq xm3, xm4</div><div>+ punpckhqdq xm2, xm3, xm5</div><div>+ paddq xm3, xm2</div><div>+ movq [r3], xm3</div><div>+ RET</div><div>+</div><div>+</div><div>+INIT_YMM avx2</div><div>+cglobal normFact64, 4, 5, 6 </div><div>+ mov r4d, 64</div><div>+ vpxor m3, m3 ;z_k</div><div>+ vpxor m5, m5</div><div>+.row:</div><div>+;Col 1-8</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 9-16</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 16] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 8]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 17-24</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 32] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 16]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 25-32</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 48] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 24]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 33-40</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 64] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 32]</div><div>+%else</div><div>+ 
%error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 41-48</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 80] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 40]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 49-56</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 96] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 48]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+;Col 57-64</div><div>+%if HIGH_BIT_DEPTH</div><div>+ vpmovzxwd m0, [r0 + 112] ;src</div><div>+%elif BIT_DEPTH == 8</div><div>+ vpmovzxbd m0, [r0 + 56]</div><div>+%else</div><div>+ %error Unsupported BIT_DEPTH!</div><div>+%endif</div><div>+</div><div>+ NORM_FACT_COL m0</div><div>+</div><div>+%if HIGH_BIT_DEPTH</div><div>+ lea r0, [r0 + 2 * r1]</div><div>+%else</div><div>+ lea r0, [r0 + r1]</div><div>+%endif</div><div>+ dec r4d</div><div>+ jnz .row</div><div>+ vextracti128 xm4, m3, 1</div><div>+ vpaddq xm3, xm4</div><div>+ punpckhqdq xm2, xm3, xm5</div><div>+ paddq xm3, xm2</div><div>+ movq [r3], xm3</div><div>+ RET</div><div>diff -r d12a4caf7963 -r 19f27e0c8a6f source/common/x86/pixel.h</div><div>--- a/source/common/x86/pixel.h<span style="white-space:pre-wrap"> </span>Wed Feb 27 12:35:02 2019 +0530</div><div>+++ b/source/common/x86/pixel.h<span style="white-space:pre-wrap"> </span>Mon Mar 04 15:36:38 2019 +0530</div><div>@@ -61,7 +61,8 @@</div><div> FUNCDEF_TU(uint64_t, pixel_var, cpu, const pixel*, intptr_t); \</div><div> FUNCDEF_TU(int, psyCost_pp, cpu, const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride); \</div><div> FUNCDEF_TU(int, psyCost_ss, cpu, const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride); \</div><div>- 
FUNCDEF_TU_S2(void, ssimDist, cpu, const pixel *fenc, uint32_t fStride, const pixel *recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)</div><div>+ FUNCDEF_TU_S2(void, ssimDist, cpu, const pixel *fenc, uint32_t fStride, const pixel *recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k); \</div><div>+ FUNCDEF_TU_S2(void, normFact, cpu, const pixel *src, uint32_t blockSize, int shift, uint64_t *z_k)</div><div> </div><div> DECL_PIXELS(mmx);</div><div> DECL_PIXELS(mmx2);</div><div>diff -r d12a4caf7963 -r 19f27e0c8a6f source/encoder/analysis.cpp</div><div>--- a/source/encoder/analysis.cpp<span style="white-space:pre-wrap"> </span>Wed Feb 27 12:35:02 2019 +0530</div><div>+++ b/source/encoder/analysis.cpp<span style="white-space:pre-wrap"> </span>Mon Mar 04 15:36:38 2019 +0530</div><div>@@ -3696,14 +3696,8 @@</div><div> </div><div> // 2. Calculate ac component</div><div> uint64_t z_k = 0;</div><div>- for (uint32_t block_yy = 0; block_yy < blockSize; block_yy += 1)</div><div>- {</div><div>- for (uint32_t block_xx = 0; block_xx < blockSize; block_xx += 1)</div><div>- {</div><div>- uint32_t temp = src[block_yy * blockSize + block_xx] >> shift;</div><div>- z_k += temp * temp;</div><div>- }</div><div>- }</div><div>+ int block = (int)((log(blockSize) / log(2)) - 2);</div><div>+ <a href="http://primitives.cu" target="_blank">primitives.cu</a>[block].normFact(src, blockSize, shift, &z_k);</div><div> </div><div> // Remove the DC part</div><div> z_k -= z_o;</div><div>diff -r d12a4caf7963 -r 19f27e0c8a6f source/test/pixelharness.cpp</div><div>--- a/source/test/pixelharness.cpp<span style="white-space:pre-wrap"> </span>Wed Feb 27 12:35:02 2019 +0530</div><div>+++ b/source/test/pixelharness.cpp<span style="white-space:pre-wrap"> </span>Mon Mar 04 15:36:38 2019 +0530</div><div>@@ -2296,6 +2296,30 @@</div><div> return true;</div><div> }</div><div> </div><div>+bool PixelHarness::check_normFact(normFactor_t ref, normFactor_t opt, int 
block)</div><div>+{</div><div>+ int shift = X265_DEPTH - 8;</div><div>+ uint64_t opt_dest = 0, ref_dest = 0;</div><div>+ int j = 0;</div><div>+ int blockSize = 4 << block;</div><div>+</div><div>+ for (int i = 0; i < ITERS; i++)</div><div>+ {</div><div>+ int index = i % TEST_CASES;</div><div>+ ref(pixel_test_buff[index] + j, blockSize, shift, &ref_dest);</div><div>+ opt(pixel_test_buff[index] + j, blockSize, shift, &opt_dest);</div><div>+</div><div>+ if (opt_dest != ref_dest)</div><div>+ {</div><div>+ return false;</div><div>+ }</div><div>+</div><div>+ reportfail()</div><div>+ j += INCR;</div><div>+ }</div><div>+ return true;</div><div>+}</div><div>+</div><div> bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)</div><div> {</div><div> if (opt.pu[part].satd)</div><div>@@ -3129,6 +3153,18 @@</div><div> }</div><div> }</div><div> </div><div>+ for (int i = BLOCK_8x8; i < NUM_CU_SIZES; i++)</div><div>+ {</div><div>+ if (<a href="http://opt.cu" target="_blank">opt.cu</a>[i].normFact)</div><div>+ {</div><div>+ if (!check_normFact(<a href="http://ref.cu" target="_blank">ref.cu</a>[i].normFact, <a href="http://opt.cu" target="_blank">opt.cu</a>[i].normFact, i))</div><div>+ {</div><div>+ printf("\nnormFact[%dx%d] failed!\n", 4 << i, 4 << i);</div><div>+ return false;</div><div>+ }</div><div>+ }</div><div>+ }</div><div>+</div><div> return true;</div><div> }</div><div> </div><div>@@ -3769,4 +3805,16 @@</div><div> REPORT_SPEEDUP(opt.integral_inith[k], ref.integral_inith[k], dst_buf, pbuf1, STRIDE);</div><div> }</div><div> }</div><div>+</div><div>+ for (int i = BLOCK_8x8; i < NUM_CU_SIZES; i++)</div><div>+ {</div><div>+ if (<a href="http://opt.cu" target="_blank">opt.cu</a>[i].normFact)</div><div>+ {</div><div>+ uint64_t dst = 0;</div><div>+ int blockSize = 4 << i;</div><div>+ int shift = X265_DEPTH - 8;</div><div>+ printf("normFact[%dx%d]", blockSize, blockSize);</div><div>+ REPORT_SPEEDUP(<a href="http://opt.cu" 
target="_blank">opt.cu</a>[i].normFact, <a href="http://ref.cu" target="_blank">ref.cu</a>[i].normFact, pixel_test_buff[0], blockSize, shift, &dst);</div><div>+ }</div><div>+ }</div><div> }</div><div>diff -r d12a4caf7963 -r 19f27e0c8a6f source/test/pixelharness.h</div><div>--- a/source/test/pixelharness.h<span style="white-space:pre-wrap"> </span>Wed Feb 27 12:35:02 2019 +0530</div><div>+++ b/source/test/pixelharness.h<span style="white-space:pre-wrap"> </span>Mon Mar 04 15:36:38 2019 +0530</div><div>@@ -137,6 +137,7 @@</div><div> bool check_integral_initv(integralv_t ref, integralv_t opt);</div><div> bool check_integral_inith(integralh_t ref, integralh_t opt);</div><div> bool check_ssimDist(ssimDistortion_t ref, ssimDistortion_t opt);</div><div>+ bool check_normFact(normFactor_t ref, normFactor_t opt, int block);</div><div> </div><div> public:</div><div> </div><div><br></div><div><br></div>-- <br><div dir="ltr" class="gmail-m_8768129972488442555gmail_signature"><div dir="ltr"><i><b>Regards,</b></i><div><i><b>Akil R</b></i></div></div></div></div></div>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div>