[x265] [PATCH x265] Add AVX2 assembly code for ssimDistortion primitive.
Akil
akil at multicorewareinc.com
Tue Mar 5 05:33:27 CET 2019
# HG changeset patch
# User Akil Ayyappan<akil at multicorewareinc.com>
# Date 1551251102 -19800
# Wed Feb 27 12:35:02 2019 +0530
# Node ID d12a4caf7963fd47d646040689ad5f02754ad879
# Parent cb3e172a5f51c6a4bf8adb7953fe53277f5a1979
x86: ssimDistortion primitive
This patch adds AVX2 assembly for this primitive.
|---------|-----------|-----------------|-----------------|
| Size |Performance|AVX2 clock cycles|CPP clock cycles |
|---------|-----------|-----------------|-----------------|
| [4x4] | 3.52x | 264.43 | 932.05 |
| [8x8] | 5.11x | 619.24 | 3163.56 |
| [16x16] | 5.44x | 2114.00 | 11490.52 |
| [32x32] | 6.01x | 7589.70 | 45608.01 |
| [64x64] | 6.70x | 27859.21 | 186634.25 |
|---------|-----------|-----------------|-----------------|
diff -r cb3e172a5f51 -r d12a4caf7963 source/common/pixel.cpp
--- a/source/common/pixel.cpp Tue Feb 19 20:20:35 2019 +0530
+++ b/source/common/pixel.cpp Wed Feb 27 12:35:02 2019 +0530
@@ -934,6 +934,31 @@
}
}
+template<int log2TrSize>
+static void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel* recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
+{
+    /* Reference SSIM-RD distortion kernel for a trSize x trSize block.
+     * fenc/fStride:  source block and its stride (in pixels)
+     * recon/rstride: reconstructed block and its stride (in pixels)
+     * ssBlock (out): sum of squared residuals, sum((fenc - recon)^2)
+     * ac_k (out):    sum((fenc >> shift)^2); the caller subtracts the DC
+     *                term to obtain the source AC energy
+     * Both outputs are overwritten, not accumulated into.
+     * Single pass over the block: the original code traversed fenc twice
+     * (once per accumulator) for no benefit; unsigned loop indices avoid
+     * the signed/unsigned comparison against trSize. */
+    const uint32_t trSize = 1 << log2TrSize;
+    *ssBlock = 0;
+    *ac_k = 0;
+    for (uint32_t y = 0; y < trSize; y++)
+    {
+        for (uint32_t x = 0; x < trSize; x++)
+        {
+            int temp = fenc[y * fStride + x] - recon[y * rstride + x]; // copy of residual coeff
+            *ssBlock += temp * temp;
+            uint32_t src = fenc[y * fStride + x] >> shift;
+            *ac_k += src * src;
+        }
+    }
+}
+
+
#if HIGH_BIT_DEPTH
static pixel planeClipAndMax_c(pixel *src, intptr_t stride, int width, int
height, uint64_t *outsum,
const pixel minPix, const pixel maxPix)
@@ -1283,5 +1308,11 @@
p.propagateCost = estimateCUPropagateCost;
p.fix8Unpack = cuTreeFix8Unpack;
p.fix8Pack = cuTreeFix8Pack;
+
+ p.cu[BLOCK_4x4].ssimDist = ssimDist_c<2>;
+ p.cu[BLOCK_8x8].ssimDist = ssimDist_c<3>;
+ p.cu[BLOCK_16x16].ssimDist = ssimDist_c<4>;
+ p.cu[BLOCK_32x32].ssimDist = ssimDist_c<5>;
+ p.cu[BLOCK_64x64].ssimDist = ssimDist_c<6>;
}
}
diff -r cb3e172a5f51 -r d12a4caf7963 source/common/primitives.h
--- a/source/common/primitives.h Tue Feb 19 20:20:35 2019 +0530
+++ b/source/common/primitives.h Wed Feb 27 12:35:02 2019 +0530
@@ -227,6 +227,7 @@
typedef void(*psyRdoQuant_t)(int16_t *m_resiDctCoeff, int16_t
*m_fencDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t
*totalRdCost, int64_t *psyScale, uint32_t blkPos);
typedef void(*psyRdoQuant_t1)(int16_t *m_resiDctCoeff, int64_t
*costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost,uint32_t
blkPos);
typedef void(*psyRdoQuant_t2)(int16_t *m_resiDctCoeff, int16_t
*m_fencDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t
*totalRdCost, int64_t *psyScale, uint32_t blkPos);
+typedef void(*ssimDistortion_t)(const pixel *fenc, uint32_t fStride, const
pixel *recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t
*ac_k);
/* Function pointers to optimized encoder primitives. Each pointer can
reference
* either an assembly routine, a SIMD intrinsic primitive, or a C function
*/
struct EncoderPrimitives
@@ -303,6 +304,7 @@
psyRdoQuant_t psyRdoQuant;
psyRdoQuant_t1 psyRdoQuant_1p;
psyRdoQuant_t2 psyRdoQuant_2p;
+ ssimDistortion_t ssimDist;
}
cu[NUM_CU_SIZES];
/* These remaining primitives work on either fixed block sizes or take
diff -r cb3e172a5f51 -r d12a4caf7963 source/common/quant.cpp
--- a/source/common/quant.cpp Tue Feb 19 20:20:35 2019 +0530
+++ b/source/common/quant.cpp Wed Feb 27 12:35:02 2019 +0530
@@ -501,15 +501,8 @@
// Calculation of (X(k) - Y(k)) * (X(k) - Y(k)), AC
ssBlock = 0;
- for (int y = 0; y < trSize; y++)
- {
- for (int x = 0; x < trSize; x++)
- {
- int temp = fenc[y * fStride + x] - recon[y * rstride + x]; //
copy of residual coeff
- ssBlock += temp * temp;
- }
- }
-
+ uint64_t ac_k = 0;
+ primitives.cu[log2TrSize - 2].ssimDist(fenc, fStride, recon, rstride,
&ssBlock, shift, &ac_k);
ssAc = ssBlock - ssDc;
// 1. Calculation of fdc'
@@ -535,15 +528,6 @@
uint64_t fAc_num = 0;
// 2. Calculate ac component
- uint64_t ac_k = 0;
- for (int block_yy = 0; block_yy < trSize; block_yy += 1)
- {
- for (int block_xx = 0; block_xx < trSize; block_xx += 1)
- {
- uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift;
- ac_k += temp * temp;
- }
- }
ac_k -= dc_k;
double s = 1 + 0.005 * cu.m_qp[absPartIdx];
diff -r cb3e172a5f51 -r d12a4caf7963 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Feb 19 20:20:35 2019 +0530
+++ b/source/common/x86/asm-primitives.cpp Wed Feb 27 12:35:02 2019 +0530
@@ -2319,6 +2319,12 @@
p.cu[BLOCK_16x16].psyRdoQuant_1p = PFX(psyRdoQuant_1p16_avx2);
p.cu[BLOCK_32x32].psyRdoQuant_1p = PFX(psyRdoQuant_1p32_avx2);
+ p.cu[BLOCK_4x4].ssimDist = PFX(ssimDist4_avx2);
+ p.cu[BLOCK_8x8].ssimDist = PFX(ssimDist8_avx2);
+ p.cu[BLOCK_16x16].ssimDist = PFX(ssimDist16_avx2);
+ p.cu[BLOCK_32x32].ssimDist = PFX(ssimDist32_avx2);
+ p.cu[BLOCK_64x64].ssimDist = PFX(ssimDist64_avx2);
+
/* TODO: This kernel needs to be modified to work with
HIGH_BIT_DEPTH only
p.planeClipAndMax = PFX(planeClipAndMax_avx2); */
@@ -4706,6 +4712,12 @@
p.cu[BLOCK_16x16].psyRdoQuant_1p = PFX(psyRdoQuant_1p16_avx2);
p.cu[BLOCK_32x32].psyRdoQuant_1p = PFX(psyRdoQuant_1p32_avx2);
+ p.cu[BLOCK_4x4].ssimDist = PFX(ssimDist4_avx2);
+ p.cu[BLOCK_8x8].ssimDist = PFX(ssimDist8_avx2);
+ p.cu[BLOCK_16x16].ssimDist = PFX(ssimDist16_avx2);
+ p.cu[BLOCK_32x32].ssimDist = PFX(ssimDist32_avx2);
+ p.cu[BLOCK_64x64].ssimDist = PFX(ssimDist64_avx2);
+
}
if (cpuMask & X265_CPU_AVX512)
{
diff -r cb3e172a5f51 -r d12a4caf7963 source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm Tue Feb 19 20:20:35 2019 +0530
+++ b/source/common/x86/pixel-a.asm Wed Feb 27 12:35:02 2019 +0530
@@ -73,6 +73,16 @@
cextern pb_movemask_32
cextern pw_pixel_max
+%if BIT_DEPTH == 12
+ %define SSIMRD_SHIFT 4
+%elif BIT_DEPTH == 10
+ %define SSIMRD_SHIFT 2
+%elif BIT_DEPTH == 8
+ %define SSIMRD_SHIFT 0
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
;=============================================================================
; SATD
;=============================================================================
@@ -360,6 +370,24 @@
RET
%endmacro
+; Accumulate one group of 8 pixels (dwords) for the ssimDist kernels.
+; Implicit inputs:  m0 = 8 fenc values, m1 = 8 recon values (zero-extended dwords)
+; Implicit outputs: m4 += (fenc - recon)^2 partial sums (qword pairs per lane)
+;                   m7 += (fenc >> SSIMRD_SHIFT)^2 partial sums
+; Clobbers m2, m6.  NOTE(review): the two macro parameters are never
+; referenced in the body; it always operates on m0/m1.
+%macro SSIM_RD_COL 2
+ vpsrld m6, m0, SSIMRD_SHIFT ; m6 = fenc >> shift (shift fixed per BIT_DEPTH)
+ vpsubd m0, m1 ; m0 = fenc - recon (residual)
+
+ vpmuldq m2, m0, m0 ; squares of even dwords -> 64-bit products
+ vpsrldq m0, m0, 4 ; move odd dwords to even positions (per 128-bit lane)
+ vpmuldq m0, m0, m0 ; squares of odd dwords
+ vpaddq m0, m2
+
+ vpmuldq m2, m6, m6 ; same even/odd squaring for the shifted fenc values
+ vpsrldq m6, m6, 4
+ vpmuldq m6, m6, m6
+ vpaddq m6, m2
+
+ vpaddq m4, m0 ; running ssBlock accumulator
+ vpaddq m7, m6 ; running ac_k accumulator
+%endmacro
+
; FIXME avoid the spilling of regs to hold 3*stride.
; for small blocks on x86_32, modify pixel pointer instead.
@@ -15883,3 +15911,395 @@
RET
%endif
%endif ; HIGH_BIT_DEPTH == 1 && BIT_DEPTH == 10
+
+;template<int log2TrSize>
+;static void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel*
recon, intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
+;{
+; *ssBlock = 0;
+; const uint32_t trSize = 1 << log2TrSize;
+; for (int y = 0; y < trSize; y++)
+; {
+; for (int x = 0; x < trSize; x++)
+; {
+; int temp = fenc[y * fStride + x] - recon[y * rstride + x]; //
copy of residual coeff
+; *ssBlock += temp * temp;
+; }
+; }
+;
+; *ac_k = 0;
+; for (int block_yy = 0; block_yy < trSize; block_yy += 1)
+; {
+; for (int block_xx = 0; block_xx < trSize; block_xx += 1)
+; {
+; uint32_t temp = fenc[block_yy * fStride + block_xx] >> shift;
+; *ac_k += temp * temp;
+; }
+; }
+;}
+;-----------------------------------------------------------------------------------------------------------------
+; void ssimDist_c(const pixel* fenc, uint32_t fStride, const pixel* recon,
intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
+;-----------------------------------------------------------------------------------------------------------------
+
+; void ssimDist4(const pixel* fenc, uint32_t fStride, const pixel* recon,
+;                intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
+; r0=fenc r1=fStride r2=recon r3=rstride r4=ssBlock r5=shift r6=ac_k
+; NOTE(review): the run-time shift argument (r5) is never read; the shift is
+; baked in at assembly time as SSIMRD_SHIFT, derived from BIT_DEPTH.
+; 4x4 block: each row's 4 pixels are widened to qwords and squared directly.
+INIT_YMM avx2
+cglobal ssimDist4, 7, 8, 8
+ mov r7d, 4 ; row counter
+ vpxor m4, m4 ;ssBlock
+ vpxor m3, m3 ; zero, used by the final horizontal reduction
+ vpxor m7, m7 ;ac_k
+.row:
+%if HIGH_BIT_DEPTH
+ vpmovzxwq m0, [r0] ;fenc
+ vpmovzxwq m1, [r2] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbq m0, [r0]
+ vpmovzxbq m1, [r2]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+ vpsrlq m6, m0, SSIMRD_SHIFT ; m6 = fenc >> shift
+ vpsubq m0, m1 ; m0 = fenc - recon
+ vpmuldq m0, m0, m0 ; residual squared (signed 32x32 -> 64)
+ vpmuldq m6, m6, m6 ; shifted source squared
+ vpaddq m4, m0
+ vpaddq m7, m6
+
+%if HIGH_BIT_DEPTH
+ lea r0, [r0 + 2 * r1] ; strides are in pixels; 2 bytes per pixel
+ lea r2, [r2 + 2 * r3]
+%else
+ lea r0, [r0 + r1]
+ lea r2, [r2 + r3]
+%endif
+ dec r7d
+ jnz .row
+ vextracti128 xm5, m4, 1 ; fold upper 128 bits into lower
+ vpaddq xm4, xm5
+ punpckhqdq xm2, xm4, xm3 ; fold high qword into low
+ paddq xm4, xm2
+
+ vextracti128 xm5, m7, 1 ; same reduction for the ac_k accumulator
+ vpaddq xm7, xm5
+ punpckhqdq xm2, xm7, xm3
+ paddq xm7, xm2
+
+ movq [r4], xm4 ; *ssBlock
+ movq [r6], xm7 ; *ac_k
+ RET
+
+
+; void ssimDist8(const pixel* fenc, uint32_t fStride, const pixel* recon,
+;                intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
+; r0=fenc r1=fStride r2=recon r3=rstride r4=ssBlock r5=shift r6=ac_k
+; NOTE(review): r5 (shift) is unused; SSIMRD_SHIFT is applied instead.
+; 8x8 block: one SSIM_RD_COL invocation (8 dwords) per row.
+INIT_YMM avx2
+cglobal ssimDist8, 7, 8, 8
+ mov r7d, 8 ; row counter
+ vpxor m4, m4 ;ssBlock
+ vpxor m3, m3 ; zero, used by the final horizontal reduction
+ vpxor m7, m7 ;ac_k
+.row:
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0] ;fenc
+ vpmovzxwd m1, [r2] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0]
+ vpmovzxbd m1, [r2]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+%if HIGH_BIT_DEPTH
+ lea r0, [r0 + 2 * r1] ; strides are in pixels; 2 bytes per pixel
+ lea r2, [r2 + 2 * r3]
+%else
+ lea r0, [r0 + r1]
+ lea r2, [r2 + r3]
+%endif
+ dec r7d
+ jnz .row
+ vextracti128 xm5, m4, 1 ; horizontal reduction of qword partial sums
+ vpaddq xm4, xm5
+ punpckhqdq xm2, xm4, xm3
+ paddq xm4, xm2
+
+ vextracti128 xm5, m7, 1
+ vpaddq xm7, xm5
+ punpckhqdq xm2, xm7, xm3
+ paddq xm7, xm2
+
+ movq [r4], xm4 ; *ssBlock
+ movq [r6], xm7 ; *ac_k
+ RET
+
+
+; void ssimDist16(const pixel* fenc, uint32_t fStride, const pixel* recon,
+;                 intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
+; r0=fenc r1=fStride r2=recon r3=rstride r4=ssBlock r5=shift r6=ac_k
+; NOTE(review): r5 (shift) is unused; SSIMRD_SHIFT is applied instead.
+; 16x16 block: two SSIM_RD_COL invocations (8 pixels each) per row.
+INIT_YMM avx2
+cglobal ssimDist16, 7, 8, 8
+ mov r7d, 16 ; row counter
+ vpxor m4, m4 ;ssBlock
+ vpxor m3, m3 ; zero, used by the final horizontal reduction
+ vpxor m7, m7 ;ac_k
+.row:
+;Col 1-8
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0] ;fenc
+ vpmovzxwd m1, [r2] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0]
+ vpmovzxbd m1, [r2]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 9-16
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 16] ;fenc
+ vpmovzxwd m1, [r2 + 16] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 8]
+ vpmovzxbd m1, [r2 + 8]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+%if HIGH_BIT_DEPTH
+ lea r0, [r0 + 2 * r1] ; strides are in pixels; 2 bytes per pixel
+ lea r2, [r2 + 2 * r3]
+%else
+ lea r0, [r0 + r1]
+ lea r2, [r2 + r3]
+%endif
+ dec r7d
+ jnz .row
+ vextracti128 xm5, m4, 1 ; horizontal reduction of qword partial sums
+ vpaddq xm4, xm5
+ punpckhqdq xm2, xm4, xm3
+ paddq xm4, xm2
+
+ vextracti128 xm5, m7, 1
+ vpaddq xm7, xm5
+ punpckhqdq xm2, xm7, xm3
+ paddq xm7, xm2
+
+ movq [r4], xm4 ; *ssBlock
+ movq [r6], xm7 ; *ac_k
+ RET
+
+
+; void ssimDist32(const pixel* fenc, uint32_t fStride, const pixel* recon,
+;                 intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
+; r0=fenc r1=fStride r2=recon r3=rstride r4=ssBlock r5=shift r6=ac_k
+; NOTE(review): r5 (shift) is unused; SSIMRD_SHIFT is applied instead.
+; 32x32 block: four SSIM_RD_COL invocations (8 pixels each) per row.
+; Column offsets are in bytes: 16/32/48 for 2-byte pixels, 8/16/24 for 8-bit.
+INIT_YMM avx2
+cglobal ssimDist32, 7, 8, 8
+ mov r7d, 32 ; row counter
+ vpxor m4, m4 ;ssBlock
+ vpxor m3, m3 ; zero, used by the final horizontal reduction
+ vpxor m7, m7 ;ac_k
+.row:
+;Col 1-8
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0] ;fenc
+ vpmovzxwd m1, [r2] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0]
+ vpmovzxbd m1, [r2]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 9-16
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 16] ;fenc
+ vpmovzxwd m1, [r2 + 16] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 8]
+ vpmovzxbd m1, [r2 + 8]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 17-24
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 32] ;fenc
+ vpmovzxwd m1, [r2 + 32] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 16]
+ vpmovzxbd m1, [r2 + 16]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 25-32
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 48] ;fenc
+ vpmovzxwd m1, [r2 + 48] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 24]
+ vpmovzxbd m1, [r2 + 24]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+%if HIGH_BIT_DEPTH
+ lea r0, [r0 + 2 * r1] ; strides are in pixels; 2 bytes per pixel
+ lea r2, [r2 + 2 * r3]
+%else
+ lea r0, [r0 + r1]
+ lea r2, [r2 + r3]
+%endif
+ dec r7d
+ jnz .row
+ vextracti128 xm5, m4, 1 ; horizontal reduction of qword partial sums
+ vpaddq xm4, xm5
+ punpckhqdq xm2, xm4, xm3
+ paddq xm4, xm2
+
+ vextracti128 xm5, m7, 1
+ vpaddq xm7, xm5
+ punpckhqdq xm2, xm7, xm3
+ paddq xm7, xm2
+
+ movq [r4], xm4 ; *ssBlock
+ movq [r6], xm7 ; *ac_k
+ RET
+
+
+; void ssimDist64(const pixel* fenc, uint32_t fStride, const pixel* recon,
+;                 intptr_t rstride, uint64_t *ssBlock, int shift, uint64_t *ac_k)
+; r0=fenc r1=fStride r2=recon r3=rstride r4=ssBlock r5=shift r6=ac_k
+; NOTE(review): r5 (shift) is unused; SSIMRD_SHIFT is applied instead.
+; 64x64 block: eight SSIM_RD_COL invocations (8 pixels each) per row.
+; Column offsets are in bytes: multiples of 16 for 2-byte pixels, of 8 for 8-bit.
+INIT_YMM avx2
+cglobal ssimDist64, 7, 8, 8
+ mov r7d, 64 ; row counter
+ vpxor m4, m4 ;ssBlock
+ vpxor m3, m3 ; zero, used by the final horizontal reduction
+ vpxor m7, m7 ;ac_k
+.row:
+;Col 1-8
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0] ;fenc
+ vpmovzxwd m1, [r2] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0]
+ vpmovzxbd m1, [r2]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 9-16
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 16] ;fenc
+ vpmovzxwd m1, [r2 + 16] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 8]
+ vpmovzxbd m1, [r2 + 8]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 17-24
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 32] ;fenc
+ vpmovzxwd m1, [r2 + 32] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 16]
+ vpmovzxbd m1, [r2 + 16]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 25-32
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 48] ;fenc
+ vpmovzxwd m1, [r2 + 48] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 24]
+ vpmovzxbd m1, [r2 + 24]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 33-40
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 64] ;fenc
+ vpmovzxwd m1, [r2 + 64] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 32]
+ vpmovzxbd m1, [r2 + 32]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 41-48
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 80] ;fenc
+ vpmovzxwd m1, [r2 + 80] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 40]
+ vpmovzxbd m1, [r2 + 40]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 49-56
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 96] ;fenc
+ vpmovzxwd m1, [r2 + 96] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 48]
+ vpmovzxbd m1, [r2 + 48]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+;Col 57-64
+%if HIGH_BIT_DEPTH
+ vpmovzxwd m0, [r0 + 112] ;fenc
+ vpmovzxwd m1, [r2 + 112] ;recon
+%elif BIT_DEPTH == 8
+ vpmovzxbd m0, [r0 + 56]
+ vpmovzxbd m1, [r2 + 56]
+%else
+ %error Unsupported BIT_DEPTH!
+%endif
+
+ SSIM_RD_COL m0, m1
+
+%if HIGH_BIT_DEPTH
+ lea r0, [r0 + 2 * r1] ; strides are in pixels; 2 bytes per pixel
+ lea r2, [r2 + 2 * r3]
+%else
+ lea r0, [r0 + r1]
+ lea r2, [r2 + r3]
+%endif
+ dec r7d
+ jnz .row
+ vextracti128 xm5, m4, 1 ; horizontal reduction of qword partial sums
+ vpaddq xm4, xm5
+ punpckhqdq xm2, xm4, xm3
+ paddq xm4, xm2
+
+ vextracti128 xm5, m7, 1
+ vpaddq xm7, xm5
+ punpckhqdq xm2, xm7, xm3
+ paddq xm7, xm2
+
+ movq [r4], xm4 ; *ssBlock
+ movq [r6], xm7 ; *ac_k
+ RET
diff -r cb3e172a5f51 -r d12a4caf7963 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Tue Feb 19 20:20:35 2019 +0530
+++ b/source/common/x86/pixel.h Wed Feb 27 12:35:02 2019 +0530
@@ -60,7 +60,8 @@
FUNCDEF_TU_S(sse_t, pixel_ssd_s_aligned, cpu, const int16_t*,
intptr_t); \
FUNCDEF_TU(uint64_t, pixel_var, cpu, const pixel*, intptr_t); \
FUNCDEF_TU(int, psyCost_pp, cpu, const pixel* source, intptr_t
sstride, const pixel* recon, intptr_t rstride); \
- FUNCDEF_TU(int, psyCost_ss, cpu, const int16_t* source, intptr_t
sstride, const int16_t* recon, intptr_t rstride)
+ FUNCDEF_TU(int, psyCost_ss, cpu, const int16_t* source, intptr_t
sstride, const int16_t* recon, intptr_t rstride); \
+ FUNCDEF_TU_S2(void, ssimDist, cpu, const pixel *fenc, uint32_t
fStride, const pixel *recon, intptr_t rstride, uint64_t *ssBlock, int
shift, uint64_t *ac_k)
DECL_PIXELS(mmx);
DECL_PIXELS(mmx2);
diff -r cb3e172a5f51 -r d12a4caf7963 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Tue Feb 19 20:20:35 2019 +0530
+++ b/source/test/pixelharness.cpp Wed Feb 27 12:35:02 2019 +0530
@@ -2270,6 +2270,32 @@
return true;
}
+bool PixelHarness::check_ssimDist(ssimDistortion_t ref, ssimDistortion_t opt)
+{
+    /* Verify an optimized ssimDist primitive against the C reference over
+     * random buffers and random stride pairs.  Both outputs (ssBlock and
+     * ac_k) must match on every iteration. */
+    uint32_t srcStride[5] = { 4, 8, 16, 32, 64 };
+    intptr_t dstStride[5] = { 4, 8, 16, 32, 64 };
+    int shift = X265_DEPTH - 8;
+    uint64_t opt_dest1 = 0, ref_dest1 = 0, opt_dest2 = 0, ref_dest2 = 0;
+    int j = 0;
+
+    for (int i = 0; i < ITERS; i++)
+    {
+        int index = i % TEST_CASES;
+        int k1 = rand() % 5, k2 = rand() % 5;
+        ref(pixel_test_buff[index] + j, srcStride[k1], pixel_test_buff[index + 10] + j, dstStride[k2], &ref_dest1, shift, &ref_dest2);
+        opt(pixel_test_buff[index] + j, srcStride[k1], pixel_test_buff[index + 10] + j, dstStride[k2], &opt_dest1, shift, &opt_dest2);
+
+        /* Fail when EITHER result differs; the original '&&' let a kernel
+         * that corrupts only one of the two outputs pass the test. */
+        if (opt_dest1 != ref_dest1 || opt_dest2 != ref_dest2)
+        {
+            return false;
+        }
+
+        reportfail()
+        j += INCR;
+    }
+    return true;
+}
+
+
bool PixelHarness::testPU(int part, const EncoderPrimitives& ref, const
EncoderPrimitives& opt)
{
if (opt.pu[part].satd)
@@ -2607,6 +2633,15 @@
}
}
+ if (opt.cu[i].ssimDist)
+ {
+ if (!check_ssimDist(ref.cu[i].ssimDist, opt.cu[i].ssimDist))
+ {
+ printf("\nssimDist[%dx%d] failed!\n", 4 << i, 4 << i);
+ return false;
+ }
+ }
+
if (i < BLOCK_64x64)
{
/* TU only primitives */
@@ -3093,6 +3128,7 @@
return false;
}
}
+
return true;
}
@@ -3392,6 +3428,14 @@
HEADER("psy_cost_pp[%dx%d]", 4 << i, 4 << i);
REPORT_SPEEDUP(opt.cu[i].psy_cost_pp, ref.cu[i].psy_cost_pp,
pbuf1, STRIDE, pbuf2, STRIDE);
}
+
+ if (opt.cu[i].ssimDist)
+ {
+ uint64_t dst1 = 0, dst2 = 0;
+ int shift = X265_DEPTH - 8;
+ printf("ssimDist[%dx%d]", 4 << i, 4 << i);
+ REPORT_SPEEDUP(opt.cu[i].ssimDist, ref.cu[i].ssimDist,
pixel_test_buff[0], 32, pixel_test_buff[5], 64, &dst1, shift, &dst2);
+ }
}
if (opt.weight_pp)
diff -r cb3e172a5f51 -r d12a4caf7963 source/test/pixelharness.h
--- a/source/test/pixelharness.h Tue Feb 19 20:20:35 2019 +0530
+++ b/source/test/pixelharness.h Wed Feb 27 12:35:02 2019 +0530
@@ -136,6 +136,7 @@
bool check_pelFilterChroma_H(pelFilterChroma_t ref, pelFilterChroma_t
opt);
bool check_integral_initv(integralv_t ref, integralv_t opt);
bool check_integral_inith(integralh_t ref, integralh_t opt);
+ bool check_ssimDist(ssimDistortion_t ref, ssimDistortion_t opt);
public:
--
*Regards,*
*Akil R*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20190305/dcf33b90/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ssimDistortion_avx2.patch
Type: application/octet-stream
Size: 23892 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20190305/dcf33b90/attachment-0001.obj>
More information about the x265-devel
mailing list