[x265] [PATCH] asm: code for addAvg luma and chroma all sizes
dnyaneshwar at multicorewareinc.com
Sat Jan 18 11:02:51 CET 2014
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1390037965 -19800
# Sat Jan 18 15:09:25 2014 +0530
# Node ID a72a0900a84d48627541ab343c9e6d8a35b3db71
# Parent 4f396ba19b4729bd1257b8cd898a0157f9ed2d53
asm: code for addAvg luma and chroma all sizes
diff -r 4f396ba19b47 -r a72a0900a84d source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Sat Jan 18 12:01:44 2014 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp Sat Jan 18 15:09:25 2014 +0530
@@ -594,7 +594,7 @@
src1Stride = srcYuv1->m_width;
dststride = getStride();
- primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride);
+ primitives.luma_addAvg[part](srcY0, srcY1, dstY, src0Stride, src1Stride, dststride);
}
if (bChroma)
{
@@ -602,8 +602,8 @@
src1Stride = srcYuv1->m_cwidth;
dststride = getCStride();
- primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride);
- primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride);
+ primitives.chroma_addAvg[part](srcU0, srcU1, dstU, src0Stride, src1Stride, dststride);
+ primitives.chroma_addAvg[part](srcV0, srcV1, dstV, src0Stride, src1Stride, dststride);
}
}
diff -r 4f396ba19b47 -r a72a0900a84d source/common/pixel.cpp
--- a/source/common/pixel.cpp Sat Jan 18 12:01:44 2014 +0530
+++ b/source/common/pixel.cpp Sat Jan 18 15:09:25 2014 +0530
@@ -802,7 +802,7 @@
}
template<int bx, int by>
-void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride)
+void addAvg(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
{
int shiftNum, offset;
shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
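
For readers following the asm further down, here is a minimal standalone sketch of what the reordered primitive computes, assuming an 8-bit build (X265_DEPTH = 8, IF_INTERNAL_PREC = 14, IF_INTERNAL_OFFS = 8192). It is not the literal pixel.cpp body, just the arithmetic the SIMD routines have to match:

    #include <cstdint>
    #include <algorithm>

    // Reference model: average two 16-bit intermediate predictions into 8-bit pixels.
    static void addAvg_ref(const int16_t* src0, const int16_t* src1, uint8_t* dst,
                           intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride,
                           int bx, int by)
    {
        const int shiftNum = 14 + 1 - 8;                       // 7
        const int offset   = (1 << (shiftNum - 1)) + 2 * 8192; // 16448 (see pw_16448 below)

        for (int y = 0; y < by; y++)
        {
            for (int x = 0; x < bx; x++)
                dst[x] = (uint8_t)std::min(255, std::max(0, (src0[x] + src1[x] + offset) >> shiftNum));

            src0 += src0Stride;
            src1 += src1Stride;
            dst  += dstStride;
        }
    }
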
diff -r 4f396ba19b47 -r a72a0900a84d source/common/primitives.h
--- a/source/common/primitives.h Sat Jan 18 12:01:44 2014 +0530
+++ b/source/common/primitives.h Sat Jan 18 15:09:25 2014 +0530
@@ -203,7 +203,7 @@
typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
typedef void (*pixel_add_ps_t)(pixel *a, intptr_t dstride, pixel *b0, int16_t *b1, intptr_t sstride0, intptr_t sstride1);
-typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride);
+typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
/* Define a structure containing function pointers to optimized encoder
* primitives. Each pointer can reference either an assembly routine,
diff -r 4f396ba19b47 -r a72a0900a84d source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sat Jan 18 12:01:44 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Sat Jan 18 15:09:25 2014 +0530
@@ -547,6 +547,62 @@
SETUP_PIXEL_SSE_SP_DEF(64, 16, cpu); \
SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
+#define SETUP_LUMA_ADDAVG_FUNC_DEF(W, H, cpu) \
+ p.luma_addAvg[LUMA_## W ## x ## H] = x265_addAvg_## W ## x ## H ## cpu;
+
+#define LUMA_ADDAVG(cpu) \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(4, 4, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(4, 8, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(4, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(8, 4, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(8, 8, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(8, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(8, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(12, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 4, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 8, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 12, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(24, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 64, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 8, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 64, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(48, 64, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(64, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(64, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(64, 48, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(64, 64, cpu); \
+
+#define SETUP_CHROMA_ADDAVG_FUNC_DEF(W, H, cpu) \
+ p.chroma_addAvg[CHROMA_## W ## x ## H] = x265_addAvg_## W ## x ## H ## cpu;
+
+#define CHROMA_ADDAVG(cpu) \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 2, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 4, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 8, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 2, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 4, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 6, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 8, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 32, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(12, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 4, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 8, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 12, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 32, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(24, 32, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 8, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 32, cpu); \
+
#define SETUP_INTRA_ANG4(mode, fno, cpu) \
p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
#define SETUP_INTRA_ANG8(mode, fno, cpu) \
@@ -883,6 +939,9 @@
p.dct[DCT_4x4] = x265_dct4_sse2;
p.idct[IDCT_4x4] = x265_idct4_sse2;
p.idct[IDST_4x4] = x265_idst4_sse2;
+
+ LUMA_ADDAVG(_sse2);
+ CHROMA_ADDAVG(_sse2);
}
if (cpuMask & X265_CPU_SSSE3)
{
@@ -1012,6 +1071,10 @@
SETUP_INTRA_ANG4(33, 3, sse4);
p.dct[DCT_8x8] = x265_dct8_sse4;
+
+ p.chroma_addAvg[CHROMA_2x4] = x265_addAvg_2x4_sse4;
+ p.chroma_addAvg[CHROMA_2x8] = x265_addAvg_2x8_sse4;
+ p.chroma_addAvg[CHROMA_6x8] = x265_addAvg_6x8_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r 4f396ba19b47 -r a72a0900a84d source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm Sat Jan 18 12:01:44 2014 +0530
+++ b/source/common/x86/const-a.asm Sat Jan 18 15:09:25 2014 +0530
@@ -33,10 +33,13 @@
const pw_1, times 16 dw 1
const pw_16, times 16 dw 16
const pw_32, times 16 dw 32
+const pw_128, times 16 dw 128
+const pw_256, times 16 dw 256
const pw_512, times 16 dw 512
const pw_1024, times 16 dw 1024
const pw_4096, times 16 dw 4096
const pw_00ff, times 16 dw 0x00ff
+const pw_16448, times 16 dw 16448
const pw_pixel_max,times 16 dw ((1 << BIT_DEPTH)-1)
const deinterleave_shufd, dd 0,4,1,5,2,6,3,7
const pb_unpackbd1, times 2 db 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3
diff -r 4f396ba19b47 -r a72a0900a84d source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm Sat Jan 18 12:01:44 2014 +0530
+++ b/source/common/x86/mc-a.asm Sat Jan 18 15:09:25 2014 +0530
@@ -49,13 +49,1055 @@
cextern pw_8
cextern pw_32
cextern pw_64
+cextern pw_128
+cextern pw_256
cextern pw_512
cextern pw_00ff
+cextern pw_16448
cextern pw_pixel_max
cextern sw_64
cextern pd_32
cextern deinterleave_shufd
+;====================================================================================================================
+;void addAvg (int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
+;====================================================================================================================
+; r0 = pSrc0, r1 = pSrc1
+; r2 = pDst, r3 = iStride0
+; r4 = iStride1, r5 = iDstStride
+
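+; Rounding sketch (assuming an 8-bit build, IF_INTERNAL_PREC = 14): shiftNum = 7
+; and offset = (1 << 6) + 2 * 8192 = 16448, so the small widths below simply add
+; pw_16448 and shift right by 7.  The 2xN and 6x8 variants are SSE4.1 because
+; their stores use the memory-destination form of pextrw.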
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal addAvg_2x4, 6,7,7, src0, src1, dst, src0Stride, src1Stride, dstStride
+
+ mova m0, [pw_16448]
+ add r3, r3
+ add r4, r4
+
+ movd m1, [r0]
+ movd m2, [r0 + r3]
+ movd m3, [r1]
+ movd m4, [r1 + r4]
+
+ punpckldq m1, m2
+ punpckldq m3, m4
+
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movd m2, [r0]
+ movd m4, [r0 + r3]
+ movd m5, [r1]
+ movd m6, [r1 + r4]
+
+ punpckldq m2, m4
+ punpckldq m5, m6
+ punpcklqdq m1, m2
+ punpcklqdq m3, m5
+
+ paddw m1, m3
+ paddw m1, m0
+ psraw m1, 7
+ packuswb m1, m1
+
+ pextrw [r2], m1, 0
+ pextrw [r2 + r5], m1, 1
+ lea r2, [r2 + 2 * r5]
+ pextrw [r2], m1, 2
+ pextrw [r2 + r5], m1, 3
+
+ RET
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal addAvg_2x8, 6,7,7, src0, src1, dst, src0Stride, src1Stride, dstStride
+
+ mova m0, [pw_16448]
+ add r3, r3
+ add r4, r4
+
+ movd m1, [r0]
+ movd m2, [r0 + r3]
+ movd m3, [r1]
+ movd m4, [r1 + r4]
+
+ punpckldq m1, m2
+ punpckldq m3, m4
+
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movd m2, [r0]
+ movd m4, [r0 + r3]
+ movd m5, [r1]
+ movd m6, [r1 + r4]
+
+ punpckldq m2, m4
+ punpckldq m5, m6
+ punpcklqdq m1, m2
+ punpcklqdq m3, m5
+
+ paddw m1, m3
+ paddw m1, m0
+ psraw m1, 7
+ packuswb m1, m1
+
+ pextrw [r2], m1, 0
+ pextrw [r2 + r5], m1, 1
+ lea r2, [r2 + 2 * r5]
+ pextrw [r2], m1, 2
+ pextrw [r2 + r5], m1, 3
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movd m1, [r0]
+ movd m2, [r0 + r3]
+ movd m3, [r1]
+ movd m4, [r1 + r4]
+
+ punpckldq m1, m2
+ punpckldq m3, m4
+
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movd m2, [r0]
+ movd m4, [r0 + r3]
+ movd m5, [r1]
+ movd m6, [r1 + r4]
+
+ punpckldq m2, m4
+ punpckldq m5, m6
+ punpcklqdq m1, m2
+ punpcklqdq m3, m5
+
+ paddw m1, m3
+ paddw m1, m0
+ psraw m1, 7
+ packuswb m1, m1
+
+ pextrw [r2], m1, 0
+ pextrw [r2 + r5], m1, 1
+ lea r2, [r2 + 2 * r5]
+ pextrw [r2], m1, 2
+ pextrw [r2 + r5], m1, 3
+
+ RET
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal addAvg_4x2, 6,6,5, src0, src1, dst, src0Stride, src1Stride, dstStride
+
+ mova m4, [pw_16448]
+ add r3, r3
+ add r4, r4
+
+ movh m0, [r0]
+ movh m1, [r0 + r3]
+ movh m2, [r1]
+ movh m3, [r1 + r4]
+
+ punpcklqdq m0, m1
+ punpcklqdq m2, m3
+
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+
+ packuswb m0, m0
+ movd [r2], m0
+ pshufd m0, m0, 1
+ movd [r2 + r5], m0
+
+ RET
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
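+; The looped widths below replace "paddw pw_16448 / psraw 7" with pmulhrsw by
+; pw_256, which yields (x + 64) >> 7 with rounding, followed by paddw pw_128
+; (the remaining 16384 >> 7).  The result matches the reference while keeping
+; the large offset out of the 16-bit additions.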
+%macro ADDAVG_W4_H4 1
+INIT_XMM sse2
+cglobal addAvg_4x%1, 6,7,6, src0, src1, dst, src0Stride, src1Stride, dstStride
+ mova m4, [pw_256]
+ mova m5, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ mov r6d, %1/4
+
+.loop:
+ movh m0, [r0]
+ movh m1, [r0 + r3]
+ movh m2, [r1]
+ movh m3, [r1 + r4]
+
+ punpcklqdq m0, m1
+ punpcklqdq m2, m3
+
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ packuswb m0, m0
+ movd [r2], m0
+ pshufd m0, m0, 1
+ movd [r2 + r5], m0
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movh m0, [r0]
+ movh m1, [r0 + r3]
+ movh m2, [r1]
+ movh m3, [r1 + r4]
+
+ punpcklqdq m0, m1
+ punpcklqdq m2, m3
+
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ packuswb m0, m0
+ movd [r2], m0
+ pshufd m0, m0, 1
+ movd [r2 + r5], m0
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ dec r6d
+ jnz .loop
+ RET
+%endmacro
+
+ADDAVG_W4_H4 4
+ADDAVG_W4_H4 8
+ADDAVG_W4_H4 16
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal addAvg_6x8, 6,7,5, src0, src1, dst, src0Stride, src1Stride, dstStride
+
+ mova m4, [pw_16448]
+ add r3, r3
+ add r4, r4
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+ packuswb m0, m0
+ movd [r2], m0
+ pextrw [r2 + 4], m0, 2
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ paddw m1, m4
+ psraw m1, 7
+ packuswb m1, m1
+ movd [r2 + r5], m1
+ pextrw [r2 + r5 + 4], m1, 2
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+ packuswb m0, m0
+ movd [r2], m0
+ pextrw [r2 + 4], m0, 2
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ paddw m1, m4
+ psraw m1, 7
+ packuswb m1, m1
+ movd [r2 + r5], m1
+ pextrw [r2 + r5 + 4], m1, 2
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+ packuswb m0, m0
+ movd [r2], m0
+ pextrw [r2 + 4], m0, 2
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ paddw m1, m4
+ psraw m1, 7
+ packuswb m1, m1
+ movd [r2 + r5], m1
+ pextrw [r2 + r5 + 4], m1, 2
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+ packuswb m0, m0
+ movd [r2], m0
+ pextrw [r2 + 4], m0, 2
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ paddw m1, m4
+ psraw m1, 7
+ packuswb m1, m1
+ movd [r2 + r5], m1
+ pextrw [r2 + r5 + 4], m1, 2
+
+ RET
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal addAvg_8x2, 6,6,5, src0, src1, dst, src0Stride, src1Stride, dstStride
+ mova m4, [pw_16448]
+ add r3, r3
+ add r4, r4
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+ packuswb m0, m0
+ movh [r2], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ paddw m1, m4
+ psraw m1, 7
+ packuswb m1, m1
+ movh [r2 + r5], m1
+
+ RET
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal addAvg_8x6, 6,6,5, src0, src1, dst, src0Stride, src1Stride, dstStride
+
+ mova m4, [pw_16448]
+ add r3, r3
+ add r4, r4
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+ packuswb m0, m0
+ movh [r2], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ paddw m1, m4
+ psraw m1, 7
+ packuswb m1, m1
+ movh [r2 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+ packuswb m0, m0
+ movh [r2], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ paddw m1, m4
+ psraw m1, 7
+ packuswb m1, m1
+ movh [r2 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ paddw m0, m4
+ psraw m0, 7
+ packuswb m0, m0
+ movh [r2], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ paddw m1, m4
+ psraw m1, 7
+ packuswb m1, m1
+ movh [r2 + r5], m1
+
+ RET
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+%macro ADDAVG_W8_H4 1
+INIT_XMM sse2
+cglobal addAvg_8x%1, 6,7,6, src0, src1, dst, src0Stride, src1Stride, dstStride
+
+ mova m4, [pw_256]
+ mova m5, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ mov r6d, %1/4
+
+.loop:
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ packuswb m0, m0
+ movh [r2], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m1, m1
+ movh [r2 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ packuswb m0, m0
+ movh [r2], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m1, m1
+ movh [r2 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ dec r6d
+ jnz .loop
+ RET
+%endmacro
+
+ADDAVG_W8_H4 4
+ADDAVG_W8_H4 8
+ADDAVG_W8_H4 16
+ADDAVG_W8_H4 32
+
+;-----------------------------------------------------------------------------
+
+
+;-----------------------------------------------------------------------------
+%macro ADDAVG_W12_H4 1
+INIT_XMM sse2
+cglobal addAvg_12x%1, 6,7,6, src0, src1, dst, src0Stride, src1Stride, dstStride
+ mova m4, [pw_256]
+ mova m5, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ mov r6d, %1/4
+
+.loop:
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+ packuswb m0, m0
+ movh [r2], m0
+
+ movh m0, [r0 + 16]
+ movh m1, [r0 + 16 + r3]
+ movh m2, [r1 + 16]
+ movh m3, [r1 + 16 + r4]
+
+ punpcklqdq m0, m1
+ punpcklqdq m2, m3
+
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ packuswb m0, m0
+ movd [r2 + 8], m0
+ pshufd m0, m0, 1
+ movd [r2 + 8 + r5], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m1, m1
+ movh [r2 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ packuswb m0, m0
+ movh [r2], m0
+
+ movh m0, [r0 + 16]
+ movh m1, [r0 + 16 + r3]
+ movh m2, [r1 + 16]
+ movh m3, [r1 + 16 + r4]
+
+ punpcklqdq m0, m1
+ punpcklqdq m2, m3
+
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ packuswb m0, m0
+ movd [r2 + 8], m0
+ pshufd m0, m0, 1
+ movd [r2 + 8 + r5], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m1, m1
+ movh [r2 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ dec r6d
+ jnz .loop
+ RET
+%endmacro
+
+ADDAVG_W12_H4 16
+
+;-----------------------------------------------------------------------------
+
+
+;-----------------------------------------------------------------------------
+%macro ADDAVG_W16_H4 1
+INIT_XMM sse2
+cglobal addAvg_16x%1, 6,7,6, src0, src1, dst, src0Stride, src1Stride, dstStride
+ mova m4, [pw_256]
+ mova m5, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ mov r6d, %1/4
+
+.loop:
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 16]
+ movu m2, [r1 + 16]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ movu m2, [r0 + 16 + r3]
+ movu m3, [r1 + 16 + r4]
+ paddw m2, m3
+ pmulhrsw m2, m4
+ paddw m2, m5
+
+ packuswb m1, m2
+ movu [r2 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 16]
+ movu m2, [r1 + 16]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ movu m2, [r0 + 16 + r3]
+ movu m3, [r1 + 16 + r4]
+ paddw m2, m3
+ pmulhrsw m2, m4
+ paddw m2, m5
+
+ packuswb m1, m2
+ movu [r2 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ dec r6d
+ jnz .loop
+ RET
+%endmacro
+
+ADDAVG_W16_H4 4
+ADDAVG_W16_H4 8
+ADDAVG_W16_H4 12
+ADDAVG_W16_H4 16
+ADDAVG_W16_H4 32
+ADDAVG_W16_H4 64
+
+;-----------------------------------------------------------------------------
+
+
+;-----------------------------------------------------------------------------
+%macro ADDAVG_W24_H2 2
+INIT_XMM sse2
+cglobal addAvg_%1x%2, 6,7,6, src0, src1, dst, src0Stride, src1Stride, dstStride
+ mova m4, [pw_256]
+ mova m5, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ mov r6d, %2/2
+
+.loop:
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 16]
+ movu m2, [r1 + 16]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2], m0
+
+ movu m0, [r0 + 32]
+ movu m2, [r1 + 32]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ packuswb m0, m0
+ movh [r2 + 16], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ movu m2, [r0 + 16 + r3]
+ movu m3, [r1 + 16 + r4]
+ paddw m2, m3
+ pmulhrsw m2, m4
+ paddw m2, m5
+
+ packuswb m1, m2
+ movu [r2 + r5], m1
+
+ movu m1, [r0 + 32 + r3]
+ movu m3, [r1 + 32 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m1, m1
+ movh [r2 + 16 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ dec r6d
+ jnz .loop
+ RET
+%endmacro
+
+ADDAVG_W24_H2 24, 32
+
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+%macro ADDAVG_W32_H2 1
+INIT_XMM sse2
+cglobal addAvg_32x%1, 6,7,6, src0, src1, dst, src0Stride, src1Stride, dstStride
+ mova m4, [pw_256]
+ mova m5, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ mov r6d, %1/2
+
+.loop:
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 16]
+ movu m2, [r1 + 16]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2], m0
+
+ movu m0, [r0 + 32]
+ movu m2, [r1 + 32]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 48]
+ movu m2, [r1 + 48]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2 + 16], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ movu m2, [r0 + 16 + r3]
+ movu m3, [r1 + 16 + r4]
+ paddw m2, m3
+ pmulhrsw m2, m4
+ paddw m2, m5
+
+ packuswb m1, m2
+ movu [r2 + r5], m1
+
+ movu m1, [r0 + 32 + r3]
+ movu m3, [r1 + 32 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ movu m2, [r0 + 48 + r3]
+ movu m3, [r1 + 48 + r4]
+ paddw m2, m3
+ pmulhrsw m2, m4
+ paddw m2, m5
+
+ packuswb m1, m2
+ movu [r2 + 16 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ dec r6d
+ jnz .loop
+ RET
+%endmacro
+
+ADDAVG_W32_H2 8
+ADDAVG_W32_H2 16
+ADDAVG_W32_H2 24
+ADDAVG_W32_H2 32
+ADDAVG_W32_H2 64
+
+;-----------------------------------------------------------------------------
+
+
+;-----------------------------------------------------------------------------
+%macro ADDAVG_W48_H2 1
+INIT_XMM sse2
+cglobal addAvg_48x%1, 6,7,6, src0, src1, dst, src0Stride, src1Stride, dstStride
+
+ mova m4, [pw_256]
+ mova m5, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ mov r6d, %1/2
+
+.loop:
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 16]
+ movu m2, [r1 + 16]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2], m0
+
+ movu m0, [r0 + 32]
+ movu m2, [r1 + 32]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 48]
+ movu m2, [r1 + 48]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2 + 16], m0
+
+ movu m0, [r0 + 64]
+ movu m2, [r1 + 64]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 80]
+ movu m2, [r1 + 80]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2 + 32], m0
+
+ movu m1, [r0 + r3]
+ movu m3, [r1 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ movu m2, [r0 + 16 + r3]
+ movu m3, [r1 + 16 + r4]
+ paddw m2, m3
+ pmulhrsw m2, m4
+ paddw m2, m5
+
+ packuswb m1, m2
+ movu [r2 + r5], m1
+
+ movu m1, [r0 + 32 + r3]
+ movu m3, [r1 + 32 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ movu m2, [r0 + 48 + r3]
+ movu m3, [r1 + 48 + r4]
+ paddw m2, m3
+ pmulhrsw m2, m4
+ paddw m2, m5
+
+ packuswb m1, m2
+ movu [r2 + 16 + r5], m1
+
+ movu m1, [r0 + 64 + r3]
+ movu m3, [r1 + 64 + r4]
+ paddw m1, m3
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ movu m2, [r0 + 80 + r3]
+ movu m3, [r1 + 80 + r4]
+ paddw m2, m3
+ pmulhrsw m2, m4
+ paddw m2, m5
+
+ packuswb m1, m2
+ movu [r2 + 32 + r5], m1
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ dec r6d
+ jnz .loop
+ RET
+%endmacro
+
+ADDAVG_W48_H2 64
+
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+%macro ADDAVG_W64_H1 1
+INIT_XMM sse2
+cglobal addAvg_64x%1, 6,7,6, src0, src1, dst, src0Stride, src1Stride, dstStride
+ mova m4, [pw_256]
+ mova m5, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ mov r6d, %1
+
+.loop:
+ movu m0, [r0]
+ movu m2, [r1]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 16]
+ movu m2, [r1 + 16]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2], m0
+
+ movu m0, [r0 + 32]
+ movu m2, [r1 + 32]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 48]
+ movu m2, [r1 + 48]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2 + 16], m0
+
+ movu m0, [r0 + 64]
+ movu m2, [r1 + 64]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 80]
+ movu m2, [r1 + 80]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2 + 32], m0
+
+ movu m0, [r0 + 96]
+ movu m2, [r1 + 96]
+ paddw m0, m2
+ pmulhrsw m0, m4
+ paddw m0, m5
+
+ movu m1, [r0 + 112]
+ movu m2, [r1 + 112]
+ paddw m1, m2
+ pmulhrsw m1, m4
+ paddw m1, m5
+
+ packuswb m0, m1
+ movu [r2 + 48], m0
+
+ add r2, r5
+ add r0, r3
+ add r1, r4
+
+ dec r6d
+ jnz .loop
+ RET
+%endmacro
+
+ADDAVG_W64_H1 16
+ADDAVG_W64_H1 32
+ADDAVG_W64_H1 48
+ADDAVG_W64_H1 64
+;-----------------------------------------------------------------------------
+
;=============================================================================
; implicit weighted biprediction
;=============================================================================
diff -r 4f396ba19b47 -r a72a0900a84d source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Sat Jan 18 12:01:44 2014 +0530
+++ b/source/common/x86/pixel.h Sat Jan 18 15:09:25 2014 +0530
@@ -166,6 +166,41 @@
int x265_pixel_ssd_64x48_sse4(pixel *, intptr_t, pixel *, intptr_t);
int x265_pixel_ssd_64x64_sse4(pixel *, intptr_t, pixel *, intptr_t);
+#define ADDAVG(func, args) \
+ void func ## _sse2 args; \
+ void func ## _sse4 args;
+ADDAVG(x265_addAvg_2x4, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_2x8, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_4x2, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_4x4, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_4x8, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_4x16, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_6x8, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_8x2, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_8x4, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_8x6, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_8x8, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_8x16, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_8x32, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_12x16, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_16x4, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_16x8, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_16x12, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_16x16, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_16x32, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_16x64, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_24x32, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_32x8, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_32x16, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_32x24, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_32x32, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_32x64, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_64x16, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_64x32, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_64x48, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_64x64, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+ADDAVG(x265_addAvg_48x64, (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t))
+
#undef DECL_PIXELS
#undef DECL_HEVC_SSD
#undef DECL_X1
diff -r 4f396ba19b47 -r a72a0900a84d source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Sat Jan 18 12:01:44 2014 +0530
+++ b/source/test/pixelharness.cpp Sat Jan 18 15:09:25 2014 +0530
@@ -763,28 +763,28 @@
return true;
}
-bool PixelHarness::check_addAvg(addAvg_t ref, addAvg_t opt)
-{
- ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
- ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
-
- int j = 0;
-
- memset(ref_dest, 0xCD, sizeof(ref_dest));
- memset(opt_dest, 0xCD, sizeof(opt_dest));
-
- for (int i = 0; i < ITERS; i++)
- {
- ref(ref_dest, STRIDE, sbuf1 + j, STRIDE, sbuf2 + j, STRIDE);
- opt(opt_dest, STRIDE, sbuf1 + j, STRIDE, sbuf2 + j, STRIDE);
-
- if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
- return false;
-
- j += INCR;
- }
-
- return true;
+bool PixelHarness::check_addAvg(addAvg_t ref, addAvg_t opt)
+{
+ ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
+ ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
+
+ int j = 0;
+
+ memset(ref_dest, 0xCD, sizeof(ref_dest));
+ memset(opt_dest, 0xCD, sizeof(opt_dest));
+
+ for (int i = 0; i < ITERS; i++)
+ {
+ ref(sbuf1 + j, sbuf2 + j, ref_dest, STRIDE, STRIDE, STRIDE);
+ opt(sbuf1 + j, sbuf2 + j, opt_dest, STRIDE, STRIDE, STRIDE);
+
+ if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
+ return false;
+
+ j += INCR;
+ }
+
+ return true;
}
bool PixelHarness::testPartition(int part, const EncoderPrimitives& ref, const EncoderPrimitives& opt)
@@ -959,22 +959,22 @@
}
}
- if (opt.luma_addAvg[part])
- {
- if (!check_addAvg(ref.luma_addAvg[part], opt.luma_addAvg[part]))
- {
- printf("luma_addAvg[%s] failed\n", lumaPartStr[part]);
- return false;
- }
- }
-
- if (opt.chroma_addAvg[part])
- {
- if (!check_addAvg(ref.chroma_addAvg[part], opt.chroma_addAvg[part]))
- {
- printf("chroma_addAvg[%s] failed\n", chromaPartStr[part]);
- return false;
- }
+ if (opt.luma_addAvg[part])
+ {
+ if (!check_addAvg(ref.luma_addAvg[part], opt.luma_addAvg[part]))
+ {
+ printf("luma_addAvg[%s] failed\n", lumaPartStr[part]);
+ return false;
+ }
+ }
+
+ if (opt.chroma_addAvg[part])
+ {
+ if (!check_addAvg(ref.chroma_addAvg[part], opt.chroma_addAvg[part]))
+ {
+ printf("chroma_addAvg[%s] failed\n", chromaPartStr[part]);
+ return false;
+ }
}
return true;
@@ -1300,16 +1300,16 @@
}
}
- if (opt.luma_addAvg[part])
- {
- printf("luma_addAvg[%s]", lumaPartStr[part]);
- REPORT_SPEEDUP(opt.luma_addAvg[part], ref.luma_addAvg[part], pbuf1, STRIDE, sbuf1, STRIDE, sbuf2, STRIDE);
- }
-
- if (opt.chroma_addAvg[part])
- {
- printf("chroma_addAvg[%s]", chromaPartStr[part]);
- REPORT_SPEEDUP(opt.chroma_addAvg[part], ref.chroma_addAvg[part], pbuf1, STRIDE, sbuf1, STRIDE, sbuf2, STRIDE);
+ if (opt.luma_addAvg[part])
+ {
+ printf("luma_addAvg[%s]", lumaPartStr[part]);
+ REPORT_SPEEDUP(opt.luma_addAvg[part], ref.luma_addAvg[part], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE);
+ }
+
+ if (opt.chroma_addAvg[part])
+ {
+ printf("chroma_addAvg[%s]", chromaPartStr[part]);
+ REPORT_SPEEDUP(opt.chroma_addAvg[part], ref.chroma_addAvg[part], sbuf1, sbuf2, pbuf1, STRIDE, STRIDE, STRIDE);
}
#undef HEADER