[x265-commits] [x265] primitives: fix build error in refactor of chroma p2s primitive
Aarthi at videolan.org
Aarthi at videolan.org
Fri Dec 5 01:46:42 CET 2014
details: http://hg.videolan.org/x265/rev/511dde5ac1de
branches:
changeset: 8942:511dde5ac1de
user: Aarthi Thirumalai
date: Thu Dec 04 12:43:06 2014 +0530
description:
primitives: fix build error in refactor of chroma p2s primitive.
Subject: [x265] noiseReduction: allow separate strengths to be specified for intra and inter CUs
details: http://hg.videolan.org/x265/rev/ec06f5878e8b
branches:
changeset: 8943:ec06f5878e8b
user: Deepthi Nandakumar <deepthi at multicorewareinc.com>
date: Thu Dec 04 10:19:38 2014 +0530
description:
noiseReduction: allow separate strengths to be specified for intra and inter CUs
Subject: [x265] analysis: add chroma distortion to rdLevels 3 and 4
details: http://hg.videolan.org/x265/rev/1d2a11f6a33f
branches:
changeset: 8944:1d2a11f6a33f
user: Deepthi Nandakumar <deepthi at multicorewareinc.com>
date: Thu Dec 04 16:00:12 2014 +0530
description:
analysis: add chroma distortion to rdLevels 3 and 4
At these rdLevels, inter/bidir and merge candidate decisions were being taken
based on luma sa8dCost only. Adding chroma distortion to these decisions will
increase bitrate and lower SSIM slightly, but gives better subjective quality.
Also fixed some naming nits.
Subject: [x265] analysis: cache m_bChromaSa8d and reduce redundant work
details: http://hg.videolan.org/x265/rev/cc327e846dac
branches:
changeset: 8945:cc327e846dac
user: Steve Borho <steve at borho.org>
date: Thu Dec 04 10:50:02 2014 -0600
description:
analysis: cache m_bChromaSa8d and reduce redundant work
Renamed some 'part' variables to 'puIdx' to avoid variable-shadowing warnings
and for consistency with search.cpp.
Subject: [x265] asm: chroma_vpp[4x4] for colorspace i420 in avx2: improve 228c->184c
details: http://hg.videolan.org/x265/rev/23e637065aec
branches:
changeset: 8946:23e637065aec
user: Divya Manivannan <divya at multicorewareinc.com>
date: Thu Dec 04 10:57:35 2014 +0530
description:
asm: chroma_vpp[4x4] for colorspace i420 in avx2: improve 228c->184c
diffstat:
doc/reST/cli.rst | 2 +-
source/CMakeLists.txt | 2 +-
source/common/param.cpp | 15 +++-
source/common/x86/asm-primitives.cpp | 8 +-
source/common/x86/ipfilter8.asm | 77 +++++++++++++++++++++++++++++
source/common/x86/ipfilter8.h | 1 +
source/encoder/analysis.cpp | 95 ++++++++++++++++++++++++++---------
source/encoder/analysis.h | 1 +
source/encoder/frameencoder.cpp | 7 +-
source/encoder/search.cpp | 2 +-
source/x265.cpp | 6 +-
source/x265.h | 10 ++-
12 files changed, 182 insertions(+), 44 deletions(-)
diffs (truncated from 505 to 300 lines):
diff -r b1b5f06fe9ce -r 23e637065aec doc/reST/cli.rst
--- a/doc/reST/cli.rst Wed Dec 03 22:21:46 2014 -0600
+++ b/doc/reST/cli.rst Thu Dec 04 10:57:35 2014 +0530
@@ -925,7 +925,7 @@ Quality, rate control and rate distortio
less bits. This tends to improve detail in the backgrounds of video
with less detail in areas of high motion. Default enabled
-.. option:: --nr <integer>
+.. option:: --nr-intra <integer>, --nr-inter <integer>
Noise reduction - an adaptive deadzone applied after DCT
(subtracting from DCT coefficients), before quantization. It does
diff -r b1b5f06fe9ce -r 23e637065aec source/CMakeLists.txt
--- a/source/CMakeLists.txt Wed Dec 03 22:21:46 2014 -0600
+++ b/source/CMakeLists.txt Thu Dec 04 10:57:35 2014 +0530
@@ -21,7 +21,7 @@ include(CheckSymbolExists)
include(CheckCXXCompilerFlag)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 39)
+set(X265_BUILD 40)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r b1b5f06fe9ce -r 23e637065aec source/common/param.cpp
--- a/source/common/param.cpp Wed Dec 03 22:21:46 2014 -0600
+++ b/source/common/param.cpp Thu Dec 04 10:57:35 2014 +0530
@@ -766,7 +766,8 @@ int x265_param_parse(x265_param *p, cons
&p->vui.defDispWinRightOffset,
&p->vui.defDispWinBottomOffset) != 4;
}
- OPT("nr") p->noiseReduction = atoi(value);
+ OPT("nr-intra") p->noiseReductionIntra = atoi(value);
+ OPT("nr-inter") p->noiseReductionInter = atoi(value);
OPT("pass")
{
int pass = Clip3(0, 3, atoi(value));
@@ -1078,8 +1079,10 @@ int x265_check_params(x265_param *param)
"Target bitrate can not be less than zero");
CHECK(param->rc.qCompress < 0.5 || param->rc.qCompress > 1.0,
"qCompress must be between 0.5 and 1.0");
- if (param->noiseReduction)
- CHECK(0 > param->noiseReduction || param->noiseReduction > 2000, "Valid noise reduction range 0 - 2000");
+ if (param->noiseReductionIntra)
+ CHECK(0 > param->noiseReductionIntra || param->noiseReductionIntra > 2000, "Valid noise reduction range 0 - 2000");
+ if (param->noiseReductionInter)
+ CHECK(0 > param->noiseReductionInter || param->noiseReductionInter > 2000, "Valid noise reduction range 0 - 2000");
CHECK(param->rc.rateControlMode == X265_RC_CRF && param->rc.bStatRead,
"Constant rate-factor is incompatible with 2pass");
CHECK(param->rc.rateControlMode == X265_RC_CQP && param->rc.bStatRead,
@@ -1201,8 +1204,10 @@ void x265_print_params(x265_param *param
fprintf(stderr, "psy-rdoq=%.2lf ", param->psyRdoq);
TOOLOPT(param->bEnableEarlySkip, "early-skip");
TOOLOPT(param->bEnableCbfFastMode, "fast-cbf");
- if (param->noiseReduction)
- fprintf(stderr, "nr=%d ", param->noiseReduction);
+ if (param->noiseReductionIntra)
+ fprintf(stderr, "nr-intra=%d ", param->noiseReductionIntra);
+ if (param->noiseReductionInter)
+ fprintf(stderr, "nr-inter=%d ", param->noiseReductionInter);
if (param->bEnableLoopFilter)
{
if (param->deblockingFilterBetaOffset || param->deblockingFilterTCOffset)
diff -r b1b5f06fe9ce -r 23e637065aec source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Wed Dec 03 22:21:46 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp Thu Dec 04 10:57:35 2014 +0530
@@ -1365,9 +1365,9 @@ void Setup_Assembly_Primitives(EncoderPr
CHROMA_VERT_FILTERS_422(_sse2);
CHROMA_VERT_FILTERS_444(_sse2);
p.luma_p2s = x265_luma_p2s_sse2;
- p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_sse2;
- p.chroma_p2s[X265_CSP_I422] = x265_chroma_p2s_sse2;
- p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_sse2; // for i444 , chroma_p2s can be replaced by luma_p2s
+ p.chroma[X265_CSP_I420].p2s = x265_chroma_p2s_sse2;
+ p.chroma[X265_CSP_I422].p2s = x265_chroma_p2s_sse2;
+ p.chroma[X265_CSP_I444].p2s = x265_luma_p2s_sse2; // for i444 , chroma_p2s can be replaced by luma_p2s
p.blockfill_s[BLOCK_4x4] = x265_blockfill_s_4x4_sse2;
p.blockfill_s[BLOCK_8x8] = x265_blockfill_s_8x8_sse2;
@@ -1879,6 +1879,8 @@ void Setup_Assembly_Primitives(EncoderPr
p.luma_vpp[LUMA_8x8] = x265_interp_8tap_vert_pp_8x8_avx2;
p.luma_vpp[LUMA_8x16] = x265_interp_8tap_vert_pp_8x16_avx2;
p.luma_vpp[LUMA_8x32] = x265_interp_8tap_vert_pp_8x32_avx2;
+
+ p.chroma[X265_CSP_I420].filter_vpp[CHROMA_4x4] = x265_interp_4tap_vert_pp_4x4_avx2;
}
#endif // if HIGH_BIT_DEPTH
}
diff -r b1b5f06fe9ce -r 23e637065aec source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm Wed Dec 03 22:21:46 2014 -0600
+++ b/source/common/x86/ipfilter8.asm Thu Dec 04 10:57:35 2014 +0530
@@ -32,6 +32,13 @@ tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2,
db 8, 9,10,11, 9,10,11,12,10,11,12,13,11,12,13, 14
ALIGN 32
+const interp4_vpp_shuf, times 2 db 0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15
+
+ALIGN 32
+const interp4_vpp_shuf1, dd 0, 1, 1, 2, 2, 3, 3, 4
+ dd 2, 3, 3, 4, 4, 5, 5, 6
+
+ALIGN 32
tab_Lm: db 0, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 8
db 2, 3, 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9, 10
db 4, 5, 6, 7, 8, 9, 10, 11, 5, 6, 7, 8, 9, 10, 11, 12
@@ -143,6 +150,31 @@ tab_LumaCoeffVer_32: times 16 db 0, 0
times 16 db 58, -10
times 16 db 4, -1
+ALIGN 32
+tab_ChromaCoeffVer_32: times 16 db 0, 64
+ times 16 db 0, 0
+
+ times 16 db -2, 58
+ times 16 db 10, -2
+
+ times 16 db -4, 54
+ times 16 db 16, -2
+
+ times 16 db -6, 46
+ times 16 db 28, -4
+
+ times 16 db -4, 36
+ times 16 db 36, -4
+
+ times 16 db -4, 28
+ times 16 db 46, -6
+
+ times 16 db -2, 16
+ times 16 db 54, -4
+
+ times 16 db -2, 10
+ times 16 db 58, -2
+
tab_c_64_n64: times 8 db 64, -64
const interp4_shuf, times 2 db 0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15
@@ -1901,6 +1933,51 @@ pextrd [r2 + r3], m2, 3
RET
+INIT_YMM avx2
+cglobal interp_4tap_vert_pp_4x4, 4, 6, 3
+ mov r4d, r4m
+ shl r4d, 6
+ sub r0, r1
+
+%ifdef PIC
+ lea r5, [tab_ChromaCoeffVer_32]
+ add r5, r4
+%else
+ lea r5, [tab_ChromaCoeffVer_32 + r4]
+%endif
+
+ lea r4, [r1 * 3]
+
+ movd xm1, [r0]
+ pinsrd xm1, [r0 + r1], 1
+ pinsrd xm1, [r0 + r1 * 2], 2
+ pinsrd xm1, [r0 + r4], 3 ; m1 = row[3 2 1 0]
+ lea r0, [r0 + r1 * 4]
+ movd xm2, [r0]
+ pinsrd xm2, [r0 + r1], 1
+ pinsrd xm2, [r0 + r1 * 2], 2 ; m2 = row[x 6 5 4]
+ vinserti128 m1, m1, xm2, 1 ; m1 = row[x 6 5 4 3 2 1 0]
+ mova m2, [interp4_vpp_shuf1]
+ vpermd m0, m2, m1 ; m0 = row[4 3 3 2 2 1 1 0]
+ mova m2, [interp4_vpp_shuf1 + mmsize]
+ vpermd m1, m2, m1 ; m1 = row[6 5 5 4 4 3 3 2]
+
+ mova m2, [interp4_vpp_shuf]
+ pshufb m0, m0, m2
+ pshufb m1, m1, m2
+ pmaddubsw m0, [r5]
+ pmaddubsw m1, [r5 + mmsize]
+ paddw m0, m1 ; m0 = WORD ROW[3 2 1 0]
+ pmulhrsw m0, [pw_512]
+ vextracti128 xm1, m0, 1
+ packuswb xm0, xm1
+ lea r5, [r3 * 3]
+ movd [r2], xm0
+ pextrd [r2 + r3], xm0, 1
+ pextrd [r2 + r3 * 2], xm0, 2
+ pextrd [r2 + r5], xm0, 3
+ RET
+
;-----------------------------------------------------------------------------
; void interp_4tap_vert_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
;-----------------------------------------------------------------------------
diff -r b1b5f06fe9ce -r 23e637065aec source/common/x86/ipfilter8.h
--- a/source/common/x86/ipfilter8.h Wed Dec 03 22:21:46 2014 -0600
+++ b/source/common/x86/ipfilter8.h Thu Dec 04 10:57:35 2014 +0530
@@ -573,6 +573,7 @@ CHROMA_HORIZ_FILTERS_444(_sse4);
SETUP_CHROMA_SS_FUNC_DEF(16, 64, cpu);
CHROMA_FILTERS(_sse4);
+CHROMA_FILTERS(_avx2);
CHROMA_SP_FILTERS(_sse2);
CHROMA_SP_FILTERS_SSE4(_sse4);
CHROMA_SS_FILTERS(_sse2);
diff -r b1b5f06fe9ce -r 23e637065aec source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed Dec 03 22:21:46 2014 -0600
+++ b/source/encoder/analysis.cpp Thu Dec 04 10:57:35 2014 +0530
@@ -79,6 +79,7 @@ bool Analysis::create(ThreadLocalData *t
{
m_tld = tld;
m_bTryLossless = m_param->bCULossless && !m_param->bLossless && m_param->rdLevel >= 2;
+ m_bChromaSa8d = m_param->rdLevel >= 3;
int csp = m_param->internalCsp;
uint32_t cuSize = g_maxCUSize;
@@ -593,10 +594,13 @@ void Analysis::compressInterCU_dist(cons
if (m_param->rdLevel > 2)
{
/* RD selection between merge, inter, bidir and intra */
- for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
+ if (!m_bChromaSa8d)
{
- prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
- motionCompensation(bestInter->predYuv, false, true);
+ for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
+ {
+ prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
+ motionCompensation(bestInter->predYuv, false, true);
+ }
}
encodeResAndCalcRdInterCU(*bestInter, cuGeom);
checkBestMode(*bestInter, depth);
@@ -841,10 +845,13 @@ void Analysis::compressInterCU_rd0_4(con
if (m_param->rdLevel >= 3)
{
/* Calculate RD cost of best inter option */
- for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
+ if (!m_bChromaSa8d)
{
- prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
- motionCompensation(bestInter->predYuv, false, true);
+ for (uint32_t puIdx = 0; puIdx < bestInter->cu.getNumPartInter(); puIdx++)
+ {
+ prepMotionCompensation(bestInter->cu, cuGeom, puIdx);
+ motionCompensation(bestInter->predYuv, false, true);
+ }
}
encodeResAndCalcRdInterCU(*bestInter, cuGeom);
checkBestMode(*bestInter, depth);
@@ -1227,7 +1234,12 @@ void Analysis::checkMerge2Nx2N_rd0_4(Mod
bestPred->sa8dCost = MAX_INT64;
int bestSadCand = -1;
- int sizeIdx = cuGeom.log2CUSize - 2;
+ int cpart, sizeIdx = cuGeom.log2CUSize - 2;
+ if (m_bChromaSa8d)
+ {
+ int cuSize = 1 << cuGeom.log2CUSize;
+ cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
+ }
for (uint32_t i = 0; i < maxNumMergeCand; ++i)
{
if (m_bFrameParallel &&
@@ -1242,12 +1254,16 @@ void Analysis::checkMerge2Nx2N_rd0_4(Mod
tempPred->cu.m_mv[1][0] = mvFieldNeighbours[i][1].mv;
tempPred->cu.m_refIdx[1][0] = (int8_t)mvFieldNeighbours[i][1].refIdx;
- // do MC only for Luma part
prepMotionCompensation(tempPred->cu, cuGeom, 0);
- motionCompensation(tempPred->predYuv, true, false);
+ motionCompensation(tempPred->predYuv, true, m_bChromaSa8d);
tempPred->sa8dBits = getTUBits(i, maxNumMergeCand);
tempPred->distortion = primitives.sa8d[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
+ if (m_bChromaSa8d)
+ {
+ tempPred->distortion += primitives.sa8d[cpart](fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
+ tempPred->distortion += primitives.sa8d[cpart](fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
+ }
tempPred->sa8dCost = m_rdCost.calcRdSADCost(tempPred->distortion, tempPred->sa8dBits);
if (tempPred->sa8dCost < bestPred->sa8dCost)
@@ -1262,8 +1278,11 @@ void Analysis::checkMerge2Nx2N_rd0_4(Mod
return;
/* calculate the motion compensation for chroma for the best mode selected */
- prepMotionCompensation(bestPred->cu, cuGeom, 0);
- motionCompensation(bestPred->predYuv, false, true);
+ if (!m_bChromaSa8d)
+ {
+ prepMotionCompensation(bestPred->cu, cuGeom, 0);
+ motionCompensation(bestPred->predYuv, false, true);
+ }
if (m_param->rdLevel)
{
@@ -1428,19 +1447,27 @@ void Analysis::checkInter_rd0_4(Mode& in
}
}
}
- if (predInterSearch(interMode, cuGeom, false, false))
+ if (predInterSearch(interMode, cuGeom, false, m_bChromaSa8d))
{
/* predInterSearch sets interMode.sa8dBits */
More information about the x265-commits
mailing list