[x265] [PATCH] Improved sao implementation by limiting sao types
ashok at multicorewareinc.com
ashok at multicorewareinc.com
Fri Apr 7 12:34:43 CEST 2017
# HG changeset patch
# User Ashok Kumar Mishra <ashok at multicorewareinc.com>
# Date 1491215527 -19800
# Mon Apr 03 16:02:07 2017 +0530
# Node ID 195ae8f499fc61bcdc6865cf7cffe7d0d7c486f0
# Parent 08a05ca9fd16c9f5efb1ce4d8389bda8a63f5f7d
Improved sao implementation by limiting sao types
diff -r 08a05ca9fd16 -r 195ae8f499fc doc/reST/cli.rst
--- a/doc/reST/cli.rst Mon Mar 27 12:35:20 2017 +0530
+++ b/doc/reST/cli.rst Mon Apr 03 16:02:07 2017 +0530
@@ -1690,6 +1690,12 @@
disabled, SAO analysis skips the right/bottom boundary areas.
Default disabled
+.. option:: --limit-sao, --no-limit-sao
+ Limit SAO filter computation by terminating the SAO process early based
+ on inter prediction mode, CTU spatial-domain correlations, and relations
+ between luma and chroma.
+ Default disabled
+
VUI (Video Usability Information) options
=========================================
diff -r 08a05ca9fd16 -r 195ae8f499fc source/common/param.cpp
--- a/source/common/param.cpp Mon Mar 27 12:35:20 2017 +0530
+++ b/source/common/param.cpp Mon Apr 03 16:02:07 2017 +0530
@@ -187,6 +187,7 @@
/* SAO Loop Filter */
param->bEnableSAO = 1;
param->bSaoNonDeblocked = 0;
+ param->bLimitSAO = 0;
/* Coding Quality */
param->cbQpOffset = 0;
@@ -272,7 +273,6 @@
param->bAQMotion = 0;
param->bHDROpt = 0;
param->analysisRefineLevel = 5;
-
}
int x265_param_default_preset(x265_param* param, const char* preset, const char* tune)
@@ -949,6 +949,7 @@
}
OPT("hdr") p->bEmitHDRSEI = atobool(value);
OPT("hdr-opt") p->bHDROpt = atobool(value);
+ OPT("limit-sao") p->bLimitSAO = atobool(value);
else
return X265_PARAM_BAD_NAME;
}
@@ -1658,6 +1659,7 @@
BOOL(p->bEmitHDRSEI, "hdr");
BOOL(p->bHDROpt, "hdr-opt");
s += sprintf(s, " refine-level=%d", p->analysisRefineLevel);
+ BOOL(p->bLimitSAO, "limit-sao");
#undef BOOL
return buf;
}
diff -r 08a05ca9fd16 -r 195ae8f499fc source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Mon Mar 27 12:35:20 2017 +0530
+++ b/source/encoder/encoder.cpp Mon Apr 03 16:02:07 2017 +0530
@@ -2109,6 +2109,7 @@
/* some options make no sense if others are disabled */
p->bSaoNonDeblocked &= p->bEnableSAO;
p->bEnableTSkipFast &= p->bEnableTransformSkip;
+ p->bLimitSAO &= p->bEnableSAO;
/* initialize the conformance window */
m_conformanceWindow.bEnabled = false;
diff -r 08a05ca9fd16 -r 195ae8f499fc source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Mon Mar 27 12:35:20 2017 +0530
+++ b/source/encoder/sao.cpp Mon Apr 03 16:02:07 2017 +0530
@@ -734,6 +734,7 @@
/* Calculate SAO statistics for current CTU without non-crossing slice */
void SAO::calcSaoStatsCTU(int addr, int plane)
{
+ Slice* slice = m_frame->m_encData->m_slice;
const PicYuv* reconPic = m_frame->m_reconPic;
const CUData* cu = m_frame->m_encData->getPicCTU(addr);
const pixel* fenc0 = m_frame->m_fencPic->getPlaneAddr(plane, addr);
@@ -858,59 +859,63 @@
primitives.saoCuStatsE1(diff + startY * MAX_CU_SIZE, rec0 + startY * stride, stride, upBuff1, endX, endY - startY, m_offsetOrg[plane][SAO_EO_1], m_count[plane][SAO_EO_1]);
}
- // SAO_EO_2: // dir: 135
+ if (!m_param->bLimitSAO || ((slice->m_sliceType == P_SLICE && !cu->isSkipped(0)) ||
+ (slice->m_sliceType != B_SLICE)))
{
- if (m_param->bSaoNonDeblocked)
+ // SAO_EO_2: // dir: 135
{
- skipB = 4;
- skipR = 5;
+ if (m_param->bSaoNonDeblocked)
+ {
+ skipB = 4;
+ skipR = 5;
+ }
+
+ fenc = fenc0;
+ rec = rec0;
+
+ startX = !lpelx;
+ endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR + plane_offset;
+
+ startY = bAboveUnavail;
+ endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB + plane_offset;
+ if (startY)
+ {
+ fenc += stride;
+ rec += stride;
+ }
+
+ primitives.sign(upBuff1, &rec[startX], &rec[startX - stride - 1], (endX - startX));
+
+ primitives.saoCuStatsE2(diff + startX + startY * MAX_CU_SIZE, rec0 + startX + startY * stride, stride, upBuff1, upBufft, endX - startX, endY - startY, m_offsetOrg[plane][SAO_EO_2], m_count[plane][SAO_EO_2]);
}
- fenc = fenc0;
- rec = rec0;
+ // SAO_EO_3: // dir: 45
+ {
+ if (m_param->bSaoNonDeblocked)
+ {
+ skipB = 4;
+ skipR = 5;
+ }
- startX = !lpelx;
- endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR + plane_offset;
+ fenc = fenc0;
+ rec = rec0;
- startY = bAboveUnavail;
- endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB + plane_offset;
- if (startY)
- {
- fenc += stride;
- rec += stride;
+ startX = !lpelx;
+ endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR + plane_offset;
+
+ startY = bAboveUnavail;
+ endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB + plane_offset;
+
+ if (startY)
+ {
+ fenc += stride;
+ rec += stride;
+ }
+
+ primitives.sign(upBuff1, &rec[startX - 1], &rec[startX - 1 - stride + 1], (endX - startX + 1));
+
+ primitives.saoCuStatsE3(diff + startX + startY * MAX_CU_SIZE, rec0 + startX + startY * stride, stride, upBuff1 + 1, endX - startX, endY - startY, m_offsetOrg[plane][SAO_EO_3], m_count[plane][SAO_EO_3]);
}
-
- primitives.sign(upBuff1, &rec[startX], &rec[startX - stride - 1], (endX - startX));
-
- primitives.saoCuStatsE2(diff + startX + startY * MAX_CU_SIZE, rec0 + startX + startY * stride, stride, upBuff1, upBufft, endX - startX, endY - startY, m_offsetOrg[plane][SAO_EO_2], m_count[plane][SAO_EO_2]);
- }
-
- // SAO_EO_3: // dir: 45
- {
- if (m_param->bSaoNonDeblocked)
- {
- skipB = 4;
- skipR = 5;
- }
-
- fenc = fenc0;
- rec = rec0;
-
- startX = !lpelx;
- endX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR + plane_offset;
-
- startY = bAboveUnavail;
- endY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB + plane_offset;
-
- if (startY)
- {
- fenc += stride;
- rec += stride;
- }
-
- primitives.sign(upBuff1, &rec[startX - 1], &rec[startX - 1 - stride + 1], (endX - startX + 1));
-
- primitives.saoCuStatsE3(diff + startX + startY * MAX_CU_SIZE, rec0 + startX + startY * stride, stride, upBuff1 + 1, endX - startX, endY - startY, m_offsetOrg[plane][SAO_EO_3], m_count[plane][SAO_EO_3]);
}
}
}
@@ -1224,7 +1229,6 @@
void SAO::rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int addr)
{
Slice* slice = m_frame->m_encData->m_slice;
-// int qp = slice->m_sliceQp;
const CUData* cu = m_frame->m_encData->getPicCTU(addr);
int qp = cu->m_qp[0];
@@ -1263,17 +1267,6 @@
for (int i = 0; i < planes; i++)
saoParam->ctuParam[i][addr].reset();
- if (saoParam->bSaoFlag[0])
- calcSaoStatsCTU(addr, 0);
-
- if (saoParam->bSaoFlag[1])
- {
- calcSaoStatsCTU(addr, 1);
- calcSaoStatsCTU(addr, 2);
- }
-
- saoStatsInitialOffset(planes);
-
// SAO distortion calculation
m_entropyCoder.load(m_rdContexts.cur);
m_entropyCoder.resetBits();
@@ -1283,13 +1276,44 @@
m_entropyCoder.codeSaoMerge(0);
m_entropyCoder.store(m_rdContexts.temp);
- // Estimate distortion and cost of new SAO params
+ memset(m_offset, 0, sizeof(m_offset));
int64_t bestCost = 0;
int64_t rateDist = 0;
+
+ bool bAboveLeftAvail = true;
+ for (int mergeIdx = 0; mergeIdx < 2; ++mergeIdx)
+ {
+ if (!allowMerge[mergeIdx])
+ continue;
+
+ SaoCtuParam* mergeSrcParam = &(saoParam->ctuParam[0][addrMerge[mergeIdx]]);
+ bAboveLeftAvail = bAboveLeftAvail && (mergeSrcParam->typeIdx == -1);
+ }
+ // Don't apply SAO if the CTU is skipped or the adjacent CTUs have SAO off
+ bool bSaoOff = (slice->m_sliceType == B_SLICE) && (cu->isSkipped(0) || bAboveLeftAvail);
+
// Estimate distortion and cost of new SAO params
- saoLumaComponentParamDist(saoParam, addr, rateDist, lambda, bestCost);
- if (chroma)
- saoChromaComponentParamDist(saoParam, addr, rateDist, lambda, bestCost);
+ if (saoParam->bSaoFlag[0])
+ {
+ if (!m_param->bLimitSAO || !bSaoOff)
+ {
+ calcSaoStatsCTU(addr, 0);
+ saoStatsInitialOffset(addr, 0);
+ saoLumaComponentParamDist(saoParam, addr, rateDist, lambda, bestCost);
+ }
+ }
+
+ SaoCtuParam* lclCtuParam = &saoParam->ctuParam[0][addr];
+ if (saoParam->bSaoFlag[1])
+ {
+ if (!m_param->bLimitSAO || ((lclCtuParam->typeIdx != -1) && !bSaoOff))
+ {
+ calcSaoStatsCTU(addr, 1);
+ calcSaoStatsCTU(addr, 2);
+ saoStatsInitialOffset(addr, 1);
+ saoChromaComponentParamDist(saoParam, addr, rateDist, lambda, bestCost);
+ }
+ }
if (saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1])
{
@@ -1360,14 +1384,26 @@
// Rounds the division of initial offsets by the number of samples in
// each of the statistics table entries.
-void SAO::saoStatsInitialOffset(int planes)
+void SAO::saoStatsInitialOffset(int addr, int planes)
{
- memset(m_offset, 0, sizeof(m_offset));
+ Slice* slice = m_frame->m_encData->m_slice;
+ const CUData* cu = m_frame->m_encData->getPicCTU(addr);
+
+ int maxSaoType;
+ if (m_param->bLimitSAO && ((slice->m_sliceType == P_SLICE && cu->isSkipped(0)) ||
+ (slice->m_sliceType == B_SLICE)))
+ {
+ maxSaoType = MAX_NUM_SAO_TYPE - 3;
+ }
+ else
+ {
+ maxSaoType = MAX_NUM_SAO_TYPE - 1;
+ }
// EO
- for (int plane = 0; plane < planes; plane++)
+ for (int plane = planes; plane <= planes * 2; plane++)
{
- for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
+ for (int typeIdx = 0; typeIdx < maxSaoType; typeIdx++)
{
for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)
{
@@ -1390,7 +1426,7 @@
}
// BO
- for (int plane = 0; plane < planes; plane++)
+ for (int plane = planes; plane <= planes * 2; plane++)
{
for (int classIdx = 0; classIdx < MAX_NUM_SAO_CLASS; classIdx++)
{
@@ -1454,6 +1490,8 @@
void SAO::saoLumaComponentParamDist(SAOParam* saoParam, int32_t addr, int64_t& rateDist, int64_t* lambda, int64_t &bestCost)
{
+ Slice* slice = m_frame->m_encData->m_slice;
+ const CUData* cu = m_frame->m_encData->getPicCTU(addr);
int64_t bestDist = 0;
int bestTypeIdx = -1;
@@ -1469,13 +1507,24 @@
int64_t costPartBest = calcSaoRdoCost(0, m_entropyCoder.getNumberOfWrittenBits(), lambda[0]);
+ int maxSaoType;
+ if (m_param->bLimitSAO && ((slice->m_sliceType == P_SLICE && cu->isSkipped(0)) ||
+ (slice->m_sliceType == B_SLICE)))
+ {
+ maxSaoType = MAX_NUM_SAO_TYPE - 3;
+ }
+ else
+ {
+ maxSaoType = MAX_NUM_SAO_TYPE - 1;
+ }
+
//EO distortion calculation
- for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
+ for (int typeIdx = 0; typeIdx < maxSaoType; typeIdx++)
{
int64_t estDist = 0;
for (int classIdx = 1; classIdx < SAO_NUM_OFFSET + 1; classIdx++)
{
- int32_t& count = m_count[0][typeIdx][classIdx];
+ int32_t& count = m_count[0][typeIdx][classIdx];
int32_t& offsetOrg = m_offsetOrg[0][typeIdx][classIdx];
int32_t& offsetOut = m_offset[0][typeIdx][classIdx];
@@ -1571,6 +1620,8 @@
void SAO::saoChromaComponentParamDist(SAOParam* saoParam, int32_t addr, int64_t& rateDist, int64_t* lambda, int64_t &bestCost)
{
+ Slice* slice = m_frame->m_encData->m_slice;
+ const CUData* cu = m_frame->m_encData->getPicCTU(addr);
int64_t bestDist = 0;
int bestTypeIdx = -1;
@@ -1587,8 +1638,19 @@
uint32_t bits = m_entropyCoder.getNumberOfWrittenBits();
int64_t costPartBest = calcSaoRdoCost(0, bits, lambda[1]);
+ int maxSaoType;
+ if (m_param->bLimitSAO && ((slice->m_sliceType == P_SLICE && cu->isSkipped(0)) ||
+ (slice->m_sliceType == B_SLICE)))
+ {
+ maxSaoType = MAX_NUM_SAO_TYPE - 3;
+ }
+ else
+ {
+ maxSaoType = MAX_NUM_SAO_TYPE - 1;
+ }
+
//EO RDO
- for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE - 1; typeIdx++)
+ for (int typeIdx = 0; typeIdx < maxSaoType; typeIdx++)
{
int64_t estDist[2] = {0, 0};
for (int compIdx = 1; compIdx < 3; compIdx++)
diff -r 08a05ca9fd16 -r 195ae8f499fc source/encoder/sao.h
--- a/source/encoder/sao.h Mon Mar 27 12:35:20 2017 +0530
+++ b/source/encoder/sao.h Mon Apr 03 16:02:07 2017 +0530
@@ -134,7 +134,7 @@
void rdoSaoUnitCu(SAOParam* saoParam, int rowBaseAddr, int idxX, int addr);
int64_t calcSaoRdoCost(int64_t distortion, uint32_t bits, int64_t lambda);
- void saoStatsInitialOffset(int planes);
+ void saoStatsInitialOffset(int addr, int planes);
friend class FrameFilter;
};
diff -r 08a05ca9fd16 -r 195ae8f499fc source/test/regression-tests.txt
--- a/source/test/regression-tests.txt Mon Mar 27 12:35:20 2017 +0530
+++ b/source/test/regression-tests.txt Mon Apr 03 16:02:07 2017 +0530
@@ -45,6 +45,7 @@
CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryslow --tskip --tskip-fast --no-scenecut --limit-tu 1
CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryslow --aq-mode 3 --aq-strength 1.5 --aq-motion --bitrate 5000
CrowdRun_1920x1080_50_10bit_444.yuv,--preset veryslow --aq-mode 3 --aq-strength 1.5 --no-psy-rd --ssim-rd
+CrowdRun_1920x1080_50_10bit_422.yuv,--preset superfast --weightp --sao --limit-sao
DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset superfast --weightp --qg-size 16
DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset medium --tune psnr --bframes 16 --limit-modes
DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset slow --temporal-layers --no-psy-rd --qg-size 32 --limit-refs 0 --cu-lossless
diff -r 08a05ca9fd16 -r 195ae8f499fc source/test/smoke-tests.txt
--- a/source/test/smoke-tests.txt Mon Mar 27 12:35:20 2017 +0530
+++ b/source/test/smoke-tests.txt Mon Apr 03 16:02:07 2017 +0530
@@ -19,6 +19,7 @@
DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset=veryfast --min-cu 16
DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset=fast --weightb --interlace bff
DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset=veryslow --limit-ref 1 --limit-mode --tskip --limit-tu 1
+CrowdRun_1920x1080_50_10bit_444.yuv,--preset=superfast --bitrate 7000 --sao --limit-sao
# Main12 intraCost overflow bug test
720p50_parkrun_ter.y4m,--preset medium
diff -r 08a05ca9fd16 -r 195ae8f499fc source/x265.h
--- a/source/x265.h Mon Mar 27 12:35:20 2017 +0530
+++ b/source/x265.h Mon Apr 03 16:02:07 2017 +0530
@@ -963,7 +963,7 @@
int bEnableWeightedBiPred;
/* Enable source pixels in motion estimation. Default is disabled */
- int bSourceReferenceEstimation;
+ int bSourceReferenceEstimation;
/*== Loop Filters ==*/
@@ -1347,7 +1347,7 @@
/* This value represents the percentage difference between the inter cost and
* intra cost of a frame used in scenecut detection. Default 5. */
- double scenecutBias;
+ double scenecutBias;
/* Use multiple worker threads dedicated to doing only lookahead instead of sharing
* the worker threads with Frame Encoders. A dedicated lookahead threadpool is created with the
@@ -1357,16 +1357,16 @@
int lookaheadThreads;
/* Optimize CU level QPs to signal consistent deltaQPs in frame for rd level > 4 */
- int bOptCUDeltaQP;
+ int bOptCUDeltaQP;
/* Refine analysis in multipass ratecontrol based on analysis information stored */
- int analysisMultiPassRefine;
+ int analysisMultiPassRefine;
/* Refine analysis in multipass ratecontrol based on distortion data stored */
- int analysisMultiPassDistortion;
+ int analysisMultiPassDistortion;
/* Adaptive Quantization based on relative motion */
- int bAQMotion;
+ int bAQMotion;
/* SSIM based RDO, based on residual divisive normalization scheme. Used for mode
* selection during analysis of CTUs, can achieve significant gain in terms of
@@ -1390,6 +1390,11 @@
* level higher the informtion stored/reused. Default is 5 */
int analysisRefineLevel;
+ /* Limit Sample Adaptive Offset filter computation by terminating the SAO
+ * process early based on inter prediction mode, CTU spatial-domain
+ * correlations, and relations between luma and chroma */
+ int bLimitSAO;
+
} x265_param;
/* x265_param_alloc:
diff -r 08a05ca9fd16 -r 195ae8f499fc source/x265cli.h
--- a/source/x265cli.h Mon Mar 27 12:35:20 2017 +0530
+++ b/source/x265cli.h Mon Apr 03 16:02:07 2017 +0530
@@ -266,6 +266,8 @@
{ "no-hdr", no_argument, NULL, 0 },
{ "hdr-opt", no_argument, NULL, 0 },
{ "no-hdr-opt", no_argument, NULL, 0 },
+ { "limit-sao", no_argument, NULL, 0 },
+ { "no-limit-sao", no_argument, NULL, 0 },
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 },
{ 0, 0, 0, 0 },
@@ -454,6 +456,7 @@
H0(" --[no-]deblock Enable Deblocking Loop Filter, optionally specify tC:Beta offsets Default %s\n", OPT(param->bEnableLoopFilter));
H0(" --[no-]sao Enable Sample Adaptive Offset. Default %s\n", OPT(param->bEnableSAO));
H1(" --[no-]sao-non-deblock Use non-deblocked pixels, else right/bottom boundary areas skipped. Default %s\n", OPT(param->bSaoNonDeblocked));
+ H0(" --[no-]limit-sao Limit Sample Adaptive Offset types. Default %s\n", OPT(param->bLimitSAO));
H0("\nVUI options:\n");
H0(" --sar <width:height|int> Sample Aspect Ratio, the ratio of width to height of an individual pixel.\n");
H0(" Choose from 0=undef, 1=1:1(\"square\"), 2=12:11, 3=10:11, 4=16:11,\n");
More information about the x265-devel
mailing list