From ashok at multicorewareinc.com Fri Nov 23 01:03:04 2018 From: ashok at multicorewareinc.com (Ashok Kumar Mishra) Date: Fri, 23 Nov 2018 01:03:04 +0100 Subject: [x265-commits] [x265] Merge with stable Message-ID: details: http://hg.videolan.org/x265/rev/f74003e88622 branches: changeset: 12434:f74003e88622 user: Ashok Kumar Mishra date: Thu Nov 22 15:02:08 2018 +0530 description: Merge with stable diffstat: .hgtags | 1 + doc/reST/releasenotes.rst | 26 ++++++++++++++++++++++++++ source/common/cpu.cpp | 1 + source/common/framedata.cpp | 6 +++--- source/common/quant.cpp | 11 ++++++++++- source/encoder/api.cpp | 28 ++++++++++++++-------------- source/encoder/encoder.cpp | 11 +++++++---- source/encoder/frameencoder.cpp | 10 ++++++---- source/encoder/frameencoder.h | 3 +++ source/test/testharness.h | 6 +++--- source/x265.h | 5 ++++- 11 files changed, 78 insertions(+), 30 deletions(-) diffs (truncated from 320 to 300 lines): diff -r fd517ae68f93 -r f74003e88622 .hgtags --- a/.hgtags Tue Sep 25 16:02:31 2018 +0530 +++ b/.hgtags Thu Nov 22 15:02:08 2018 +0530 @@ -27,3 +27,4 @@ 64b2d0bf45a52511e57a6b7299160b961ca3d51c 0e9ea76945c89962cd46cee6537586e2054b2935 2.6 e41a9bf2bac4a7af2bec2bbadf91e63752d320ef 2.7 a158a3a029663133455268e2a63ae6b0af2df720 2.8 +f9681d731f2e56c2ca185cec10daece5939bee07 2.9 diff -r fd517ae68f93 -r f74003e88622 doc/reST/releasenotes.rst --- a/doc/reST/releasenotes.rst Tue Sep 25 16:02:31 2018 +0530 +++ b/doc/reST/releasenotes.rst Thu Nov 22 15:02:08 2018 +0530 @@ -2,6 +2,32 @@ Release Notes ************* +Version 2.9 +=========== + +Release date - 05/10/2018 + +New features +------------- +1. Support for chunked encoding + + :option:`--chunk-start and --chunk-end` + Frames preceding first frame of chunk in display order will be encoded, however, they will be discarded in the bitstream. + Frames following last frame of the chunk in display order will be used in taking lookahead decisions, but, they will not be encoded. + This feature can be enabled only in closed GOP structures. Default disabled. + +2. Support for HDR10+ version 1 SEI messages. + +Encoder enhancements +-------------------- +1. Create API function for allocating and freeing x265_analysis_data. +2. CEA 608/708 support: Read SEI messages from text file and encode it using userSEI message. + +Bug fixes +--------- +1. Disable noise reduction when vbv is enabled. +2. Support minLuma and maxLuma values changed by the commandline. + Version 2.8 =========== diff -r fd517ae68f93 -r f74003e88622 source/common/cpu.cpp --- a/source/common/cpu.cpp Tue Sep 25 16:02:31 2018 +0530 +++ b/source/common/cpu.cpp Thu Nov 22 15:02:08 2018 +0530 @@ -127,6 +127,7 @@ bool detect512() { return(enable512); } + uint32_t cpu_detect(bool benableavx512 ) { diff -r fd517ae68f93 -r f74003e88622 source/common/framedata.cpp --- a/source/common/framedata.cpp Tue Sep 25 16:02:31 2018 +0530 +++ b/source/common/framedata.cpp Thu Nov 22 15:02:08 2018 +0530 @@ -83,9 +83,9 @@ void FrameData::reinit(const SPS& sps) memset(m_rowStat, 0, sps.numCuInHeight * sizeof(*m_rowStat)); if (m_param->bDynamicRefine) { - memset(m_picCTU->m_collectCURd, 0, MAX_NUM_DYN_REFINE * sizeof(uint64_t)); - memset(m_picCTU->m_collectCUVariance, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t)); - memset(m_picCTU->m_collectCUCount, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t)); + memset(m_picCTU->m_collectCURd, 0, MAX_NUM_DYN_REFINE * sps.numCUsInFrame * sizeof(uint64_t)); + memset(m_picCTU->m_collectCUVariance, 0, MAX_NUM_DYN_REFINE * sps.numCUsInFrame * sizeof(uint32_t)); + memset(m_picCTU->m_collectCUCount, 0, MAX_NUM_DYN_REFINE * sps.numCUsInFrame * sizeof(uint32_t)); } } diff -r fd517ae68f93 -r f74003e88622 source/common/quant.cpp --- a/source/common/quant.cpp Tue Sep 25 16:02:31 2018 +0530 +++ b/source/common/quant.cpp Thu Nov 22 15:02:08 2018 +0530 @@ -723,6 +723,7 @@ uint32_t Quant::rdoQuant(const CUData& c X265_CHECK(coeffNum[cgScanPos] == 0, "count of coeff failure\n"); uint32_t scanPosBase = (cgScanPos << MLS_CG_SIZE); uint32_t blkPos = codeParams.scan[scanPosBase]; +#if X265_ARCH_X86 bool enable512 = detect512(); if (enable512) primitives.cu[log2TrSize - 2].psyRdoQuant(m_resiDctCoeff, m_fencDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, &psyScale, blkPos); @@ -731,6 +732,10 @@ uint32_t Quant::rdoQuant(const CUData& c primitives.cu[log2TrSize - 2].psyRdoQuant_1p(m_resiDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost,blkPos); primitives.cu[log2TrSize - 2].psyRdoQuant_2p(m_resiDctCoeff, m_fencDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, &psyScale, blkPos); } +#else + primitives.cu[log2TrSize - 2].psyRdoQuant_1p(m_resiDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, blkPos); + primitives.cu[log2TrSize - 2].psyRdoQuant_2p(m_resiDctCoeff, m_fencDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, &psyScale, blkPos); +#endif } } else @@ -805,8 +810,8 @@ uint32_t Quant::rdoQuant(const CUData& c uint32_t blkPos = codeParams.scan[scanPosBase]; if (usePsyMask) { +#if X265_ARCH_X86 bool enable512 = detect512(); - if (enable512) primitives.cu[log2TrSize - 2].psyRdoQuant(m_resiDctCoeff, m_fencDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, &psyScale, blkPos); else @@ -814,6 +819,10 @@ uint32_t Quant::rdoQuant(const CUData& c primitives.cu[log2TrSize - 2].psyRdoQuant_1p(m_resiDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, blkPos); primitives.cu[log2TrSize - 2].psyRdoQuant_2p(m_resiDctCoeff, m_fencDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, &psyScale, blkPos); } +#else + primitives.cu[log2TrSize - 2].psyRdoQuant_1p(m_resiDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, blkPos); + primitives.cu[log2TrSize - 2].psyRdoQuant_2p(m_resiDctCoeff, m_fencDctCoeff, costUncoded, &totalUncodedCost, &totalRdCost, &psyScale, blkPos); +#endif blkPos = codeParams.scan[scanPosBase]; for (int y = 0; y < MLS_CG_SIZE; y++) { diff -r fd517ae68f93 -r f74003e88622 source/encoder/api.cpp --- a/source/encoder/api.cpp Tue Sep 25 16:02:31 2018 +0530 +++ b/source/encoder/api.cpp Thu Nov 22 15:02:08 2018 +0530 @@ -1483,11 +1483,11 @@ fail_or_end: double x265_calculate_vmafscore(x265_param *param, x265_vmaf_data *data) { double score; - + data->width = param->sourceWidth; data->height = param->sourceHeight; data->internalBitDepth = param->internalBitDepth; - + if (param->internalCsp == X265_CSP_I420) { if ((param->sourceWidth * param->sourceHeight) % 2 != 0) @@ -1500,8 +1500,8 @@ double x265_calculate_vmafscore(x265_par data->offset = param->sourceWidth * param->sourceHeight * 2; else x265_log(NULL, X265_LOG_ERROR, "Invalid format\n"); - - compute_vmaf(&score, vcd->format, data->width, data->height, read_frame, data, vcd->model_path, vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool); + + compute_vmaf(&score, vcd->format, data->width, data->height, read_frame, data, vcd->model_path, vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool, vcd->thread, vcd->subsample, vcd->enable_conf_interval); return score; } @@ -1514,11 +1514,11 @@ int read_frame_10bit(float *reference_da PicYuv *distorted_frame = (PicYuv *)user_data->distorted_frame; if(!user_data->frame_set) { - + int reference_stride = reference_frame->m_stride; int distorted_stride = distorted_frame->m_stride; - const uint16_t *reference_ptr = (const uint16_t *)reference_frame->m_picOrg[0]; + const uint16_t *reference_ptr = (const uint16_t *)reference_frame->m_picOrg[0]; const uint16_t *distorted_ptr = (const uint16_t *)distorted_frame->m_picOrg[0]; temp_data = reference_data; @@ -1534,7 +1534,7 @@ int read_frame_10bit(float *reference_da reference_ptr += reference_stride; temp_data += stride / sizeof(*temp_data); } - + temp_data = distorted_data; for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { @@ -1546,8 +1546,8 @@ int read_frame_10bit(float *reference_da user_data->frame_set = 1; return 0; - } - return 2; + } + return 2; } int read_frame_8bit(float *reference_data, float *distorted_data, float *temp_data, int stride, void *s) @@ -1578,7 +1578,7 @@ int read_frame_8bit(float *reference_dat reference_ptr += reference_stride; temp_data += stride / sizeof(*temp_data); } - + temp_data = distorted_data; for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { @@ -1590,8 +1590,8 @@ int read_frame_8bit(float *reference_dat user_data->frame_set = 1; return 0; - } - return 2; + } + return 2; } double x265_calculate_vmaf_framelevelscore(x265_vmaf_framedata *vmafframedata) @@ -1603,8 +1603,8 @@ double x265_calculate_vmaf_framelevelsco read_frame = read_frame_8bit; else read_frame = read_frame_10bit; - compute_vmaf(&score, vcd->format, vmafframedata->width, vmafframedata->height, read_frame, vmafframedata, vcd->model_path, vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool); - + compute_vmaf(&score, vcd->format, vmafframedata->width, vmafframedata->height, read_frame, vmafframedata, vcd->model_path, vcd->log_path, vcd->log_fmt, vcd->disable_clip, vcd->disable_avx, vcd->enable_transform, vcd->phone_model, vcd->psnr, vcd->ssim, vcd->ms_ssim, vcd->pool, vcd->thread, vcd->subsample, vcd->enable_conf_interval); + return score; } #endif diff -r fd517ae68f93 -r f74003e88622 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Tue Sep 25 16:02:31 2018 +0530 +++ b/source/encoder/encoder.cpp Thu Nov 22 15:02:08 2018 +0530 @@ -2381,10 +2381,13 @@ void Encoder::getStreamHeaders(NALList& if (m_param->bEmitHDRSEI) { - SEIContentLightLevel cllsei; - cllsei.max_content_light_level = m_param->maxCLL; - cllsei.max_pic_average_light_level = m_param->maxFALL; - cllsei.writeSEImessages(bs, m_sps, NAL_UNIT_PREFIX_SEI, list, m_param->bSingleSeiNal); + if (m_emitCLLSEI) + { + SEIContentLightLevel cllsei; + cllsei.max_content_light_level = m_param->maxCLL; + cllsei.max_pic_average_light_level = m_param->maxFALL; + cllsei.writeSEImessages(bs, m_sps, NAL_UNIT_PREFIX_SEI, list, m_param->bSingleSeiNal); + } if (m_param->masteringDisplayColorVolume) { diff -r fd517ae68f93 -r f74003e88622 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Tue Sep 25 16:02:31 2018 +0530 +++ b/source/encoder/frameencoder.cpp Thu Nov 22 15:02:08 2018 +0530 @@ -1599,11 +1599,11 @@ void FrameEncoder::processRowEncoder(int if (!m_param->bEnableWavefront && col == numCols - 1) { double qpBase = curEncData.m_cuStat[cuAddr].baseQp; - int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId); + curRow.reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId); qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase); curEncData.m_rowStat[row].rowQp = qpBase; curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase); - if (reEncode < 0) + if (curRow.reEncode < 0) { x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n", m_frame->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp); @@ -1642,17 +1642,19 @@ void FrameEncoder::processRowEncoder(int curEncData.m_rowStat[r].sumQpRc += curEncData.m_cuStat[c].baseQp; curEncData.m_rowStat[r].numEncodedCUs = c; } + if (curRow.reEncode < 0) + break; startCuAddr = EndCuAddr - numCols; EndCuAddr = startCuAddr + 1; } } double qpBase = curEncData.m_cuStat[cuAddr].baseQp; - int reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId); + curRow.reEncode = m_top->m_rateControl->rowVbvRateControl(m_frame, row, &m_rce, qpBase, m_sliceBaseRow, sliceId); qpBase = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, qpBase); curEncData.m_rowStat[row].rowQp = qpBase; curEncData.m_rowStat[row].rowQpScale = x265_qp2qScale(qpBase); - if (reEncode < 0) + if (curRow.reEncode < 0) { x265_log(m_param, X265_LOG_DEBUG, "POC %d row %d - encode restart required for VBV, to %.2f from %.2f\n", m_frame->m_poc, row, qpBase, curEncData.m_cuStat[cuAddr].baseQp); diff -r fd517ae68f93 -r f74003e88622 source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Tue Sep 25 16:02:31 2018 +0530 +++ b/source/encoder/frameencoder.h Thu Nov 22 15:02:08 2018 +0530 @@ -97,6 +97,8 @@ struct CTURow volatile uint32_t completed; volatile uint32_t avgQPComputed; + volatile int reEncode; + /* called at the start of each frame to initialize state */ void init(Entropy& initContext, unsigned int sid) { @@ -105,6 +107,7 @@ struct CTURow completed = 0; avgQPComputed = 0; sliceId = sid; + reEncode = 0; memset(&rowStats, 0, sizeof(rowStats)); rowGoOnCoder.load(initContext); } diff -r fd517ae68f93 -r f74003e88622 source/test/testharness.h --- a/source/test/testharness.h Tue Sep 25 16:02:31 2018 +0530 +++ b/source/test/testharness.h Thu Nov 22 15:02:08 2018 +0530 @@ -93,9 +93,9 @@ static inline uint32_t __rdtsc(void) #define BENCH_RUNS 2000 -// Adapted from checkasm.c, runs each optimized primitive four times, measures rdtsc -// and discards invalid times. Repeats 1000 times to get a good average. Then measures -// the C reference with fewer runs and reports X factor and average cycles. +/* Adapted from checkasm.c, runs each optimized primitive four times, measures rdtsc + * and discards invalid times. Repeats BENCH_RUNS times to get a good average. + * Then measures the C reference with BENCH_RUNS / 4 runs and reports X factor and average cycles.*/