[x265-commits] [x265] Merge with default
Ashok Kumar Mishra
ashok at multicorewareinc.com
Fri Aug 10 01:03:03 CEST 2018
details: http://hg.videolan.org/x265/rev/b44d5f0e42f8
branches: stable
changeset: 12407:b44d5f0e42f8
user: Ashok Kumar Mishra <ashok at multicorewareinc.com>
date: Thu Aug 09 16:35:57 2018 +0530
description:
Merge with default
diffstat:
doc/reST/api.rst | 12 +
doc/reST/cli.rst | 38 +-
doc/reST/presets.rst | 5 +-
source/CMakeLists.txt | 2 +-
source/common/cpu.cpp | 21 +-
source/common/cpu.h | 2 +-
source/common/dct.cpp | 50 +-
source/common/frame.cpp | 2 +-
source/common/frame.h | 1 -
source/common/framedata.h | 42 -
source/common/lowres.cpp | 8 +-
source/common/param.cpp | 33 +-
source/common/picyuv.cpp | 13 +
source/common/predict.cpp | 10 +-
source/common/primitives.h | 5 +-
source/common/quant.cpp | 19 +-
source/common/slice.cpp | 2 +-
source/common/slice.h | 4 +-
source/common/x86/asm-primitives.cpp | 18 +-
source/common/x86/dct8.asm | 580 +++++++++++
source/common/x86/dct8.h | 3 +
source/common/x86/h-ipfilter16.asm | 1243 ++++++++++++++----------
source/common/x86/x86inc.asm | 138 +-
source/dynamicHDR10/SeiMetadataDictionary.cpp | 5 +
source/dynamicHDR10/SeiMetadataDictionary.h | 5 +
source/dynamicHDR10/metadataFromJson.cpp | 345 +++---
source/dynamicHDR10/metadataFromJson.h | 9 +-
source/encoder/analysis.cpp | 96 +-
source/encoder/analysis.h | 22 +-
source/encoder/api.cpp | 173 +++-
source/encoder/encoder.cpp | 799 +++++++--------
source/encoder/encoder.h | 19 +-
source/encoder/entropy.cpp | 12 +-
source/encoder/frameencoder.cpp | 58 +-
source/encoder/ratecontrol.cpp | 26 +-
source/encoder/ratecontrol.h | 2 +
source/encoder/reference.cpp | 2 +-
source/encoder/search.cpp | 10 +-
source/encoder/sei.cpp | 60 +
source/encoder/sei.h | 1 +
source/encoder/slicetype.cpp | 197 ++-
source/encoder/weightPrediction.cpp | 16 +-
source/test/mbdstharness.cpp | 66 +-
source/test/mbdstharness.h | 1 +
source/test/regression-tests.txt | 11 +-
source/test/smoke-tests.txt | 2 +-
source/x265.cpp | 4 +-
source/x265.h | 139 ++-
source/x265cli.h | 6 +
49 files changed, 2839 insertions(+), 1498 deletions(-)
diffs (truncated from 6621 to 300 lines):
diff -r df5bd3be9b11 -r b44d5f0e42f8 doc/reST/api.rst
--- a/doc/reST/api.rst Fri Jun 01 14:56:40 2018 +0530
+++ b/doc/reST/api.rst Thu Aug 09 16:35:57 2018 +0530
@@ -223,6 +223,18 @@ changes made to the parameters for auto-
* returns negative on error, 0 access unit were output.*/
int x265_set_analysis_data(x265_encoder *encoder, x265_analysis_data *analysis_data, int poc, uint32_t cuBytes);
+**x265_alloc_analysis_data()** may be used to allocate memory for the x265_analysis_data::
+
+ /* x265_alloc_analysis_data:
+ * Allocate memory for the x265_analysis_data object's internal structures. */
+ void x265_alloc_analysis_data(x265_param *param, x265_analysis_data* analysis);
+
+**x265_free_analysis_data()** may be used to free memory for the x265_analysis_data::
+
+ /* x265_free_analysis_data:
+ * Free the allocated memory for x265_analysis_data object's internal structures. */
+ void x265_free_analysis_data(x265_param *param, x265_analysis_data* analysis);
+
Pictures
========
diff -r df5bd3be9b11 -r b44d5f0e42f8 doc/reST/cli.rst
--- a/doc/reST/cli.rst Fri Jun 01 14:56:40 2018 +0530
+++ b/doc/reST/cli.rst Thu Aug 09 16:35:57 2018 +0530
@@ -535,6 +535,20 @@ frame counts) are only applicable to the
**CLI ONLY**
+.. option:: --chunk-start <integer>
+
+ First frame of the chunk. Frames preceding this in display order will
+ be encoded; however, they will be discarded from the bitstream. This
+ feature can be enabled only in closed GOP structures.
+ Default 0 (disabled).
+
+.. option:: --chunk-end <integer>
+
+ Last frame of the chunk. Frames following this in display order will be
+ used in taking lookahead decisions, but, they will not be encoded.
+ This feature can be enabled only in closed GOP structures.
+ Default 0 (disabled).
+
Profile, Level, Tier
====================
@@ -895,11 +909,11 @@ will not reuse analysis if slice type pa
+--------------+------------------------------------------+
| 2 to 4 | Level 1 + intra/inter modes, ref's |
+--------------+------------------------------------------+
- | 5,6 and 9 | Level 2 + rect-amp |
+ | 5 and 6 | Level 2 + rect-amp |
+--------------+------------------------------------------+
| 7 | Level 5 + AVC size CU refinement |
+--------------+------------------------------------------+
- | 8 | Level 5 + AVC size Full CU analysis-info |
+ | 8 and 9 | Level 5 + AVC size Full CU analysis-info |
+--------------+------------------------------------------+
| 10 | Level 5 + Full CU analysis-info |
+--------------+------------------------------------------+
@@ -1225,7 +1239,7 @@ Temporal / motion search options
.. option:: --analyze-src-pics, --no-analyze-src-pics
- Enalbe motion estimation with source frame pixels, in this mode,
+ Enable motion estimation with source frame pixels, in this mode,
motion estimation can be computed independently. Default disabled.
Spatial/intra options
@@ -2121,6 +2135,24 @@ VUI fields must be manually specified.
Maximum luma value allowed for input pictures. Any values above max-luma
are clipped. No default.
+
+.. option:: --nalu-file <filename>
+
+ Text file containing userSEI in POC order : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>
+ Parses the specified input file and inserts SEI messages into the bitstream.
+ Currently, we support only PREFIX SEI messages. This is an "application-only" feature.
+
+.. option:: --atc-sei <integer>
+
+ Emit the alternative transfer characteristics SEI message where the integer
+ is the preferred transfer characteristics. Required for HLG (Hybrid Log Gamma)
+ signalling. Not signalled by default.
+
+.. option:: --pic-struct <integer>
+
+ Sets the picture structure and emits it in the picture timing SEI message.
+ Values in the range 0..12. See D.3.3 of the HEVC spec. for a detailed explanation.
+ Required for HLG (Hybrid Log Gamma) signalling. Not signalled by default.
Bitstream options
=================
diff -r df5bd3be9b11 -r b44d5f0e42f8 doc/reST/presets.rst
--- a/doc/reST/presets.rst Fri Jun 01 14:56:40 2018 +0530
+++ b/doc/reST/presets.rst Thu Aug 09 16:35:57 2018 +0530
@@ -156,7 +156,10 @@ It also enables a specialised ratecontro
that strictly minimises QP fluctuations across frames, while still allowing
the encoder to hit bitrate targets and VBV buffer limits (with a slightly
higher margin of error than normal). It is highly recommended that this
-algorithm is used only through the :option:`--tune` *grain* feature.
+algorithm is used only through the :option:`--tune` *grain* feature.
+Overriding the `--tune` *grain* settings might result in grain strobing, especially
+when enabling features like :option:`--aq-mode` and :option:`--cutree` that modify
+per-block QPs within a given frame.
Fast Decode
~~~~~~~~~~~
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/CMakeLists.txt
--- a/source/CMakeLists.txt Fri Jun 01 14:56:40 2018 +0530
+++ b/source/CMakeLists.txt Thu Aug 09 16:35:57 2018 +0530
@@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CP
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 160)
+set(X265_BUILD 164)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/cpu.cpp
--- a/source/common/cpu.cpp Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/cpu.cpp Thu Aug 09 16:35:57 2018 +0530
@@ -58,6 +58,7 @@ static void sigill_handler(int sig)
#endif // if X265_ARCH_ARM
namespace X265_NS {
+static bool enable512 = false;
const cpu_name_t cpu_names[] =
{
#if X265_ARCH_X86
@@ -122,10 +123,14 @@ uint64_t PFX(cpu_xgetbv)(int xcr);
#pragma warning(disable: 4309) // truncation of constant value
#endif
+bool detect512()
+{
+ return(enable512);
+}
uint32_t cpu_detect(bool benableavx512 )
{
- uint32_t cpu = 0;
+ uint32_t cpu = 0;
uint32_t eax, ebx, ecx, edx;
uint32_t vendor[4] = { 0 };
uint32_t max_extended_cap, max_basic_cap;
@@ -189,7 +194,10 @@ uint32_t cpu_detect(bool benableavx512 )
if ((xcr0 & 0xE0) == 0xE0) /* OPMASK/ZMM state */
{
if ((ebx & 0xD0030000) == 0xD0030000)
+ {
cpu |= X265_CPU_AVX512;
+ enable512 = true;
+ }
}
}
}
@@ -246,16 +254,8 @@ uint32_t cpu_detect(bool benableavx512 )
int model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
if (family == 6)
{
- /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
- * theoretically support sse2, but it's significantly slower than mmx for
- * almost all of x264's functions, so let's just pretend they don't. */
- if (model == 9 || model == 13 || model == 14)
- {
- cpu &= ~(X265_CPU_SSE2 | X265_CPU_SSE3);
- X265_CHECK(!(cpu & (X265_CPU_SSSE3 | X265_CPU_SSE4)), "unexpected CPU ID %d\n", cpu);
- }
/* Detect Atom CPU */
- else if (model == 28)
+ if (model == 28)
{
cpu |= X265_CPU_SLOW_ATOM;
cpu |= X265_CPU_SLOW_PSHUFB;
@@ -390,3 +390,4 @@ uint32_t cpu_detect(bool benableavx512)
#endif // if X265_ARCH_X86
}
+
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/cpu.h
--- a/source/common/cpu.h Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/cpu.h Thu Aug 09 16:35:57 2018 +0530
@@ -26,7 +26,6 @@
#define X265_CPU_H
#include "common.h"
-
/* All assembly functions are prefixed with X265_NS (macro expanded) */
#define PFX3(prefix, name) prefix ## _ ## name
#define PFX2(prefix, name) PFX3(prefix, name)
@@ -51,6 +50,7 @@ extern "C" void PFX(safe_intel_cpu_indic
namespace X265_NS {
uint32_t cpu_detect(bool);
+bool detect512();
struct cpu_name_t
{
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/dct.cpp
--- a/source/common/dct.cpp Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/dct.cpp Thu Aug 09 16:35:57 2018 +0530
@@ -1027,6 +1027,47 @@ static void psyRdoQuant_c(int16_t *m_res
blkPos += trSize;
}
}
+template<int log2TrSize>
+static void psyRdoQuant_c_1(int16_t *m_resiDctCoeff, /*int16_t *m_fencDctCoeff, */ int64_t *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost, /* int64_t *psyScale,*/ uint32_t blkPos)
+{
+ const int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
+ const int scaleBits = SCALE_BITS - 2 * transformShift;
+ const uint32_t trSize = 1 << log2TrSize;
+
+ for (int y = 0; y < MLS_CG_SIZE; y++)
+ {
+ for (int x = 0; x < MLS_CG_SIZE; x++)
+ {
+ int64_t signCoef = m_resiDctCoeff[blkPos + x]; /* pre-quantization DCT coeff */
+ costUncoded[blkPos + x] = static_cast<int64_t>((double)((signCoef * signCoef) << scaleBits));
+ *totalUncodedCost += costUncoded[blkPos + x];
+ *totalRdCost += costUncoded[blkPos + x];
+ }
+ blkPos += trSize;
+ }
+}
+template<int log2TrSize>
+static void psyRdoQuant_c_2(int16_t *m_resiDctCoeff, int16_t *m_fencDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost, int64_t *psyScale, uint32_t blkPos)
+{
+ const int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
+
+ const uint32_t trSize = 1 << log2TrSize;
+ int max = X265_MAX(0, (2 * transformShift + 1));
+
+ for (int y = 0; y < MLS_CG_SIZE; y++)
+ {
+ for (int x = 0; x < MLS_CG_SIZE; x++)
+ {
+ int64_t signCoef = m_resiDctCoeff[blkPos + x]; /* pre-quantization DCT coeff */
+ int64_t predictedCoef = m_fencDctCoeff[blkPos + x] - signCoef; /* predicted DCT = source DCT - residual DCT*/
+ costUncoded[blkPos + x] -= static_cast<int64_t>((double)(((*psyScale) * predictedCoef) >> max));
+ *totalUncodedCost += costUncoded[blkPos + x];
+ *totalRdCost += costUncoded[blkPos + x];
+ }
+ blkPos += trSize;
+ }
+}
+
namespace X265_NS {
// x265 private namespace
void setupDCTPrimitives_c(EncoderPrimitives& p)
@@ -1063,7 +1104,14 @@ void setupDCTPrimitives_c(EncoderPrimiti
p.cu[BLOCK_8x8].copy_cnt = copy_count<8>;
p.cu[BLOCK_16x16].copy_cnt = copy_count<16>;
p.cu[BLOCK_32x32].copy_cnt = copy_count<32>;
-
+ p.cu[BLOCK_4x4].psyRdoQuant_1p = psyRdoQuant_c_1<2>;
+ p.cu[BLOCK_4x4].psyRdoQuant_2p = psyRdoQuant_c_2<2>;
+ p.cu[BLOCK_8x8].psyRdoQuant_1p = psyRdoQuant_c_1<3>;
+ p.cu[BLOCK_8x8].psyRdoQuant_2p = psyRdoQuant_c_2<3>;
+ p.cu[BLOCK_16x16].psyRdoQuant_1p = psyRdoQuant_c_1<4>;
+ p.cu[BLOCK_16x16].psyRdoQuant_2p = psyRdoQuant_c_2<4>;
+ p.cu[BLOCK_32x32].psyRdoQuant_1p = psyRdoQuant_c_1<5>;
+ p.cu[BLOCK_32x32].psyRdoQuant_2p = psyRdoQuant_c_2<5>;
p.scanPosLast = scanPosLast_c;
p.findPosFirstLast = findPosFirstLast_c;
p.costCoeffNxN = costCoeffNxN_c;
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/frame.cpp
--- a/source/common/frame.cpp Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/frame.cpp Thu Aug 09 16:35:57 2018 +0530
@@ -83,7 +83,7 @@ bool Frame::create(x265_param *param, fl
m_analysisData.wt = NULL;
m_analysisData.intraData = NULL;
m_analysisData.interData = NULL;
- m_analysis2Pass.analysisFramedata = NULL;
+ m_analysisData.distortionData = NULL;
}
if (param->bDynamicRefine)
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/frame.h
--- a/source/common/frame.h Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/frame.h Thu Aug 09 16:35:57 2018 +0530
@@ -109,7 +109,6 @@ public:
Frame* m_prev;
x265_param* m_param; // Points to the latest param set for the frame.
x265_analysis_data m_analysisData;
- x265_analysis_2Pass m_analysis2Pass;
RcStats* m_rcData;
Event m_copyMVType;
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/framedata.h
--- a/source/common/framedata.h Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/framedata.h Thu Aug 09 16:35:57 2018 +0530
@@ -179,47 +179,5 @@ public:
inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
};
-/* Stores intra analysis data for a single frame. This struct needs better packing */
-struct analysis_intra_data
More information about the x265-commits
mailing list