[x265-commits] [x265] Merge with default

Ashok Kumar Mishra ashok at multicorewareinc.com
Fri Aug 10 01:03:03 CEST 2018


details:   http://hg.videolan.org/x265/rev/b44d5f0e42f8
branches:  stable
changeset: 12407:b44d5f0e42f8
user:      Ashok Kumar Mishra <ashok at multicorewareinc.com>
date:      Thu Aug 09 16:35:57 2018 +0530
description:
Merge with default

diffstat:

 doc/reST/api.rst                              |    12 +
 doc/reST/cli.rst                              |    38 +-
 doc/reST/presets.rst                          |     5 +-
 source/CMakeLists.txt                         |     2 +-
 source/common/cpu.cpp                         |    21 +-
 source/common/cpu.h                           |     2 +-
 source/common/dct.cpp                         |    50 +-
 source/common/frame.cpp                       |     2 +-
 source/common/frame.h                         |     1 -
 source/common/framedata.h                     |    42 -
 source/common/lowres.cpp                      |     8 +-
 source/common/param.cpp                       |    33 +-
 source/common/picyuv.cpp                      |    13 +
 source/common/predict.cpp                     |    10 +-
 source/common/primitives.h                    |     5 +-
 source/common/quant.cpp                       |    19 +-
 source/common/slice.cpp                       |     2 +-
 source/common/slice.h                         |     4 +-
 source/common/x86/asm-primitives.cpp          |    18 +-
 source/common/x86/dct8.asm                    |   580 +++++++++++
 source/common/x86/dct8.h                      |     3 +
 source/common/x86/h-ipfilter16.asm            |  1243 ++++++++++++++----------
 source/common/x86/x86inc.asm                  |   138 +-
 source/dynamicHDR10/SeiMetadataDictionary.cpp |     5 +
 source/dynamicHDR10/SeiMetadataDictionary.h   |     5 +
 source/dynamicHDR10/metadataFromJson.cpp      |   345 +++---
 source/dynamicHDR10/metadataFromJson.h        |     9 +-
 source/encoder/analysis.cpp                   |    96 +-
 source/encoder/analysis.h                     |    22 +-
 source/encoder/api.cpp                        |   173 +++-
 source/encoder/encoder.cpp                    |   799 +++++++--------
 source/encoder/encoder.h                      |    19 +-
 source/encoder/entropy.cpp                    |    12 +-
 source/encoder/frameencoder.cpp               |    58 +-
 source/encoder/ratecontrol.cpp                |    26 +-
 source/encoder/ratecontrol.h                  |     2 +
 source/encoder/reference.cpp                  |     2 +-
 source/encoder/search.cpp                     |    10 +-
 source/encoder/sei.cpp                        |    60 +
 source/encoder/sei.h                          |     1 +
 source/encoder/slicetype.cpp                  |   197 ++-
 source/encoder/weightPrediction.cpp           |    16 +-
 source/test/mbdstharness.cpp                  |    66 +-
 source/test/mbdstharness.h                    |     1 +
 source/test/regression-tests.txt              |    11 +-
 source/test/smoke-tests.txt                   |     2 +-
 source/x265.cpp                               |     4 +-
 source/x265.h                                 |   139 ++-
 source/x265cli.h                              |     6 +
 49 files changed, 2839 insertions(+), 1498 deletions(-)

diffs (truncated from 6621 to 300 lines):

diff -r df5bd3be9b11 -r b44d5f0e42f8 doc/reST/api.rst
--- a/doc/reST/api.rst	Fri Jun 01 14:56:40 2018 +0530
+++ b/doc/reST/api.rst	Thu Aug 09 16:35:57 2018 +0530
@@ -223,6 +223,18 @@ changes made to the parameters for auto-
      *     returns negative on error, 0 access unit were output.*/
      int x265_set_analysis_data(x265_encoder *encoder, x265_analysis_data *analysis_data, int poc, uint32_t cuBytes);
 
+**x265_alloc_analysis_data()** may be used to allocate memory for the x265_analysis_data::
+
+    /* x265_alloc_analysis_data:
+     *     Allocate memory for the x265_analysis_data object's internal structures. */
+     void x265_alloc_analysis_data(x265_param *param, x265_analysis_data* analysis);
+
+**x265_free_analysis_data()** may be used to free memory for the x265_analysis_data::
+
+    /* x265_free_analysis_data:
+     *    Free the allocated memory for x265_analysis_data object's internal structures. */
+     void x265_free_analysis_data(x265_param *param, x265_analysis_data* analysis);
+
 Pictures
 ========
 
diff -r df5bd3be9b11 -r b44d5f0e42f8 doc/reST/cli.rst
--- a/doc/reST/cli.rst	Fri Jun 01 14:56:40 2018 +0530
+++ b/doc/reST/cli.rst	Thu Aug 09 16:35:57 2018 +0530
@@ -535,6 +535,20 @@ frame counts) are only applicable to the
 
 	**CLI ONLY**
 
+.. option:: --chunk-start <integer>
+
+	First frame of the chunk. Frames preceding this in display order will
+	be encoded; however, they will be discarded from the bitstream. This
+	feature can be enabled only in closed GOP structures.
+	Default 0 (disabled).
+	
+.. option:: --chunk-end <integer>
+
+	Last frame of the chunk. Frames following this in display order will be
+	used in taking lookahead decisions, but they will not be encoded.
+	This feature can be enabled only in closed GOP structures.
+	Default 0 (disabled).
+
 Profile, Level, Tier
 ====================
 
@@ -895,11 +909,11 @@ will not reuse analysis if slice type pa
     +--------------+------------------------------------------+
     | 2 to 4       | Level 1 + intra/inter modes, ref's       |
     +--------------+------------------------------------------+
-    | 5,6 and 9    | Level 2 + rect-amp                       |
+    | 5 and 6      | Level 2 + rect-amp                       |
     +--------------+------------------------------------------+
     | 7            | Level 5 + AVC size CU refinement         |
     +--------------+------------------------------------------+
-    | 8            | Level 5 + AVC size Full CU analysis-info |
+    | 8 and 9      | Level 5 + AVC size Full CU analysis-info |
     +--------------+------------------------------------------+
     | 10           | Level 5 + Full CU analysis-info          |
     +--------------+------------------------------------------+
@@ -1225,7 +1239,7 @@ Temporal / motion search options
 
 .. option:: --analyze-src-pics, --no-analyze-src-pics
 
-    Enalbe motion estimation with source frame pixels, in this mode, 
+    Enable motion estimation with source frame pixels, in this mode, 
     motion estimation can be computed independently. Default disabled.
 
 Spatial/intra options
@@ -2121,6 +2135,24 @@ VUI fields must be manually specified.
 
 	Maximum luma value allowed for input pictures. Any values above max-luma
 	are clipped.  No default.
+    
+.. option:: --nalu-file <filename>
+
+    Text file containing userSEI in POC order : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>
+    Parses the specified input file and inserts SEI messages into the bitstream.
+    Currently, we support only PREFIX SEI messages. This is an "application-only" feature.
+
+.. option:: --atc-sei <integer>
+
+	Emit the alternative transfer characteristics SEI message where the integer
+	is the preferred transfer characteristics. Required for HLG (Hybrid Log Gamma)
+	signalling. Not signalled by default.
+
+.. option:: --pic-struct <integer>
+
+	Sets the picture structure and emits it in the picture timing SEI message.
+	Values in the range 0..12. See D.3.3 of the HEVC spec. for a detailed explanation.
+	Required for HLG (Hybrid Log Gamma) signalling. Not signalled by default.
 
 Bitstream options
 =================
diff -r df5bd3be9b11 -r b44d5f0e42f8 doc/reST/presets.rst
--- a/doc/reST/presets.rst	Fri Jun 01 14:56:40 2018 +0530
+++ b/doc/reST/presets.rst	Thu Aug 09 16:35:57 2018 +0530
@@ -156,7 +156,10 @@ It also enables a specialised ratecontro
 that strictly minimises QP fluctuations across frames, while still allowing 
 the encoder to hit bitrate targets and VBV buffer limits (with a slightly 
 higher margin of error than normal). It is highly recommended that this 
-algorithm is used only through the :option:`--tune` *grain* feature.
+algorithm is used only through the :option:`--tune` *grain* feature. 
+Overriding the `--tune` *grain* settings might result in grain strobing, especially
+when enabling features like :option:`--aq-mode` and :option:`--cutree` that modify
+per-block QPs within a given frame.
 
 Fast Decode
 ~~~~~~~~~~~
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/CMakeLists.txt
--- a/source/CMakeLists.txt	Fri Jun 01 14:56:40 2018 +0530
+++ b/source/CMakeLists.txt	Thu Aug 09 16:35:57 2018 +0530
@@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CP
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 160)
+set(X265_BUILD 164)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/cpu.cpp
--- a/source/common/cpu.cpp	Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/cpu.cpp	Thu Aug 09 16:35:57 2018 +0530
@@ -58,6 +58,7 @@ static void sigill_handler(int sig)
 #endif // if X265_ARCH_ARM
 
 namespace X265_NS {
+static bool enable512 = false;
 const cpu_name_t cpu_names[] =
 {
 #if X265_ARCH_X86
@@ -122,10 +123,14 @@ uint64_t PFX(cpu_xgetbv)(int xcr);
 #pragma warning(disable: 4309) // truncation of constant value
 #endif
 
+bool detect512()
+{
+    return(enable512);
+}
 uint32_t cpu_detect(bool benableavx512 )
 {
-    uint32_t cpu = 0;
 
+    uint32_t cpu = 0; 
     uint32_t eax, ebx, ecx, edx;
     uint32_t vendor[4] = { 0 };
     uint32_t max_extended_cap, max_basic_cap;
@@ -189,7 +194,10 @@ uint32_t cpu_detect(bool benableavx512 )
                 if ((xcr0 & 0xE0) == 0xE0) /* OPMASK/ZMM state */
                 {
                     if ((ebx & 0xD0030000) == 0xD0030000)
+                    {
                         cpu |= X265_CPU_AVX512;
+                        enable512 = true;
+                    }
                 }
             }
         }
@@ -246,16 +254,8 @@ uint32_t cpu_detect(bool benableavx512 )
         int model  = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
         if (family == 6)
         {
-            /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
-             * theoretically support sse2, but it's significantly slower than mmx for
-             * almost all of x264's functions, so let's just pretend they don't. */
-            if (model == 9 || model == 13 || model == 14)
-            {
-                cpu &= ~(X265_CPU_SSE2 | X265_CPU_SSE3);
-                X265_CHECK(!(cpu & (X265_CPU_SSSE3 | X265_CPU_SSE4)), "unexpected CPU ID %d\n", cpu);
-            }
             /* Detect Atom CPU */
-            else if (model == 28)
+            if (model == 28)
             {
                 cpu |= X265_CPU_SLOW_ATOM;
                 cpu |= X265_CPU_SLOW_PSHUFB;
@@ -390,3 +390,4 @@ uint32_t cpu_detect(bool benableavx512)
 
 #endif // if X265_ARCH_X86
 }
+
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/cpu.h
--- a/source/common/cpu.h	Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/cpu.h	Thu Aug 09 16:35:57 2018 +0530
@@ -26,7 +26,6 @@
 #define X265_CPU_H
 
 #include "common.h"
-
 /* All assembly functions are prefixed with X265_NS (macro expanded) */
 #define PFX3(prefix, name) prefix ## _ ## name
 #define PFX2(prefix, name) PFX3(prefix, name)
@@ -51,6 +50,7 @@ extern "C" void PFX(safe_intel_cpu_indic
 
 namespace X265_NS {
 uint32_t cpu_detect(bool);
+bool detect512();
 
 struct cpu_name_t
 {
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/dct.cpp
--- a/source/common/dct.cpp	Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/dct.cpp	Thu Aug 09 16:35:57 2018 +0530
@@ -1027,6 +1027,47 @@ static void psyRdoQuant_c(int16_t *m_res
         blkPos += trSize;
     }
 }
+template<int log2TrSize>
+static void psyRdoQuant_c_1(int16_t *m_resiDctCoeff, /*int16_t  *m_fencDctCoeff, */ int64_t *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost, /* int64_t *psyScale,*/ uint32_t blkPos)
+{
+	const int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
+	const int scaleBits = SCALE_BITS - 2 * transformShift;
+	const uint32_t trSize = 1 << log2TrSize;
+
+	for (int y = 0; y < MLS_CG_SIZE; y++)
+	{
+		for (int x = 0; x < MLS_CG_SIZE; x++)
+		{
+			int64_t signCoef = m_resiDctCoeff[blkPos + x];            /* pre-quantization DCT coeff */
+			costUncoded[blkPos + x] = static_cast<int64_t>((double)((signCoef * signCoef) << scaleBits));
+			*totalUncodedCost += costUncoded[blkPos + x];
+			*totalRdCost += costUncoded[blkPos + x];
+		}
+		blkPos += trSize;
+	}
+}
+template<int log2TrSize>
+static void psyRdoQuant_c_2(int16_t *m_resiDctCoeff, int16_t *m_fencDctCoeff, int64_t *costUncoded, int64_t *totalUncodedCost, int64_t *totalRdCost, int64_t *psyScale, uint32_t blkPos)
+{
+	const int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
+
+	const uint32_t trSize = 1 << log2TrSize;
+	int max = X265_MAX(0, (2 * transformShift + 1));
+
+	for (int y = 0; y < MLS_CG_SIZE; y++)
+	{
+		for (int x = 0; x < MLS_CG_SIZE; x++)
+		{
+			int64_t signCoef = m_resiDctCoeff[blkPos + x];            /* pre-quantization DCT coeff */
+			int64_t predictedCoef = m_fencDctCoeff[blkPos + x] - signCoef; /* predicted DCT = source DCT - residual DCT*/
+			costUncoded[blkPos + x] -= static_cast<int64_t>((double)(((*psyScale) * predictedCoef) >> max));
+			*totalUncodedCost += costUncoded[blkPos + x];
+			*totalRdCost += costUncoded[blkPos + x];
+		}
+		blkPos += trSize;
+	}
+}
+
 namespace X265_NS {
 // x265 private namespace
 void setupDCTPrimitives_c(EncoderPrimitives& p)
@@ -1063,7 +1104,14 @@ void setupDCTPrimitives_c(EncoderPrimiti
     p.cu[BLOCK_8x8].copy_cnt   = copy_count<8>;
     p.cu[BLOCK_16x16].copy_cnt = copy_count<16>;
     p.cu[BLOCK_32x32].copy_cnt = copy_count<32>;
-
+	p.cu[BLOCK_4x4].psyRdoQuant_1p = psyRdoQuant_c_1<2>;
+	p.cu[BLOCK_4x4].psyRdoQuant_2p = psyRdoQuant_c_2<2>;
+	p.cu[BLOCK_8x8].psyRdoQuant_1p = psyRdoQuant_c_1<3>;
+	p.cu[BLOCK_8x8].psyRdoQuant_2p = psyRdoQuant_c_2<3>;
+	p.cu[BLOCK_16x16].psyRdoQuant_1p = psyRdoQuant_c_1<4>;
+	p.cu[BLOCK_16x16].psyRdoQuant_2p = psyRdoQuant_c_2<4>;
+	p.cu[BLOCK_32x32].psyRdoQuant_1p = psyRdoQuant_c_1<5>;
+	p.cu[BLOCK_32x32].psyRdoQuant_2p = psyRdoQuant_c_2<5>;
     p.scanPosLast = scanPosLast_c;
     p.findPosFirstLast = findPosFirstLast_c;
     p.costCoeffNxN = costCoeffNxN_c;
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/frame.cpp
--- a/source/common/frame.cpp	Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/frame.cpp	Thu Aug 09 16:35:57 2018 +0530
@@ -83,7 +83,7 @@ bool Frame::create(x265_param *param, fl
         m_analysisData.wt = NULL;
         m_analysisData.intraData = NULL;
         m_analysisData.interData = NULL;
-        m_analysis2Pass.analysisFramedata = NULL;
+        m_analysisData.distortionData = NULL;
     }
 
     if (param->bDynamicRefine)
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/frame.h
--- a/source/common/frame.h	Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/frame.h	Thu Aug 09 16:35:57 2018 +0530
@@ -109,7 +109,6 @@ public:
     Frame*                 m_prev;
     x265_param*            m_param;              // Points to the latest param set for the frame.
     x265_analysis_data     m_analysisData;
-    x265_analysis_2Pass    m_analysis2Pass;
     RcStats*               m_rcData;
 
     Event                  m_copyMVType;
diff -r df5bd3be9b11 -r b44d5f0e42f8 source/common/framedata.h
--- a/source/common/framedata.h	Fri Jun 01 14:56:40 2018 +0530
+++ b/source/common/framedata.h	Thu Aug 09 16:35:57 2018 +0530
@@ -179,47 +179,5 @@ public:
     inline CUData* getPicCTU(uint32_t ctuAddr) { return &m_picCTU[ctuAddr]; }
 };
 
-/* Stores intra analysis data for a single frame. This struct needs better packing */
-struct analysis_intra_data


More information about the x265-commits mailing list