[x265-commits] [x265] doc: correct frame thread docs

Wed Apr 22 07:01:37 CEST 2015

details:   http://hg.videolan.org/x265/rev/2f1c93a0fd75
branches:  stable
changeset: 10246:2f1c93a0fd75
user:      Steve Borho <steve at borho.org>
date:      Tue Apr 21 23:50:09 2015 -0500
description:
doc: correct frame thread docs
Subject: [x265] Merge with stable

details:   http://hg.videolan.org/x265/rev/86268e498680
branches:  
changeset: 10247:86268e498680
user:      Steve Borho <steve at borho.org>
date:      Wed Apr 22 00:00:39 2015 -0500
description:
Merge with stable

diffstat:

 doc/reST/cli.rst                         |    93 +-
 doc/reST/threading.rst                   |    15 +
 readme.rst                               |     2 +-
 source/CMakeLists.txt                    |    22 +-
 source/common/common.cpp                 |    13 +-
 source/common/common.h                   |     3 +-
 source/common/constants.cpp              |     2 +-
 source/common/cudata.cpp                 |     6 +-
 source/common/cudata.h                   |     2 +-
 source/common/dct.cpp                    |    32 +
 source/common/ipfilter.cpp               |    36 +-
 source/common/loopfilter.cpp             |    61 +-
 source/common/param.cpp                  |    27 +-
 source/common/picyuv.cpp                 |    10 +-
 source/common/pixel.cpp                  |     2 +-
 source/common/predict.cpp                |    31 +-
 source/common/primitives.cpp             |     3 +-
 source/common/primitives.h               |    24 +-
 source/common/quant.cpp                  |   400 +-
 source/common/quant.h                    |    33 +-
 source/common/slice.h                    |     1 +
 source/common/threadpool.cpp             |     4 +-
 source/common/threadpool.h               |     2 +-
 source/common/x86/asm-primitives.cpp     |   577 ++-
 source/common/x86/const-a.asm            |   160 +-
 source/common/x86/dct8.asm               |   145 +-
 source/common/x86/dct8.h                 |     2 +
 source/common/x86/intrapred.h            |    68 +
 source/common/x86/intrapred16.asm        |   502 ++
 source/common/x86/intrapred8.asm         |  4454 +++++++++++++++++++++-
 source/common/x86/intrapred8_allangs.asm |  1160 +++++-
 source/common/x86/ipfilter16.asm         |  1346 ++++++-
 source/common/x86/ipfilter8.asm          |  6113 +++++++++++++++++++----------
 source/common/x86/ipfilter8.h            |   220 +-
 source/common/x86/loopfilter.asm         |   713 +++-
 source/common/x86/loopfilter.h           |     9 +-
 source/common/x86/mc-a.asm               |     6 +
 source/common/x86/pixel-a.asm            |   381 +
 source/common/x86/pixel-util.h           |     7 +-
 source/common/x86/pixel-util8.asm        |   518 ++-
 source/common/x86/pixel.h                |     6 +
 source/common/x86/pixeladd8.asm          |    37 +-
 source/common/x86/sad-a.asm              |    99 +-
 source/common/x86/x86inc.asm             |     3 +-
 source/encoder/CMakeLists.txt            |     6 +-
 source/encoder/analysis.cpp              |   246 +-
 source/encoder/analysis.h                |    11 +-
 source/encoder/api.cpp                   |    18 +-
 source/encoder/encoder.cpp               |    52 +-
 source/encoder/encoder.h                 |     2 +-
 source/encoder/entropy.cpp               |    13 +-
 source/encoder/entropy.h                 |     5 +-
 source/encoder/frameencoder.cpp          |     2 -
 source/encoder/level.cpp                 |    15 +-
 source/encoder/nal.cpp                   |    18 +-
 source/encoder/nal.h                     |     1 +
 source/encoder/ratecontrol.cpp           |    12 +-
 source/encoder/ratecontrol.h             |     1 +
 source/encoder/sao.cpp                   |    85 +-
 source/encoder/search.cpp                |    83 +-
 source/encoder/search.h                  |     4 +-
 source/encoder/sei.h                     |    42 +
 source/encoder/slicetype.cpp             |    48 +-
 source/encoder/slicetype.h               |     2 +-
 source/input/input.cpp                   |     2 +-
 source/input/input.h                     |    10 +-
 source/input/y4m.cpp                     |     7 +-
 source/input/y4m.h                       |     2 +-
 source/input/yuv.cpp                     |     6 +-
 source/input/yuv.h                       |     2 +-
 source/output/output.cpp                 |    12 +-
 source/output/output.h                   |    43 +-
 source/output/raw.cpp                    |    80 +
 source/output/raw.h                      |    64 +
 source/output/reconplay.cpp              |   197 +
 source/output/reconplay.h                |    74 +
 source/output/y4m.h                      |     2 +-
 source/output/yuv.h                      |     2 +-
 source/test/ipfilterharness.cpp          |   122 +-
 source/test/ipfilterharness.h            |     1 -
 source/test/pixelharness.cpp             |   203 +-
 source/test/pixelharness.h               |     7 +-
 source/test/rate-control-tests.txt       |    70 +-
 source/test/regression-tests.txt         |    20 +-
 source/test/smoke-tests.txt              |     6 +-
 source/x265.cpp                          |   204 +-
 source/x265.h                            |    24 +
 source/x265cli.h                         |    10 +
 88 files changed, 15848 insertions(+), 3338 deletions(-)

diffs (truncated from 24360 to 300 lines):

diff -r 751f9cf1dfc9 -r 86268e498680 doc/reST/cli.rst

--- a/doc/reST/cli.rst	Mon Mar 16 12:05:38 2015 +0530
+++ b/doc/reST/cli.rst	Wed Apr 22 00:00:39 2015 -0500
@@ -159,6 +159,13 @@ Performance Options
 	handled implicitly.
 
 	One may also directly supply the CPU capability bitmap as an integer.
+	
+	Note that by specifying this option you are overriding x265's CPU
+	detection and it is possible to do this wrong. You can cause encoder
+	crashes by specifying SIMD architectures which are not supported on
+	your CPU.
+
+	Default: auto-detected SIMD architectures
 
 .. option:: --frame-threads, -F <integer>
 
@@ -171,7 +178,7 @@ Performance Options
 	Over-allocation of frame threads will not improve performance, it
 	will generally just increase memory use.
 
-	**Values:** any value between 8 and 16. Default is 0, auto-detect
+	**Values:** any value between 0 and 16. Default is 0, auto-detect
 
 .. option:: --pools <string>, --numa-pools <string>
 
@@ -201,11 +208,11 @@ Performance Options
 	their node, they will not be allowed to migrate between nodes, but they
 	will be allowed to move between CPU cores within their node.
 
-	If the three pool features: :option:`--wpp` :option:`--pmode` and
-	:option:`--pme` are all disabled, then :option:`--pools` is ignored
-	and no thread pools are created.
+	If the four pool features: :option:`--wpp`, :option:`--pmode`,
+	:option:`--pme` and :option:`--lookahead-slices` are all disabled,
+	then :option:`--pools` is ignored and no thread pools are created.
 
-	If "none" is specified, then all three of the thread pool features are
+	If "none" is specified, then all four of the thread pool features are
 	implicitly disabled.
 
 	Multiple thread pools will be allocated for any NUMA node with more than
@@ -217,9 +224,22 @@ Performance Options
 	:option:`--frame-threads`.  The pools are used for WPP and for
 	distributed analysis and motion search.
 
+	On Windows, the native APIs offer sufficient functionality to
+	discover the NUMA topology and enforce the thread affinity that
+	libx265 needs (so long as you have not chosen to target XP or
+	Vista), but on POSIX systems it relies on libnuma for this
+	functionality. If your target POSIX system is single socket, then
+	building without libnuma is a perfectly reasonable option, as it
+	will have no effect on the runtime behavior. On a multiple-socket
+	system, a POSIX build of libx265 without libnuma will be less work
+	efficient. See :ref:`thread pools <pools>` for more detail.
+
 	Default "", one thread is allocated per detected hardware thread
 	(logical CPU cores) and one thread pool per NUMA node.
 
+	Note that the string value will need to be escaped or quoted to
+	protect against shell expansion on many platforms.
+
 .. option:: --wpp, --no-wpp
 
 	Enable Wavefront Parallel Processing. The encoder may begin encoding
@@ -437,7 +457,7 @@ Profile, Level, Tier
 	times 10, for example level **5.1** is specified as "5.1" or "51",
 	and level **5.0** is specified as "5.0" or "50".
 
-	Annex A levels: 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, 6.1, 6.2
+	Annex A levels: 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, 6.1, 6.2, 8.5
 
 .. option:: --high-tier, --no-high-tier
 
@@ -464,11 +484,22 @@ Profile, Level, Tier
 	HEVC specification.  If x265 detects that the total reference count
 	is greater than 8, it will issue a warning that the resulting stream
 	is non-compliant and it signals the stream as profile NONE and level
-	NONE but still allows the encode to continue.  Compliant HEVC
+	NONE and will abort the encode unless
+	:option:`--allow-non-conformance` is specified.  Compliant HEVC
 	decoders may refuse to decode such streams.
 	
 	Default 3
 
+.. option:: --allow-non-conformance, --no-allow-non-conformance
+
+	Allow libx265 to generate a bitstream with profile and level NONE.
+	By default it will abort any encode which does not meet strict level
+	compliance. The most likely causes for non-conformance are
+	:option:`--ctu` being too small, :option:`--ref` being too high,
+	or the bitrate or resolution being out of specification.
+
+	Default: disabled
+
 .. note::
 	:option:`--profile`, :option:`--level-idc`, and
 	:option:`--high-tier` are only intended for use when you are
@@ -476,7 +507,7 @@ Profile, Level, Tier
 	limitations and must constrain the bitstream within those limits.
 	Specifying a profile or level may lower the encode quality
 	parameters to meet those requirements but it will never raise
-	them.
+	them. It may enable VBV constraints on a CRF encode.
 
 Mode decision / Analysis
 ========================
@@ -1111,6 +1142,13 @@ Quality, rate control and rate distortio
 
 	**Range of values:** 0.0 to 3.0
 
+.. option:: --qg-size <64|32|16>
+	Enable adaptive quantization for sub-CTUs. This parameter specifies
+	the minimum CU size at which QP can be adjusted, i.e. the Quantization
+	Group size. Allowed values are 64, 32 and 16, provided the value falls
+	within the inclusive range [minCUSize, maxCUSize]. Experimental.
+	Default: same as maxCUSize
+
 .. option:: --cutree, --no-cutree
 
 	Enable the use of lookahead's lowres motion vector fields to
@@ -1451,9 +1489,32 @@ VUI fields must be manually specified.
 	specification for a description of these values. Default undefined
 	(not signaled)
 
+.. option:: --master-display <string>
+
+	SMPTE ST 2086 mastering display color volume SEI info, specified as
+	a string which is parsed when the stream header SEI are emitted. The
+	string format is "Y(%hu,%hu)U(%hu,%hu)V(%hu,%hu)WP(%hu,%hu)L(%u,%u)"
+	where %hu are unsigned 16bit integers and %u are unsigned 32bit
+	integers. The SEI includes X,Y display primaries for YUV channels,
+	white point X,Y and max,min luminance values.
+
+	Example: Y(10,12)U(5,13)V(5,13)WP(100,100)L(1000,100)
+
+	Note that this string value will need to be escaped or quoted to
+	protect against shell expansion on many platforms.
+
 Bitstream options
 =================
 
+.. option:: --annexb, --no-annexb
+
+	If enabled, x265 will produce Annex B bitstream format, which places
+	start codes before NAL. If disabled, x265 will produce file format,
+	which places length before NAL. x265 CLI will choose the right option
+	based on output format. Default enabled
+
+	**API ONLY**
+
 .. option:: --repeat-headers, --no-repeat-headers
 
 	If enabled, x265 will emit VPS, SPS, and PPS headers with every
@@ -1525,4 +1586,20 @@ Debugging options
 
 	**CLI ONLY**
 
+.. option:: --recon-y4m-exec <string>
+
+	If you have an application which can play a Y4MPEG stream received
+	on stdin, the x265 CLI can feed it reconstructed pictures in display
+	order.  The pictures will have no timing info, obviously, so the
+	picture timing will be determined primarily by encoding elapsed time
+	and latencies, but it can be useful to preview the pictures being
+	output by the encoder to validate input settings and rate control
+	parameters.
+
+	Example command for ffplay (assuming it is in your PATH):
+
+	--recon-y4m-exec "ffplay -i pipe:0 -autoexit"
+
+	**CLI ONLY**
+
 .. vim: noet
diff -r 751f9cf1dfc9 -r 86268e498680 doc/reST/threading.rst
--- a/doc/reST/threading.rst	Mon Mar 16 12:05:38 2015 +0530
+++ b/doc/reST/threading.rst	Wed Apr 22 00:00:39 2015 -0500
@@ -2,6 +2,8 @@
 Threading
 *********
 
+.. _pools:
+
 Thread Pools
 ============
 
@@ -31,6 +33,18 @@ for data locking. If a job becomes block
 expected to drop that job so the worker thread may go back to the pool
 and find more work.
 
+On Windows, the native APIs offer sufficient functionality to discover
+the NUMA topology and enforce the thread affinity that libx265 needs (so
+long as you have not chosen to target XP or Vista), but on POSIX systems
+it relies on libnuma for this functionality. If your target POSIX system
+is single socket, then building without libnuma is a perfectly
+reasonable option, as it will have no effect on the runtime behavior. On
+a multiple-socket system, a POSIX build of libx265 without libnuma will
+be less work efficient, but will still function correctly. You lose the
+work isolation effect that keeps each frame encoder using only the
+threads of a single socket, and so you incur a heavier context switching
+cost.
+
 Wavefront Parallel Processing
 =============================
 
@@ -225,6 +239,7 @@ scene cuts and slice types) uses the thr
 lowres cost analysis to worker threads. It will use bonded task groups
 to perform batches of frame cost estimates, and it may optionally use
 bonded task groups to measure single frame cost estimates using slices.
+(see :option:`--lookahead-slices`)
 
 The function slicetypeDecide() itself is also be performed by a worker
 thread if your encoder has a thread pool, else it runs within the
diff -r 751f9cf1dfc9 -r 86268e498680 readme.rst
--- a/readme.rst	Mon Mar 16 12:05:38 2015 +0530
+++ b/readme.rst	Wed Apr 22 00:00:39 2015 -0500
@@ -3,7 +3,7 @@ x265 HEVC Encoder
 =================
 
 | **Read:** | Online `documentation <http://x265.readthedocs.org/en/default/>`_ | Developer `wiki <http://bitbucket.org/multicoreware/x265/wiki/>`_
-| **Download:** | `releases <http://bitbucket.org/multicoreware/x265/downloads/>`_ 
+| **Download:** | `releases <http://ftp.videolan.org/pub/videolan/x265/>`_ 
 | **Interact:** | #x265 on freenode.irc.net | `x265-devel at videolan.org <http://mailman.videolan.org/listinfo/x265-devel>`_ | `Report an issue <https://bitbucket.org/multicoreware/x265/issues?status=new&status=open>`_
 
 `x265 <https://www.videolan.org/developers/x265.html>`_ is an open
diff -r 751f9cf1dfc9 -r 86268e498680 source/CMakeLists.txt
--- a/source/CMakeLists.txt	Mon Mar 16 12:05:38 2015 +0530
+++ b/source/CMakeLists.txt	Wed Apr 22 00:00:39 2015 -0500
@@ -30,7 +30,7 @@ option(STATIC_LINK_CRT "Statically link 
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 51)
+set(X265_BUILD 56)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
@@ -67,13 +67,13 @@ if(UNIX)
     endif()
     find_package(Numa)
     if(NUMA_FOUND)
-        list(APPEND CMAKE_REQUIRED_LIBRARIES ${NUMA_LIBRARY})
+        link_directories(${NUMA_LIBRARY_DIR})
+        list(APPEND CMAKE_REQUIRED_LIBRARIES numa)
         check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2)
         if(NUMA_V2)
             add_definitions(-DHAVE_LIBNUMA)
             message(STATUS "libnuma found, building with support for NUMA nodes")
-            list(APPEND PLATFORM_LIBS ${NUMA_LIBRARY})
-            link_directories(${NUMA_LIBRARY_DIR})
+            list(APPEND PLATFORM_LIBS numa)
             include_directories(${NUMA_INCLUDE_DIR})
         endif()
     endif()
@@ -196,6 +196,7 @@ if(GCC)
         add_definitions(-static)
         list(APPEND LINKER_OPTIONS "-static")
     endif(STATIC_LINK_CRT)
+    check_cxx_compiler_flag(-Wno-strict-overflow CC_HAS_NO_STRICT_OVERFLOW)
     check_cxx_compiler_flag(-Wno-narrowing CC_HAS_NO_NARROWING) 
     check_cxx_compiler_flag(-Wno-array-bounds CC_HAS_NO_ARRAY_BOUNDS) 
     if (CC_HAS_NO_ARRAY_BOUNDS)
@@ -300,12 +301,15 @@ if (WIN32)
         list(APPEND PLATFORM_LIBS ${VLD_LIBRARIES})
         link_directories(${VLD_LIBRARY_DIRS})
     endif()
-    option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
+    option(WINXP_SUPPORT "Make binaries compatible with Windows XP and Vista" OFF)
     if(WINXP_SUPPORT)
         # force use of workarounds for CONDITION_VARIABLE and atomic
         # intrinsics introduced after XP
         add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
-    endif()
+    else(WINXP_SUPPORT)
+        # default to targeting Windows 7 for the NUMA APIs
+        add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WIN7)
+    endif(WINXP_SUPPORT)
 endif()
 
 include(version) # determine X265_VERSION and X265_LATEST_TAG
@@ -462,8 +466,10 @@ endif()
 # Main CLI application
 option(ENABLE_CLI "Build standalone CLI application" ON)
 if(ENABLE_CLI)
-    file(GLOB InputFiles input/*.cpp input/*.h)
-    file(GLOB OutputFiles output/*.cpp output/*.h)
+    file(GLOB InputFiles input/input.cpp input/yuv.cpp input/y4m.cpp input/*.h)
+    file(GLOB OutputFiles output/output.cpp output/reconplay.cpp output/*.h
+                          output/yuv.cpp output/y4m.cpp # recon
+                          output/raw.cpp)               # muxers
     file(GLOB FilterFiles filters/*.cpp filters/*.h)
     source_group(input FILES ${InputFiles})
     source_group(output FILES ${OutputFiles})
diff -r 751f9cf1dfc9 -r 86268e498680 source/common/common.cpp
--- a/source/common/common.cpp	Mon Mar 16 12:05:38 2015 +0530
+++ b/source/common/common.cpp	Wed Apr 22 00:00:39 2015 -0500
@@ -100,11 +100,14 @@ int x265_exp2fix8(double x)
     return (x265_exp2_lut[i & 63] + 256) << (i >> 6) >> 8;
 }