[x265-commits] [x265] Merge with default (prepare for 1.1)

Mon Jun 2 21:23:29 CEST 2014

details:   http://hg.videolan.org/x265/rev/108996798e78
branches:  stable
changeset: 6955:108996798e78
user:      Steve Borho <steve at borho.org>
date:      Mon Jun 02 14:23:09 2014 -0500
description:
Merge with default (prepare for 1.1)

diffstat:

 .hgignore                                           |     1 +
 COPYING                                             |     2 +-
 doc/reST/api.rst                                    |   310 +++
 doc/reST/cli.rst                                    |   120 +-
 doc/reST/index.rst                                  |     1 +
 doc/reST/introduction.rst                           |    66 +-
 doc/reST/presets.rst                                |    41 +-
 doc/reST/threading.rst                              |     9 +-
 source/CMakeLists.txt                               |    51 +-
 source/Lib/TLibCommon/CommonDef.h                   |     1 +
 source/Lib/TLibCommon/TComBitStream.cpp             |     8 +-
 source/Lib/TLibCommon/TComBitStream.h               |     2 +-
 source/Lib/TLibCommon/TComDataCU.cpp                |   253 +-
 source/Lib/TLibCommon/TComDataCU.h                  |    24 +-
 source/Lib/TLibCommon/TComLoopFilter.cpp            |     8 +-
 source/Lib/TLibCommon/TComMotionInfo.cpp            |     4 +-
 source/Lib/TLibCommon/TComPattern.cpp               |   317 +-
 source/Lib/TLibCommon/TComPattern.h                 |    32 +-
 source/Lib/TLibCommon/TComPic.h                     |    10 +-
 source/Lib/TLibCommon/TComPicYuv.cpp                |    30 +-
 source/Lib/TLibCommon/TComPicYuv.h                  |    39 +-
 source/Lib/TLibCommon/TComPrediction.cpp            |   121 +-
 source/Lib/TLibCommon/TComPrediction.h              |     8 +-
 source/Lib/TLibCommon/TComRdCost.h                  |    96 -
 source/Lib/TLibCommon/TComRom.h                     |     4 +-
 source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp  |    64 +-
 source/Lib/TLibCommon/TComSlice.cpp                 |    12 +-
 source/Lib/TLibCommon/TComSlice.h                   |    20 +-
 source/Lib/TLibCommon/TComTrQuant.cpp               |    96 +-
 source/Lib/TLibCommon/TComTrQuant.h                 |    22 +-
 source/Lib/TLibCommon/TComWeightPrediction.cpp      |    14 +-
 source/Lib/TLibCommon/TComYuv.cpp                   |   105 +-
 source/Lib/TLibCommon/TComYuv.h                     |    48 +-
 source/Lib/TLibEncoder/SEIwrite.cpp                 |     5 +-
 source/Lib/TLibEncoder/SyntaxElementWriter.cpp      |     4 +-
 source/Lib/TLibEncoder/TEncBinCoderCABAC.cpp        |    12 +-
 source/Lib/TLibEncoder/TEncBinCoderCABAC.h          |     7 +-
 source/Lib/TLibEncoder/TEncCu.cpp                   |   641 +++---
 source/Lib/TLibEncoder/TEncCu.h                     |    15 +-
 source/Lib/TLibEncoder/TEncEntropy.cpp              |   155 +-
 source/Lib/TLibEncoder/TEncEntropy.h                |     8 +-
 source/Lib/TLibEncoder/TEncSampleAdaptiveOffset.cpp |    15 +-
 source/Lib/TLibEncoder/TEncSbac.cpp                 |    63 +-
 source/Lib/TLibEncoder/TEncSbac.h                   |     2 +-
 source/Lib/TLibEncoder/TEncSearch.cpp               |  1910 +++++++++---------
 source/Lib/TLibEncoder/TEncSearch.h                 |    45 +-
 source/PPA/ppa.cpp                                  |     2 +-
 source/PPA/ppa.h                                    |     2 +-
 source/PPA/ppaApi.h                                 |     2 +-
 source/cmake/CMakeASM_YASMInformation.cmake         |     2 +-
 source/cmake/clean-generated.cmake                  |    10 +
 source/cmake/version.cmake                          |     6 +
 source/common/CMakeLists.txt                        |    66 +-
 source/common/bitstream.h                           |     2 +-
 source/common/common.cpp                            |     2 +-
 source/common/common.h                              |    40 +-
 source/common/cpu.cpp                               |     4 +-
 source/common/cpu.h                                 |     2 +-
 source/common/dct.cpp                               |    16 +-
 source/common/intrapred.cpp                         |     2 +-
 source/common/ipfilter.cpp                          |     2 +-
 source/common/loopfilter.cpp                        |     2 +-
 source/common/lowres.cpp                            |     6 +-
 source/common/lowres.h                              |     6 +-
 source/common/mv.h                                  |     2 +-
 source/common/param.cpp                             |   104 +-
 source/common/param.h                               |     2 +-
 source/common/piclist.cpp                           |     8 +-
 source/common/piclist.h                             |     2 +-
 source/common/pixel.cpp                             |    22 +-
 source/common/primitives.cpp                        |     9 +-
 source/common/primitives.h                          |    20 +-
 source/common/shortyuv.cpp                          |   132 +-
 source/common/shortyuv.h                            |    52 +-
 source/common/threading.cpp                         |     2 +-
 source/common/threading.h                           |     2 +-
 source/common/threadpool.cpp                        |    10 +-
 source/common/threadpool.h                          |     4 +-
 source/common/vec/blockcopy-sse3.cpp                |    10 +-
 source/common/vec/dct-sse3.cpp                      |     2 +-
 source/common/vec/dct-sse41.cpp                     |     4 +-
 source/common/vec/dct-ssse3.cpp                     |     2 +-
 source/common/vec/vec-primitives.cpp                |     2 +-
 source/common/version.cpp                           |    14 +-
 source/common/wavefront.cpp                         |    10 +-
 source/common/wavefront.h                           |     6 +-
 source/common/winxp.cpp                             |     6 +-
 source/common/winxp.h                               |     2 +-
 source/common/x86/asm-primitives.cpp                |    18 +-
 source/common/x86/blockcopy8.asm                    |     2 +-
 source/common/x86/blockcopy8.h                      |     2 +-
 source/common/x86/const-a.asm                       |     2 +-
 source/common/x86/cpu-a.asm                         |     2 +-
 source/common/x86/dct8.asm                          |     2 +-
 source/common/x86/dct8.h                            |     2 +-
 source/common/x86/intrapred.h                       |     2 +-
 source/common/x86/intrapred16.asm                   |     2 +-
 source/common/x86/intrapred8.asm                    |   461 ++--
 source/common/x86/ipfilter16.asm                    |     2 +-
 source/common/x86/ipfilter8.asm                     |     2 +-
 source/common/x86/ipfilter8.h                       |     2 +-
 source/common/x86/loopfilter.asm                    |     2 +-
 source/common/x86/loopfilter.h                      |     2 +-
 source/common/x86/mc-a.asm                          |     2 +-
 source/common/x86/mc-a2.asm                         |     2 +-
 source/common/x86/mc.h                              |     2 +-
 source/common/x86/pixel-32.asm                      |     2 +-
 source/common/x86/pixel-a.asm                       |   182 +-
 source/common/x86/pixel-util.h                      |     2 +-
 source/common/x86/pixel-util8.asm                   |     2 +-
 source/common/x86/pixel.h                           |     2 +-
 source/common/x86/pixeladd8.asm                     |     2 +-
 source/common/x86/sad-a.asm                         |     2 +-
 source/common/x86/sad16-a.asm                       |     2 +-
 source/common/x86/ssd-a.asm                         |     2 +-
 source/common/x86/x86util.asm                       |     2 +-
 source/compat/msvc/stdint.h                         |    13 +-
 source/encoder/CMakeLists.txt                       |    16 +-
 source/encoder/api.cpp                              |    29 +-
 source/encoder/bitcost.cpp                          |     2 +-
 source/encoder/bitcost.h                            |     2 +-
 source/encoder/compress.cpp                         |   335 +-
 source/encoder/cturow.cpp                           |     6 +-
 source/encoder/cturow.h                             |     6 +-
 source/encoder/dpb.cpp                              |     8 +-
 source/encoder/dpb.h                                |     2 +-
 source/encoder/encoder.cpp                          |    94 +-
 source/encoder/encoder.h                            |     9 +-
 source/encoder/frameencoder.cpp                     |    95 +-
 source/encoder/frameencoder.h                       |    11 +-
 source/encoder/framefilter.cpp                      |    10 +-
 source/encoder/framefilter.h                        |     2 +-
 source/encoder/level.cpp                            |    60 +-
 source/encoder/level.h                              |     4 +-
 source/encoder/motion.cpp                           |     6 +-
 source/encoder/motion.h                             |     2 +-
 source/encoder/ratecontrol.cpp                      |   351 ++-
 source/encoder/ratecontrol.h                        |    12 +-
 source/encoder/rdcost.h                             |   134 +
 source/encoder/reference.cpp                        |     4 +-
 source/encoder/reference.h                          |     2 +-
 source/encoder/slicetype.cpp                        |   149 +-
 source/encoder/slicetype.h                          |     6 +-
 source/encoder/weightPrediction.cpp                 |     2 +-
 source/filters/filters.cpp                          |     2 +-
 source/filters/filters.h                            |     2 +-
 source/input/input.cpp                              |     2 +-
 source/input/input.h                                |     2 +-
 source/input/y4m.cpp                                |    84 +-
 source/input/y4m.h                                  |     6 +-
 source/input/yuv.cpp                                |     6 +-
 source/input/yuv.h                                  |     2 +-
 source/output/output.cpp                            |     2 +-
 source/output/output.h                              |     2 +-
 source/output/y4m.cpp                               |     9 +-
 source/output/y4m.h                                 |     2 +-
 source/output/yuv.cpp                               |     6 +-
 source/output/yuv.h                                 |     2 +-
 source/test/checkasm-a.asm                          |     2 +-
 source/test/intrapredharness.cpp                    |     2 +-
 source/test/intrapredharness.h                      |     2 +-
 source/test/ipfilterharness.cpp                     |     2 +-
 source/test/ipfilterharness.h                       |     2 +-
 source/test/mbdstharness.cpp                        |     2 +-
 source/test/mbdstharness.h                          |     2 +-
 source/test/pixelharness.cpp                        |     2 +-
 source/test/pixelharness.h                          |     2 +-
 source/test/testbench.cpp                           |     2 +-
 source/test/testharness.h                           |     2 +-
 source/test/testpool.cpp                            |     6 +-
 source/x265.cpp                                     |   101 +-
 source/x265.def.in                                  |     1 +
 source/x265.h                                       |    65 +-
 source/x265_config.h.in                             |     2 +-
 174 files changed, 4370 insertions(+), 3463 deletions(-)

diffs (truncated from 16368 to 300 lines):

diff -r 607384b3312e -r 108996798e78 .hgignore

--- a/.hgignore	Wed May 07 02:19:54 2014 +0000
+++ b/.hgignore	Mon Jun 02 14:23:09 2014 -0500
@@ -8,3 +8,4 @@ build/
 **.y4m
 **.out
 **.swp
+.DS_Store
diff -r 607384b3312e -r 108996798e78 COPYING
--- a/COPYING	Wed May 07 02:19:54 2014 +0000
+++ b/COPYING	Mon Jun 02 14:23:09 2014 -0500
@@ -340,4 +340,4 @@ library.  If this is what you want to do
 Public License instead of this License.
 
 This program is also available under a commercial proprietary license.
-For more information, contact us at licensing at multicorewareinc.com.
+For more information, contact us at license @ x265.com.
diff -r 607384b3312e -r 108996798e78 doc/reST/api.rst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/reST/api.rst	Mon Jun 02 14:23:09 2014 -0500
@@ -0,0 +1,310 @@
+*********************************
+Application Programming Interface
+*********************************
+
+Introduction
+============
+
+x265 is written primarily in C++ and x86 assembly language but the
+public facing programming interface is C for the widest possible
+portability.  This C interface is wholly defined within :file:`x265.h`
+in the source/ folder of our source tree.  All of the functions and
+variables and enumerations meant to be used by the end-user are present
+in this header.
+
+Where possible, x265 has tried to keep its public API as close as
+possible to x264's public API. So those familiar with using x264 through
+its C interface will find x265 quite familiar.
+
+This file is meant to be read in-order; the narrative follows linearly
+through the various sections.
+
+Build Considerations
+====================
+
+The choice of Main or Main10 profile encodes is made at compile time;
+the internal pixel depth influences a great deal of variable sizes and
+thus 8 and 10bit pixels are handled as different build options
+(primarily to maintain the performance of the 8bit builds). libx265
+exports a variable **x265_max_bit_depth** which indicates how the
+library was compiled (it will contain a value of 8 or 10). Further,
+**x265_version_str** is a pointer to a string indicating the version of
+x265 which was compiled, and **x265_build_info_str** is a pointer to a
+string identifying the compiler and build options.
+
+x265 will accept input pixels of any depth between 8 and 16 bits
+regardless of the depth of its internal pixels (8 or 10).  It will shift
+and mask input pixels as required to reach the internal depth. If
+downshifting is being performed using our CLI application, the
+:option:`--dither` option may be enabled to reduce banding. This feature
+is not available through the C interface.
+
+Encoder
+=======
+
+The primary object in x265 is the encoder object, and this is
+represented in the public API as an opaque typedef **x265_encoder**.
+Pointers of this type are passed to most encoder functions.
+
+A single encoder generates a single output bitstream from a sequence of
+raw input pictures.  Thus if you need multiple output bitstreams you
+must allocate multiple encoders.  You may pass the same input pictures
+to multiple encoders; the encode function does not modify the input
+picture structures (the pictures are copied into the encoder as the
+first step of encode).
+
+Encoder allocation is a reentrant function, so multiple encoders may be
+safely allocated in a single process. The encoder access functions are
+not reentrant for a single encoder, so the recommended use case is to
+allocate one client thread per encoder instance (one thread for all
+encoder instances is possible, but some encoder access functions are
+blocking and thus this would be less efficient).
+
+.. Note::
+
+	There is one caveat to having multiple encoders within a single
+	process. All of the encoders must use the same maximum CTU size
+	because many global variables are configured based on this size.
+	Encoder allocation will fail if a mis-matched CTU size is attempted.
+
+An encoder is allocated by calling **x265_encoder_open()**::
+
+	/* x265_encoder_open:
+	*      create a new encoder handler, all parameters from x265_param are copied */
+	x265_encoder* x265_encoder_open(x265_param *);
+
+The returned pointer is then passed to all of the functions pertaining
+to this encode. A large amount of memory is allocated during this
+function call, but the encoder will continue to allocate memory as the
+first pictures are passed to the encoder, until its pool of picture
+structures is large enough to handle all of the pictures it must keep
+internally.  The pool size is determined by the lookahead depth, the
+number of frame threads, and the maximum number of references.
+
+As indicated in the comment, **x265_param** is copied internally so the user
+may release their copy after allocating the encoder.  Changes made to
+their copy of the param structure have no effect on the encoder after it
+has been allocated.
+
+Param
+=====
+
+The **x265_param** structure describes everything the encoder needs to
+know about the input pictures and the output bitstream and most
+everything in between.
+
+The recommended way to handle these param structures is to allocate them
+from libx265 via::
+
+	/* x265_param_alloc:
+	 *  Allocates an x265_param instance. The returned param structure is not
+	 *  special in any way, but using this method together with x265_param_free()
+	 *  and x265_param_parse() to set values by name allows the application to treat
+	 *  x265_param as an opaque data struct for version safety */
+	x265_param *x265_param_alloc();
+
+In this way, your application does not need to know the exact size of
+the param structure (the build of x265 could potentially be a bit newer
+than the copy of :file:`x265.h` that your application compiled against).
+
+Next you perform the initial *rough cut* configuration of the encoder by
+choosing a performance preset and optional tune factor.
+**x265_preset_names** and **x265_tune_names** respectively hold the
+string names of the presets and tune factors (see :ref:`presets
+<preset-tune-ref>` for more detail on presets and tune factors)::
+
+	/*      returns 0 on success, negative on failure (e.g. invalid preset/tune name). */
+	int x265_param_default_preset(x265_param *, const char *preset, const char *tune);
+
+Now you may optionally specify a profile. **x265_profile_names**
+contains the string names this function accepts::
+
+	/*      (can be NULL, in which case the function will do nothing)
+	 *      returns 0 on success, negative on failure (e.g. invalid profile name). */
+	int x265_param_apply_profile(x265_param *, const char *profile);
+
+Finally you configure any remaining options by name using repeated calls to::
+
+	/* x265_param_parse:
+	 *  set one parameter by name.
+	 *  returns 0 on success, or returns one of the following errors.
+	 *  note: BAD_VALUE occurs only if it can't even parse the value,
+	 *  numerical range is not checked until x265_encoder_open().
+	 *  value=NULL means "true" for boolean options, but is a BAD_VALUE for non-booleans. */
+	#define X265_PARAM_BAD_NAME  (-1)
+	#define X265_PARAM_BAD_VALUE (-2)
+	int x265_param_parse(x265_param *p, const char *name, const char *value);
+
+See :ref:`string options <string-options-ref>` for the list of options (and their
+descriptions) which can be set by **x265_param_parse()**.
+
+After the encoder has been created, you may release the param structure::
+
+	/* x265_param_free:
+	 *  Use x265_param_free() to release storage for an x265_param instance
+	 *  allocated by x265_param_alloc() */
+	void x265_param_free(x265_param *);
+
+.. Note::
+
+	Using these methods to allocate and release the param structures
+	helps future-proof your code in many ways, but the x265 API is
+	versioned in such a way that we prevent linkage against a build of
+	x265 that does not match the version of the header you are compiling
+	against. This is a function of the X265_BUILD macro.
+
+**x265_encoder_parameters()** may be used to get a copy of the param
+structure from the encoder after it has been opened, in order to see the
+changes made to the parameters for auto-detection and other reasons::
+
+	/* x265_encoder_parameters:
+	 *      copies the current internal set of parameters to the pointer provided
+	 *      by the caller.  useful when the calling application needs to know
+	 *      how x265_encoder_open has changed the parameters.
+	 *      note that the data accessible through pointers in the returned param struct
+	 *      (e.g. filenames) should not be modified by the calling application. */
+	void x265_encoder_parameters(x265_encoder *, x265_param *);                                                                      
+
+Pictures
+========
+
+Raw pictures are passed to the encoder via the **x265_picture** structure.
+Just like the param structure we recommend you allocate this structure
+from the encoder to avoid potential size mismatches::
+
+	/* x265_picture_alloc:
+	 *  Allocates an x265_picture instance. The returned picture structure is not
+	 *  special in any way, but using this method together with x265_picture_free()
+	 *  and x265_picture_init() allows some version safety. New picture fields will
+	 *  always be added to the end of x265_picture */
+	x265_picture *x265_picture_alloc();
+
+Regardless of whether you allocate your picture structure this way or
+whether you simply declare it on the stack, your next step is to
+initialize the structure via::
+
+	/***
+	 * Initialize an x265_picture structure to default values. It sets the pixel
+	 * depth and color space to the encoder's internal values and sets the slice
+	 * type to auto - so the lookahead will determine slice type.
+	 */
+	void x265_picture_init(x265_param *param, x265_picture *pic);
+
+x265 does not perform any color space conversions, so the raw picture's
+color space (chroma sampling) must match the color space specified in
+the param structure used to allocate the encoder. **x265_picture_init**
+initializes this field to the internal color space and it is best to
+leave it unmodified.
+
+The picture bit depth is initialized to be the encoder's internal bit
+depth but this value should be changed to the actual depth of the pixels
+being passed into the encoder.  If the picture bit depth is more than 8,
+the encoder assumes two bytes are used to represent each sample
+(little-endian shorts).
+
+The user is responsible for setting the plane pointers and plane strides
+(in units of bytes, not pixels). The presentation time stamp (**pts**)
+is optional, depending on whether you need accurate decode time stamps
+(**dts**) on output.
+
+If you wish to override the lookahead or rate control for a given
+picture you may specify a slicetype other than X265_TYPE_AUTO, or a
+forceQP value other than 0.
+
+x265 does not modify the picture structure provided as input, so you may
+reuse a single **x265_picture** for all pictures passed to a single
+encoder, or even all pictures passed to multiple encoders.
+
+Structures allocated from the library should eventually be released::
+
+	/* x265_picture_free:
+	 *  Use x265_picture_free() to release storage for an x265_picture instance
+	 *  allocated by x265_picture_alloc() */
+	void x265_picture_free(x265_picture *);
+
+
+Encode Process
+==============
+
+The output of the encoder is a series of NAL packets, which are always
+returned concatenated in consecutive memory. HEVC streams have SPS and
+PPS and VPS headers which describe how the following packets are to be
+decoded. If you specified :option:`--repeat-headers` then those headers
+will be output with every keyframe.  Otherwise you must explicitly query
+those headers using::
+
+	/* x265_encoder_headers:
+	 *      return the SPS and PPS that will be used for the whole stream.
+	 *      *pi_nal is the number of NAL units outputted in pp_nal.
+	 *      returns negative on error, total byte size of payload data on success
+	 *      the payloads of all output NALs are guaranteed to be sequential in memory. */
+	int x265_encoder_headers(x265_encoder *, x265_nal **pp_nal, uint32_t *pi_nal);
+
+Now we get to the main encode loop. Raw input pictures are passed to the
+encoder in display order via::
+
+	/* x265_encoder_encode:
+	 *      encode one picture.
+	 *      *pi_nal is the number of NAL units outputted in pp_nal.
+	 *      returns negative on error, zero if no NAL units returned.
+	 *      the payloads of all output NALs are guaranteed to be sequential in memory. */
+	int x265_encoder_encode(x265_encoder *encoder, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out);
+
+These pictures are queued up until the lookahead is full, and then the
+frame encoders in turn are filled, and then finally you begin receiving
+output NALs (corresponding to a single output picture) with each input
+picture you pass into the encoder.
+
+Once the pipeline is completely full, **x265_encoder_encode()** will
+block until the next output picture is complete.
+
+.. note:: 
+
+	Optionally, if the pointer of a second **x265_picture** structure is
+	provided, the encoder will fill it with data pertaining to the
+	output picture corresponding to the output NALs, including the
+	reconstructed image, POC and decode timestamp. These pictures will be
+	in encode (or decode) order.
+
+When the last of the raw input pictures has been sent to the encoder,
+**x265_encoder_encode()** must still be called repeatedly with a
+*pic_in* argument of 0, indicating a pipeline flush, until the function
+returns a value less than or equal to 0 (indicating the output bitstream
+is complete).
+
+At any time during this process, the application may query running
+statistics from the encoder::
+
+	/* x265_encoder_get_stats:
+	 *       returns encoder statistics */