[x265-commits] [x265] rc: fix cost issues in predicting row size during mid fra...
Aarthi at videolan.org
Aarthi at videolan.org
Tue Apr 28 05:02:10 CEST 2015
details: http://hg.videolan.org/x265/rev/d2fe87494a6b
branches: stable
changeset: 10292:d2fe87494a6b
user: Aarthi Thirumalai
date: Tue Apr 21 17:29:08 2015 +0530
description:
rc: fix cost issues in predicting row size during mid frame vbv encodes.
Subject: [x265] asm: avx2 code for chroma vpp filter for i422
details: http://hg.videolan.org/x265/rev/0dd9e774266e
branches:
changeset: 10293:0dd9e774266e
user: Sumalatha Polureddy
date: Mon Apr 27 11:33:01 2015 +0530
description:
asm: avx2 code for chroma vpp filter for i422
Subject: [x265] asm: avx2 code for chroma vpp filter for i444
details: http://hg.videolan.org/x265/rev/1e9f3a211283
branches:
changeset: 10294:1e9f3a211283
user: Sumalatha Polureddy
date: Mon Apr 27 13:57:33 2015 +0530
description:
asm: avx2 code for chroma vpp filter for i444
Subject: [x265] asm: filter_vsp and filter_vss for Nx64, 32x48 in I422
details: http://hg.videolan.org/x265/rev/69463180d139
branches:
changeset: 10295:69463180d139
user: Divya Manivannan <divya at multicorewareinc.com>
date: Mon Apr 27 13:27:17 2015 +0530
description:
asm: filter_vsp and filter_vss for Nx64, 32x48 in I422
filter_vsp[32x64, 16x64, 24x64, 8x64, 32x48]: 22410c->14840c, 11660c->7343c, 16721c->11383c, 5708c->3842c, 17755c->11026c
filter_vss[32x64, 16x64, 24x64, 8x64, 32x48]: 18358c->16895c, 9262c->8129c, 15231c->12887c, 4891c->4082c, 14188c->12137c
Subject: [x265] asm: filter_vsp[8x12], filter_vss[8x12] for I422 in avx2
details: http://hg.videolan.org/x265/rev/1185e72c6f54
branches:
changeset: 10296:1185e72c6f54
user: Divya Manivannan <divya at multicorewareinc.com>
date: Mon Apr 27 15:56:17 2015 +0530
description:
asm: filter_vsp[8x12], filter_vss[8x12] for I422 in avx2
filter_vsp[8x12]: 1165c->702c
filter_vss[8x12]: 1003c->804c
Subject: [x265] asm: filter_vsp[6x16], filter_vss[6x16] in avx2
details: http://hg.videolan.org/x265/rev/df051116b6ad
branches:
changeset: 10297:df051116b6ad
user: Divya Manivannan <divya at multicorewareinc.com>
date: Mon Apr 27 18:07:45 2015 +0530
description:
asm: filter_vsp[6x16], filter_vss[6x16] in avx2
filter_vsp[6x16]: 1468c->1227c
filter_vss[6x16]: 1229c->1060c
Subject: [x265] sao: add comment for the reason of two versions of sao primitives
details: http://hg.videolan.org/x265/rev/cc4f5ca07580
branches:
changeset: 10298:cc4f5ca07580
user: Divya Manivannan <divya at multicorewareinc.com>
date: Mon Apr 27 19:12:49 2015 +0530
description:
sao: add comment for the reason of two versions of sao primitives
Subject: [x265] modify m_psyRdoqScale from int64 to int32 because dynamic range is [0,50]*256
details: http://hg.videolan.org/x265/rev/0117a052012e
branches:
changeset: 10299:0117a052012e
user: Min Chen <chenm003 at 163.com>
date: Mon Apr 27 20:31:42 2015 +0800
description:
modify m_psyRdoqScale from int64 to int32 because dynamic range is [0,50]*256
Subject: [x265] modify lambda from int64 to int32 because dynamic range less than 21 bits
details: http://hg.videolan.org/x265/rev/e3fc3aed9017
branches:
changeset: 10300:e3fc3aed9017
user: Min Chen <chenm003 at 163.com>
date: Mon Apr 27 20:31:46 2015 +0800
description:
modify lambda from int64 to int32 because dynamic range less than 21 bits
Subject: [x265] force type convert since multiplication result up to 34-bits
details: http://hg.videolan.org/x265/rev/3be0fd322add
branches:
changeset: 10301:3be0fd322add
user: Min Chen <chenm003 at 163.com>
date: Mon Apr 27 20:31:50 2015 +0800
description:
force type convert since multiplication result up to 34-bits
Subject: [x265] rdoQuant: reduce address operators by swap order on array significantBits[][]
details: http://hg.videolan.org/x265/rev/16ec87053d63
branches:
changeset: 10302:16ec87053d63
user: Min Chen <chenm003 at 163.com>
date: Mon Apr 27 20:31:54 2015 +0800
description:
rdoQuant: reduce address operators by swap order on array significantBits[][]
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/2266b0715a31
branches:
changeset: 10303:2266b0715a31
user: Steve Borho <steve at borho.org>
date: Mon Apr 27 11:20:31 2015 -0500
description:
Merge with stable
Subject: [x265] level: do not try to configure color space in x265_param_apply_profile()
details: http://hg.videolan.org/x265/rev/2f5b57e5de1f
branches:
changeset: 10304:2f5b57e5de1f
user: Steve Borho <steve at borho.org>
date: Mon Apr 27 13:08:57 2015 -0500
description:
level: do not try to configure color space in x265_param_apply_profile()
Each profile has restrictions on which color spaces are allowed, and those must
be enforced, but applying a profile should not affect the input color space
defined by the user. We cannot change the input color space, doing so here was
both futile and incorrect. (fixes #128)
Subject: [x265] rc: extract final average QP from the coded CTU structure
details: http://hg.videolan.org/x265/rev/ed448198ce3d
branches:
changeset: 10305:ed448198ce3d
user: Steve Borho <steve at borho.org>
date: Mon Apr 27 13:59:08 2015 -0500
description:
rc: extract final average QP from the coded CTU structure
With --qg-size, the average QP per CTU can be different from the CTU QP. This
removes calcQpForCu(), which was both redundant and out-of-date, and either
collects the total QP within collectCTUStatistics() or it uses a much simpler
calcCTUQP() function.
The average AQ QP is only used in two places:
1) frame logging
2) two-pass stats
It should have only a minor effect on both, but the result should be both more
accurate and more compute efficient
diffstat:
doc/reST/cli.rst | 91 +-
doc/reST/threading.rst | 15 +
readme.rst | 2 +-
source/CMakeLists.txt | 24 +-
source/common/common.cpp | 13 +-
source/common/common.h | 3 +-
source/common/constants.cpp | 2 +-
source/common/cudata.cpp | 10 +-
source/common/cudata.h | 4 +-
source/common/dct.cpp | 36 +-
source/common/framedata.h | 1 +
source/common/ipfilter.cpp | 36 +-
source/common/loopfilter.cpp | 47 +-
source/common/param.cpp | 27 +-
source/common/picyuv.cpp | 10 +-
source/common/pixel.cpp | 2 +-
source/common/predict.cpp | 31 +-
source/common/primitives.cpp | 3 +-
source/common/primitives.h | 39 +-
source/common/quant.cpp | 496 +-
source/common/quant.h | 42 +-
source/common/slice.h | 1 +
source/common/threadpool.cpp | 12 +-
source/common/threadpool.h | 2 +-
source/common/x86/asm-primitives.cpp | 779 +++-
source/common/x86/const-a.asm | 161 +-
source/common/x86/dct8.asm | 145 +-
source/common/x86/dct8.h | 2 +
source/common/x86/intrapred.h | 68 +
source/common/x86/intrapred16.asm | 502 ++
source/common/x86/intrapred8.asm | 4454 ++++++++++++++++++-
source/common/x86/intrapred8_allangs.asm | 1160 +++++-
source/common/x86/ipfilter16.asm | 1346 +++++-
source/common/x86/ipfilter8.asm | 6939 ++++++++++++++++++++---------
source/common/x86/ipfilter8.h | 256 +-
source/common/x86/loopfilter.asm | 868 +++-
source/common/x86/loopfilter.h | 12 +-
source/common/x86/mc-a.asm | 6 +
source/common/x86/pixel-a.asm | 753 +++
source/common/x86/pixel-util.h | 9 +-
source/common/x86/pixel-util8.asm | 847 +++-
source/common/x86/pixel.h | 6 +
source/common/x86/pixeladd8.asm | 305 +-
source/common/x86/sad-a.asm | 99 +-
source/common/x86/x86inc.asm | 3 +-
source/encoder/CMakeLists.txt | 6 +-
source/encoder/analysis.cpp | 252 +-
source/encoder/analysis.h | 13 +-
source/encoder/api.cpp | 18 +-
source/encoder/encoder.cpp | 52 +-
source/encoder/encoder.h | 2 +-
source/encoder/entropy.cpp | 23 +-
source/encoder/entropy.h | 7 +-
source/encoder/frameencoder.cpp | 88 +-
source/encoder/frameencoder.h | 4 +-
source/encoder/level.cpp | 96 +-
source/encoder/nal.cpp | 18 +-
source/encoder/nal.h | 1 +
source/encoder/ratecontrol.cpp | 46 +-
source/encoder/ratecontrol.h | 1 +
source/encoder/rdcost.h | 3 +-
source/encoder/sao.cpp | 67 +-
source/encoder/search.cpp | 103 +-
source/encoder/search.h | 6 +-
source/encoder/sei.h | 42 +
source/encoder/slicetype.cpp | 54 +-
source/encoder/slicetype.h | 2 +-
source/input/input.cpp | 2 +-
source/input/input.h | 10 +-
source/input/y4m.cpp | 7 +-
source/input/y4m.h | 2 +-
source/input/yuv.cpp | 6 +-
source/input/yuv.h | 2 +-
source/output/output.cpp | 12 +-
source/output/output.h | 43 +-
source/output/raw.cpp | 80 +
source/output/raw.h | 64 +
source/output/reconplay.cpp | 197 +
source/output/reconplay.h | 74 +
source/output/y4m.h | 2 +-
source/output/yuv.h | 2 +-
source/test/ipfilterharness.cpp | 122 +-
source/test/ipfilterharness.h | 1 -
source/test/pixelharness.cpp | 309 +-
source/test/pixelharness.h | 9 +-
source/test/rate-control-tests.txt | 70 +-
source/test/regression-tests.txt | 20 +-
source/test/smoke-tests.txt | 10 +-
source/test/testbench.cpp | 1 +
source/x265.cpp | 204 +-
source/x265.h | 24 +
source/x265cli.h | 11 +
92 files changed, 18211 insertions(+), 3646 deletions(-)
diffs (truncated from 27589 to 300 lines):
diff -r ddcf53cb6974 -r ed448198ce3d doc/reST/cli.rst
--- a/doc/reST/cli.rst Wed Apr 22 15:22:03 2015 -0500
+++ b/doc/reST/cli.rst Mon Apr 27 13:59:08 2015 -0500
@@ -159,6 +159,13 @@ Performance Options
handled implicitly.
One may also directly supply the CPU capability bitmap as an integer.
+
+ Note that by specifying this option you are overriding x265's CPU
+ detection and it is possible to do this wrong. You can cause encoder
+ crashes by specifying SIMD architectures which are not supported on
+ your CPU.
+
+ Default: auto-detected SIMD architectures
.. option:: --frame-threads, -F <integer>
@@ -201,11 +208,11 @@ Performance Options
their node, they will not be allowed to migrate between nodes, but they
will be allowed to move between CPU cores within their node.
- If the three pool features: :option:`--wpp` :option:`--pmode` and
- :option:`--pme` are all disabled, then :option:`--pools` is ignored
- and no thread pools are created.
+ If the four pool features: :option:`--wpp`, :option:`--pmode`,
+ :option:`--pme` and :option:`--lookahead-slices` are all disabled,
+ then :option:`--pools` is ignored and no thread pools are created.
- If "none" is specified, then all three of the thread pool features are
+ If "none" is specified, then all four of the thread pool features are
implicitly disabled.
Multiple thread pools will be allocated for any NUMA node with more than
@@ -217,9 +224,22 @@ Performance Options
:option:`--frame-threads`. The pools are used for WPP and for
distributed analysis and motion search.
+ On Windows, the native APIs offer sufficient functionality to
+ discover the NUMA topology and enforce the thread affinity that
+ libx265 needs (so long as you have not chosen to target XP or
+ Vista), but on POSIX systems it relies on libnuma for this
+ functionality. If your target POSIX system is single socket, then
+ building without libnuma is a perfectly reasonable option, as it
+ will have no effect on the runtime behavior. On a multiple-socket
+ system, a POSIX build of libx265 without libnuma will be less work
+ efficient. See :ref:`thread pools <pools>` for more detail.
+
Default "", one thread is allocated per detected hardware thread
(logical CPU cores) and one thread pool per NUMA node.
+ Note that the string value will need to be escaped or quoted to
+ protect against shell expansion on many platforms
+
.. option:: --wpp, --no-wpp
Enable Wavefront Parallel Processing. The encoder may begin encoding
@@ -437,7 +457,7 @@ Profile, Level, Tier
times 10, for example level **5.1** is specified as "5.1" or "51",
and level **5.0** is specified as "5.0" or "50".
- Annex A levels: 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, 6.1, 6.2
+ Annex A levels: 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, 6.1, 6.2, 8.5
.. option:: --high-tier, --no-high-tier
@@ -464,11 +484,22 @@ Profile, Level, Tier
HEVC specification. If x265 detects that the total reference count
is greater than 8, it will issue a warning that the resulting stream
is non-compliant and it signals the stream as profile NONE and level
- NONE but still allows the encode to continue. Compliant HEVC
+ NONE and will abort the encode unless
+ :option:`--allow-non-conformance` is specified. Compliant HEVC
decoders may refuse to decode such streams.
Default 3
+.. option:: --allow-non-conformance, --no-allow-non-conformance
+
+ Allow libx265 to generate a bitstream with profile and level NONE.
+ By default it will abort any encode which does not meet strict level
+ compliance. The most likely causes for non-conformance are
+ :option:`--ctu` being too small, :option:`--ref` being too high,
+ or the bitrate or resolution being out of specification.
+
+ Default: disabled
+
.. note::
:option:`--profile`, :option:`--level-idc`, and
:option:`--high-tier` are only intended for use when you are
@@ -476,7 +507,7 @@ Profile, Level, Tier
limitations and must constrain the bitstream within those limits.
Specifying a profile or level may lower the encode quality
parameters to meet those requirements but it will never raise
- them.
+ them. It may enable VBV constraints on a CRF encode.
Mode decision / Analysis
========================
@@ -1111,6 +1142,13 @@ Quality, rate control and rate distortio
**Range of values:** 0.0 to 3.0
+.. option:: --qg-size <64|32|16>
+ Enable adaptive quantization for sub-CTUs. This parameter specifies
+ the minimum CU size at which QP can be adjusted, i.e. Quantization Group
+ size. Allowed values are 64, 32 and 16, provided the value falls within
+ the inclusive range [maxCUSize, minCUSize]. Experimental.
+ Default: same as maxCUSize
+
.. option:: --cutree, --no-cutree
Enable the use of lookahead's lowres motion vector fields to
@@ -1451,9 +1489,32 @@ VUI fields must be manually specified.
specification for a description of these values. Default undefined
(not signaled)
+.. option:: --master-display <string>
+
+ SMPTE ST 2086 mastering display color volume SEI info, specified as
+ a string which is parsed when the stream header SEI are emitted. The
+ string format is "Y(%hu,%hu)U(%hu,%hu)V(%hu,%hu)WP(%hu,%hu)L(%u,%u)"
+ where %hu are unsigned 16bit integers and %u are unsigned 32bit
+ integers. The SEI includes X,Y display primaries for YUV channels,
+ white point X,Y and max,min luminance values.
+
+ Example: Y(10,12)U(5,13)V(5,13)WP(100,100)L(1000,100)
+
+ Note that this string value will need to be escaped or quoted to
+ protect against shell expansion on many platforms
+
Bitstream options
=================
+.. option:: --annexb, --no-annexb
+
+ If enabled, x265 will produce Annex B bitstream format, which places
+ start codes before NAL. If disabled, x265 will produce file format,
+ which places length before NAL. x265 CLI will choose the right option
+ based on output format. Default enabled
+
+ **API ONLY**
+
.. option:: --repeat-headers, --no-repeat-headers
If enabled, x265 will emit VPS, SPS, and PPS headers with every
@@ -1525,4 +1586,20 @@ Debugging options
**CLI ONLY**
+.. option:: --recon-y4m-exec <string>
+
+ If you have an application which can play a Y4MPEG stream received
+ on stdin, the x265 CLI can feed it reconstructed pictures in display
+ order. The pictures will have no timing info, obviously, so the
+ picture timing will be determined primarily by encoding elapsed time
+ and latencies, but it can be useful to preview the pictures being
+ output by the encoder to validate input settings and rate control
+ parameters.
+
+ Example command for ffplay (assuming it is in your PATH):
+
+ --recon-y4m-exec "ffplay -i pipe:0 -autoexit"
+
+ **CLI ONLY**
+
.. vim: noet
diff -r ddcf53cb6974 -r ed448198ce3d doc/reST/threading.rst
--- a/doc/reST/threading.rst Wed Apr 22 15:22:03 2015 -0500
+++ b/doc/reST/threading.rst Mon Apr 27 13:59:08 2015 -0500
@@ -2,6 +2,8 @@
Threading
*********
+.. _pools:
+
Thread Pools
============
@@ -31,6 +33,18 @@ for data locking. If a job becomes block
expected to drop that job so the worker thread may go back to the pool
and find more work.
+On Windows, the native APIs offer sufficient functionality to discover
+the NUMA topology and enforce the thread affinity that libx265 needs (so
+long as you have not chosen to target XP or Vista), but on POSIX systems
+it relies on libnuma for this functionality. If your target POSIX system
+is single socket, then building without libnuma is a perfectly
+reasonable option, as it will have no effect on the runtime behavior. On
+a multiple-socket system, a POSIX build of libx265 without libnuma will
+be less work efficient, but will still function correctly. You lose the
+work isolation effect that keeps each frame encoder using only the
+threads of a single socket and so you incur a heavier context switching
+cost.
+
Wavefront Parallel Processing
=============================
@@ -225,6 +239,7 @@ scene cuts and slice types) uses the thr
lowres cost analysis to worker threads. It will use bonded task groups
to perform batches of frame cost estimates, and it may optionally use
bonded task groups to measure single frame cost estimates using slices.
+(see :option:`--lookahead-slices`)
The function slicetypeDecide() itself is also performed by a worker
thread if your encoder has a thread pool, else it runs within the
diff -r ddcf53cb6974 -r ed448198ce3d readme.rst
--- a/readme.rst Wed Apr 22 15:22:03 2015 -0500
+++ b/readme.rst Mon Apr 27 13:59:08 2015 -0500
@@ -3,7 +3,7 @@ x265 HEVC Encoder
=================
| **Read:** | Online `documentation <http://x265.readthedocs.org/en/default/>`_ | Developer `wiki <http://bitbucket.org/multicoreware/x265/wiki/>`_
-| **Download:** | `releases <http://bitbucket.org/multicoreware/x265/downloads/>`_
+| **Download:** | `releases <http://ftp.videolan.org/pub/videolan/x265/>`_
| **Interact:** | #x265 on freenode.irc.net | `x265-devel at videolan.org <http://mailman.videolan.org/listinfo/x265-devel>`_ | `Report an issue <https://bitbucket.org/multicoreware/x265/issues?status=new&status=open>`_
`x265 <https://www.videolan.org/developers/x265.html>`_ is an open
diff -r ddcf53cb6974 -r ed448198ce3d source/CMakeLists.txt
--- a/source/CMakeLists.txt Wed Apr 22 15:22:03 2015 -0500
+++ b/source/CMakeLists.txt Mon Apr 27 13:59:08 2015 -0500
@@ -30,7 +30,7 @@ option(STATIC_LINK_CRT "Statically link
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 51)
+set(X265_BUILD 56)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
@@ -67,13 +67,13 @@ if(UNIX)
endif()
find_package(Numa)
if(NUMA_FOUND)
- list(APPEND CMAKE_REQUIRED_LIBRARIES ${NUMA_LIBRARY})
+ link_directories(${NUMA_LIBRARY_DIR})
+ list(APPEND CMAKE_REQUIRED_LIBRARIES numa)
check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2)
if(NUMA_V2)
add_definitions(-DHAVE_LIBNUMA)
message(STATUS "libnuma found, building with support for NUMA nodes")
- list(APPEND PLATFORM_LIBS ${NUMA_LIBRARY})
- link_directories(${NUMA_LIBRARY_DIR})
+ list(APPEND PLATFORM_LIBS numa)
include_directories(${NUMA_INCLUDE_DIR})
endif()
endif()
@@ -196,6 +196,7 @@ if(GCC)
add_definitions(-static)
list(APPEND LINKER_OPTIONS "-static")
endif(STATIC_LINK_CRT)
+ check_cxx_compiler_flag(-Wno-strict-overflow CC_HAS_NO_STRICT_OVERFLOW)
check_cxx_compiler_flag(-Wno-narrowing CC_HAS_NO_NARROWING)
check_cxx_compiler_flag(-Wno-array-bounds CC_HAS_NO_ARRAY_BOUNDS)
if (CC_HAS_NO_ARRAY_BOUNDS)
@@ -300,12 +301,15 @@ if (WIN32)
list(APPEND PLATFORM_LIBS ${VLD_LIBRARIES})
link_directories(${VLD_LIBRARY_DIRS})
endif()
- option(WINXP_SUPPORT "Make binaries compatible with Windows XP" OFF)
+ option(WINXP_SUPPORT "Make binaries compatible with Windows XP and Vista" OFF)
if(WINXP_SUPPORT)
# force use of workarounds for CONDITION_VARIABLE and atomic
# intrinsics introduced after XP
- add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP)
- endif()
+ add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WINXP -D_WIN32_WINNT_WIN7=0x0601)
+ else(WINXP_SUPPORT)
+ # default to targeting Windows 7 for the NUMA APIs
+ add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_WIN7)
+ endif(WINXP_SUPPORT)
endif()
include(version) # determine X265_VERSION and X265_LATEST_TAG
@@ -462,8 +466,10 @@ endif()
# Main CLI application
option(ENABLE_CLI "Build standalone CLI application" ON)
if(ENABLE_CLI)
- file(GLOB InputFiles input/*.cpp input/*.h)
- file(GLOB OutputFiles output/*.cpp output/*.h)
+ file(GLOB InputFiles input/input.cpp input/yuv.cpp input/y4m.cpp input/*.h)
+ file(GLOB OutputFiles output/output.cpp output/reconplay.cpp output/*.h
+ output/yuv.cpp output/y4m.cpp # recon
+ output/raw.cpp) # muxers
file(GLOB FilterFiles filters/*.cpp filters/*.h)
source_group(input FILES ${InputFiles})
source_group(output FILES ${OutputFiles})
diff -r ddcf53cb6974 -r ed448198ce3d source/common/common.cpp
--- a/source/common/common.cpp Wed Apr 22 15:22:03 2015 -0500
+++ b/source/common/common.cpp Mon Apr 27 13:59:08 2015 -0500
@@ -100,11 +100,14 @@ int x265_exp2fix8(double x)
return (x265_exp2_lut[i & 63] + 256) << (i >> 6) >> 8;
}
-void x265_log(const x265_param *param, int level, const char *fmt, ...)
+void general_log(const x265_param* param, const char* caller, int level, const char* fmt, ...)
{
if (param && level > param->logLevel)
return;
- const char *log_level;
+ const int bufferSize = 4096;
More information about the x265-commits
mailing list