[x265-commits] [x265] analysis: allow AMP to work at 64x64 with --pmode and --rd 5

Steve Borho steve at borho.org
Tue Oct 28 04:48:46 CET 2014


details:   http://hg.videolan.org/x265/rev/0f71dcb02c59
branches:  
changeset: 8703:0f71dcb02c59
user:      Steve Borho <steve at borho.org>
date:      Mon Oct 27 19:04:34 2014 -0500
description:
analysis: allow AMP to work at 64x64 with --pmode and --rd 5
Subject: [x265] analysis: fix a typo in --rd 5 --rect

details:   http://hg.videolan.org/x265/rev/685127dfd466
branches:  
changeset: 8704:685127dfd466
user:      Steve Borho <steve at borho.org>
date:      Mon Oct 27 19:15:56 2014 -0500
description:
analysis: fix a typo in --rd 5 --rect
Subject: [x265] analysis: simplify logic slightly, combine conditionals

details:   http://hg.videolan.org/x265/rev/d62838b20805
branches:  
changeset: 8705:d62838b20805
user:      Steve Borho <steve at borho.org>
date:      Mon Oct 27 19:16:32 2014 -0500
description:
analysis: simplify logic slightly, combine conditionals
Subject: [x265] asm: AVX2 version of luma_pp[4x4], improve 320c -> 188c

details:   http://hg.videolan.org/x265/rev/52ba1fb2227e
branches:  
changeset: 8706:52ba1fb2227e
user:      Min Chen <chenm003 at 163.com>
date:      Mon Oct 27 17:01:11 2014 -0700
description:
asm: AVX2 version of luma_pp[4x4], improve 320c -> 188c
Subject: [x265] Add flag to enable/disable temporal MVP

details:   http://hg.videolan.org/x265/rev/ef27c0eb2fd6
branches:  
changeset: 8707:ef27c0eb2fd6
user:      Nicolas Morey-Chaisemartin <nmorey at kalray.eu>
date:      Mon Oct 27 19:49:27 2014 -0500
description:
Add flag to enable/disable temporal MVP
Subject: [x265] Add CLI option to enable/disable Temporal MVP

details:   http://hg.videolan.org/x265/rev/f6f662559bde
branches:  
changeset: 8708:f6f662559bde
user:      Nicolas Morey-Chaisemartin <nmorey at kalray.eu>
date:      Mon Oct 27 20:08:37 2014 -0500
description:
Add CLI option to enable/disable Temporal MVP
Subject: [x265] Set tuQTMaxLog2Size to MIN(5, maxLog2CUSize)

details:   http://hg.videolan.org/x265/rev/14388f2a7a88
branches:  
changeset: 8709:14388f2a7a88
user:      Nicolas Morey-Chaisemartin <nmorey at kalray.eu>
date:      Mon Oct 27 20:09:40 2014 -0500
description:
Set tuQTMaxLog2Size to MIN(5, maxLog2CUSize)

This allows 32x32 TUs even when --ctu 32 is used, for instance
Subject: [x265] frame: initialize and re-initialize m_bChromaExtended

details:   http://hg.videolan.org/x265/rev/0fc522bebda5
branches:  
changeset: 8710:0fc522bebda5
user:      Steve Borho <steve at borho.org>
date:      Mon Oct 27 21:34:57 2014 -0500
description:
frame: initialize and re-initialize m_bChromaExtended

fixes some non-determinism
Subject: [x265] fix some header include loops

details:   http://hg.videolan.org/x265/rev/fd95b6a592ee
branches:  
changeset: 8711:fd95b6a592ee
user:      Steve Borho <steve at borho.org>
date:      Mon Oct 27 21:35:59 2014 -0500
description:
fix some header include loops
Subject: [x265] encoder: nit cleanup of code copying data from input picture

details:   http://hg.videolan.org/x265/rev/84f4cb50fe46
branches:  
changeset: 8712:84f4cb50fe46
user:      Steve Borho <steve at borho.org>
date:      Mon Oct 27 21:52:09 2014 -0500
description:
encoder: nit cleanup of code copying data from input picture
Subject: [x265] slicetype: remove top-level Encoder pointer from Slicetype

details:   http://hg.videolan.org/x265/rev/3ccb20b6c022
branches:  
changeset: 8713:3ccb20b6c022
user:      Steve Borho <steve at borho.org>
date:      Mon Oct 27 21:59:30 2014 -0500
description:
slicetype: remove top-level Encoder pointer from Slicetype

Move the slice type assignment earlier to happen even before the picture
is given to the lookahead

diffstat:

 doc/reST/cli.rst                     |   6 +++
 source/CMakeLists.txt                |   2 +-
 source/common/cudata.cpp             |   5 +-
 source/common/deblock.cpp            |   2 +-
 source/common/frame.cpp              |  10 +++++
 source/common/frame.h                |   5 +-
 source/common/param.cpp              |   6 ++-
 source/common/predict.cpp            |   2 +
 source/common/predict.h              |   1 +
 source/common/quant.cpp              |   2 +-
 source/common/slice.h                |   1 +
 source/common/x86/asm-primitives.cpp |   1 +
 source/common/x86/dct8.asm           |   2 +-
 source/common/x86/ipfilter8.asm      |  62 +++++++++++++++++++++++++++++++++++-
 source/common/x86/ipfilter8.h        |   1 +
 source/encoder/analysis.cpp          |  19 ++++++----
 source/encoder/dpb.cpp               |   1 +
 source/encoder/encoder.cpp           |  24 ++++++++-----
 source/encoder/entropy.cpp           |   8 ++-
 source/encoder/entropy.h             |   1 +
 source/encoder/frameencoder.cpp      |   6 ++-
 source/encoder/framefilter.cpp       |   5 ++-
 source/encoder/ratecontrol.cpp       |   1 +
 source/encoder/sao.cpp               |   2 +
 source/encoder/search.cpp            |   4 +-
 source/encoder/slicetype.cpp         |  15 +------
 source/encoder/slicetype.h           |   7 ++-
 source/x265.cpp                      |   3 +
 source/x265.h                        |   3 +
 29 files changed, 157 insertions(+), 50 deletions(-)

diffs (truncated from 761 to 300 lines):

diff -r 26e1574a5424 -r 3ccb20b6c022 doc/reST/cli.rst
--- a/doc/reST/cli.rst	Mon Oct 27 15:06:04 2014 -0500
+++ b/doc/reST/cli.rst	Mon Oct 27 21:59:30 2014 -0500
@@ -527,6 +527,12 @@ Temporal / motion search options
 	The max candidate number is encoded in the SPS and determines the
 	bit cost of signaling merge CUs. Default 2
 
+.. option:: --temporal-mvp, --no-temporal-mvp
+
+	Enable temporal motion vector predictors in P and B slices.
+	This allows the motion vector of the collocated block in the
+	previous frame to be used as a predictor. Default is enabled
+
 Spatial/intra options
 =====================
 
diff -r 26e1574a5424 -r 3ccb20b6c022 source/CMakeLists.txt
--- a/source/CMakeLists.txt	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/CMakeLists.txt	Mon Oct 27 21:59:30 2014 -0500
@@ -21,7 +21,7 @@ include(CheckSymbolExists)
 include(CheckCXXCompilerFlag)
 
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 34)
+set(X265_BUILD 35)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/cudata.cpp
--- a/source/common/cudata.cpp	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/cudata.cpp	Mon Oct 27 21:59:30 2014 -0500
@@ -23,6 +23,7 @@
 
 #include "common.h"
 #include "frame.h"
+#include "framedata.h"
 #include "picyuv.h"
 #include "mv.h"
 #include "cudata.h"
@@ -1534,7 +1535,7 @@ uint32_t CUData::getInterMergeCandidates
                 return maxNumMergeCand;
         }
     }
-    // TMVP always enabled
+    if (m_slice->m_sps->bTemporalMVPEnabled)
     {
         MV colmv;
         uint32_t partIdxRB;
@@ -1763,7 +1764,7 @@ int CUData::fillMvpCand(uint32_t partIdx
             return numMvc;
     }
 
-    // TMVP always enabled
+    if (m_slice->m_sps->bTemporalMVPEnabled)
     {
         uint32_t absPartAddr = m_absIdxInCTU + partAddr;
         MV colmv;
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/deblock.cpp
--- a/source/common/deblock.cpp	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/deblock.cpp	Mon Oct 27 21:59:30 2014 -0500
@@ -23,7 +23,7 @@
 
 #include "common.h"
 #include "deblock.h"
-#include "frame.h"
+#include "framedata.h"
 #include "picyuv.h"
 #include "slice.h"
 #include "mv.h"
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/frame.cpp
--- a/source/common/frame.cpp	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/frame.cpp	Mon Oct 27 21:59:30 2014 -0500
@@ -24,11 +24,13 @@
 #include "common.h"
 #include "frame.h"
 #include "picyuv.h"
+#include "framedata.h"
 
 using namespace x265;
 
 Frame::Frame()
 {
+    m_bChromaExtended = false;
     m_reconRowCount.set(0);
     m_countRefEncoders = 0;
     m_encData = NULL;
@@ -64,6 +66,14 @@ bool Frame::allocEncodeData(x265_param *
     return ok;
 }
 
+/* prepare to re-use a FrameData instance to encode a new picture */
+void Frame::reinit(const SPS& sps)
+{
+    m_bChromaExtended = false;
+    m_reconPicYuv = m_encData->m_reconPicYuv;
+    m_encData->reinit(sps);
+}
+
 void Frame::destroy()
 {
     if (m_encData)
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/frame.h
--- a/source/common/frame.h	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/frame.h	Mon Oct 27 21:59:30 2014 -0500
@@ -25,15 +25,15 @@
 #define X265_FRAME_H
 
 #include "common.h"
-#include "framedata.h"
 #include "lowres.h"
 #include "threading.h"
 
 namespace x265 {
 // private namespace
 
-class Encoder;
+class FrameData;
 class PicYuv;
+struct SPS;
 
 #define IS_REFERENCED(frame) (frame->m_lowres.sliceType != X265_TYPE_B) 
 
@@ -71,6 +71,7 @@ public:
 
     bool create(x265_param *param);
     bool allocEncodeData(x265_param *param, const SPS& sps);
+    void reinit(const SPS& sps);
     void destroy();
 };
 }
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/param.cpp
--- a/source/common/param.cpp	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/param.cpp	Mon Oct 27 21:59:30 2014 -0500
@@ -161,6 +161,7 @@ void x265_param_default(x265_param *para
     param->bEnableTransformSkip = 0;
     param->bEnableTSkipFast = 0;
     param->maxNumReferences = 3;
+    param->bEnableTemporalMvp = 1;
 
     /* Loop Filter */
     param->bEnableLoopFilter = 1;
@@ -567,6 +568,7 @@ int x265_param_parse(x265_param *p, cons
     OPT("rect") p->bEnableRectInter = atobool(value);
     OPT("amp") p->bEnableAMP = atobool(value);
     OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value);
+    OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value);
     OPT("early-skip") p->bEnableEarlySkip = atobool(value);
     OPT("fast-cbf") p->bEnableCbfFastMode = atobool(value);
     OPT("rdpenalty") p->rdPenalty = atoi(value);
@@ -880,7 +882,7 @@ int x265_check_params(x265_param *param)
         return check_failed;
 
     uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param->maxCUSize];
-    uint32_t tuQTMaxLog2Size = maxLog2CUSize - 1;
+    uint32_t tuQTMaxLog2Size = X265_MIN(maxLog2CUSize, 5);
     uint32_t tuQTMinLog2Size = 2; //log2(4)
 
     /* These checks might be temporary */
@@ -1161,6 +1163,7 @@ void x265_print_params(x265_param *param
     TOOLOPT(param->bEnableConstrainedIntra, "cip");
     TOOLOPT(param->bIntraInBFrames, "b-intra");
     TOOLOPT(param->bEnableFastIntra, "fast-intra");
+    TOOLOPT(param->bEnableTemporalMvp, "tmvp");
     if (param->bEnableTransformSkip)
         fprintf(stderr, "tskip%s ", param->bEnableTSkipFast ? "-fast" : "");
     TOOLOPT(param->bCULossless, "cu-lossless");
@@ -1194,6 +1197,7 @@ char *x265_param2string(x265_param *p)
     BOOL(p->bEnableRectInter, "rect");
     BOOL(p->bEnableAMP, "amp");
     s += sprintf(s, " max-merge=%d", p->maxNumMergeCand);
+    BOOL(p->bEnableTemporalMvp, "temporal-mvp");
     BOOL(p->bEnableEarlySkip, "early-skip");
     BOOL(p->bEnableCbfFastMode, "fast-cbf");
     s += sprintf(s, " rdpenalty=%d", p->rdPenalty);
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/predict.cpp
--- a/source/common/predict.cpp	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/predict.cpp	Mon Oct 27 21:59:30 2014 -0500
@@ -22,6 +22,8 @@
 *****************************************************************************/
 
 #include "common.h"
+#include "slice.h"
+#include "framedata.h"
 #include "picyuv.h"
 #include "predict.h"
 #include "primitives.h"
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/predict.h
--- a/source/common/predict.h	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/predict.h	Mon Oct 27 21:59:30 2014 -0500
@@ -33,6 +33,7 @@
 namespace x265 {
 
 class CUData;
+class Slice;
 struct CUGeom;
 
 class Predict
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/quant.cpp
--- a/source/common/quant.cpp	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/quant.cpp	Mon Oct 27 21:59:30 2014 -0500
@@ -24,7 +24,7 @@
 #include "common.h"
 #include "primitives.h"
 #include "quant.h"
-#include "frame.h"
+#include "framedata.h"
 #include "entropy.h"
 #include "yuv.h"
 #include "cudata.h"
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/slice.h
--- a/source/common/slice.h	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/slice.h	Mon Oct 27 21:59:30 2014 -0500
@@ -232,6 +232,7 @@ struct SPS
     int      numReorderPics;
 
     bool     bUseStrongIntraSmoothing; // use param
+    bool     bTemporalMVPEnabled;
 
     Window   conformanceWindow;
     VUI      vuiParameters;
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/x86/asm-primitives.cpp	Mon Oct 27 21:59:30 2014 -0500
@@ -1798,6 +1798,7 @@ void Setup_Assembly_Primitives(EncoderPr
         p.transpose[BLOCK_32x32] = x265_transpose32_avx2;
         p.transpose[BLOCK_64x64] = x265_transpose64_avx2;
 #endif
+        p.luma_hpp[BLOCK_4x4] = x265_interp_8tap_horiz_pp_4x4_avx2;
     }
 #endif // if HIGH_BIT_DEPTH
 }
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/x86/dct8.asm	Mon Oct 27 21:59:30 2014 -0500
@@ -245,7 +245,7 @@ avx2_idct4_1:   dw 64, 64, 64, 64, 64, 6
 
 avx2_idct4_2:   dw 64, 64, 64, -64, 83, 36, 36, -83
 
-idct4_shuf1:    times 2 db 0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15
+const idct4_shuf1,    times 2 db 0, 1, 8, 9, 4, 5, 12, 13, 2, 3, 10, 11, 6, 7, 14, 15
 
 idct4_shuf2:    times 2 db 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8 ,9 ,10, 11
 
diff -r 26e1574a5424 -r 3ccb20b6c022 source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Mon Oct 27 15:06:04 2014 -0500
+++ b/source/common/x86/ipfilter8.asm	Mon Oct 27 21:59:30 2014 -0500
@@ -31,6 +31,7 @@ tab_Tm:    db 0, 1, 2, 3, 1, 2, 3, 4, 2,
            db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
            db 8, 9,10,11, 9,10,11,12,10,11,12,13,11,12,13, 14
 
+ALIGN 32
 tab_Lm:    db 0, 1, 2, 3, 4,  5,  6,  7,  1, 2, 3, 4,  5,  6,  7,  8
            db 2, 3, 4, 5, 6,  7,  8,  9,  3, 4, 5, 6,  7,  8,  9,  10
            db 4, 5, 6, 7, 8,  9,  10, 11, 5, 6, 7, 8,  9,  10, 11, 12
@@ -128,6 +129,8 @@ tab_c_64_n64:   times 8 db 64, -64
 
 SECTION .text
 
+cextern idct4_shuf1
+cextern pw_1
 cextern pw_512
 cextern pw_2000
 
@@ -794,6 +797,64 @@ cglobal interp_8tap_horiz_%3_%1x%2, 4,7,
     RET
 %endmacro
 
+
+INIT_YMM avx2
+cglobal interp_8tap_horiz_pp_4x4, 4,6,6
+    mov             r4d, r4m
+
+%ifdef PIC
+    lea             r5, [tab_LumaCoeff]
+    vpbroadcastq    m0, [r5 + r4 * 8]
+%else
+    vpbroadcastq    m0, [tab_LumaCoeff + r4 * 8]
+%endif
+
+    mova            m1, [tab_Lm]
+    vpbroadcastd    m2, [pw_1]
+
+    ; register map
+    ; m0 - interpolate coeff
+    ; m1 - shuffle order table
+    ; m2 - constant word 1
+
+    sub             r0, 3
+    ; Row 0-1
+    vbroadcasti128  m3, [r0]                        ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb          m3, m1
+    pmaddubsw       m3, m0
+    pmaddwd         m3, m2
+    vbroadcasti128  m4, [r0 + r1]                   ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
+    pshufb          m4, m1
+    pmaddubsw       m4, m0
+    pmaddwd         m4, m2
+    phaddd          m3, m4                          ; DWORD [R1D R1C R0D R0C R1B R1A R0B R0A]
+
+    ; Row 2-3


More information about the x265-commits mailing list