[x265-commits] [x265] fix blockCbpBits[]

Satoshi Nakagawa nakagawa424 at oki.com
Fri Jan 17 06:40:31 CET 2014


details:   http://hg.videolan.org/x265/rev/04aae8fd88a0
branches:  
changeset: 5831:04aae8fd88a0
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Wed Jan 15 22:31:13 2014 +0900
description:
fix blockCbpBits[]
Subject: [x265] remove duplicate code

details:   http://hg.videolan.org/x265/rev/188617e76d60
branches:  
changeset: 5832:188617e76d60
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Sun Jan 12 22:56:03 2014 +0900
description:
remove duplicate code
Subject: [x265] TEncSbac: remove hungarian prefixes from loop vars

details:   http://hg.videolan.org/x265/rev/243b01e81109
branches:  
changeset: 5833:243b01e81109
user:      Steve Borho <steve at borho.org>
date:      Thu Jan 16 19:39:25 2014 -0600
description:
TEncSbac: remove hungarian prefixes from loop vars
Subject: [x265] add dts to x265_picture, handle same as x264

details:   http://hg.videolan.org/x265/rev/57b3238680c5
branches:  
changeset: 5834:57b3238680c5
user:      Xun Xu, PPLive Corporation <xunxu at pptv.com>
date:      Thu Jan 16 19:53:59 2014 -0600
description:
add dts to x265_picture, handle same as x264
Subject: [x265] stats: simplify slice type lookup

details:   http://hg.videolan.org/x265/rev/3d747041271f
branches:  
changeset: 5835:3d747041271f
user:      Steve Borho <steve at borho.org>
date:      Thu Jan 16 20:08:45 2014 -0600
description:
stats: simplify slice type lookup
Subject: [x265] asm: code for intra_pred[BLOCK_32x32] mode 2 and 34

details:   http://hg.videolan.org/x265/rev/1d7ea03e1a38
branches:  
changeset: 5836:1d7ea03e1a38
user:      Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date:      Wed Jan 15 19:18:53 2014 +0530
description:
asm: code for intra_pred[BLOCK_32x32] mode 2 and 34

diffstat:

 source/CMakeLists.txt                |    2 +-
 source/Lib/TLibCommon/TComPic.h      |    5 +-
 source/Lib/TLibCommon/TComTrQuant.h  |    2 +-
 source/Lib/TLibEncoder/TEncSbac.cpp  |   12 +-
 source/common/x86/asm-primitives.cpp |    5 +
 source/common/x86/intrapred.h        |   19 ++++
 source/common/x86/intrapred8.asm     |  162 +++++++++++++++++++++++++++++++++++
 source/encoder/compress.cpp          |    4 -
 source/encoder/encoder.cpp           |   27 +++++-
 source/encoder/encoder.h             |    7 +-
 source/encoder/slicetype.cpp         |   25 +++++-
 source/x265.h                        |    1 +
 12 files changed, 255 insertions(+), 16 deletions(-)

diffs (truncated from 521 to 300 lines):

diff -r 37b4ca796088 -r 1d7ea03e1a38 source/CMakeLists.txt
--- a/source/CMakeLists.txt	Wed Jan 15 17:51:11 2014 +0800
+++ b/source/CMakeLists.txt	Wed Jan 15 19:18:53 2014 +0530
@@ -13,7 +13,7 @@ include(CheckFunctionExists)
 include(CheckCXXCompilerFlag)
 
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 4)
+set(X265_BUILD 5)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r 37b4ca796088 -r 1d7ea03e1a38 source/Lib/TLibCommon/TComPic.h
--- a/source/Lib/TLibCommon/TComPic.h	Wed Jan 15 17:51:11 2014 +0800
+++ b/source/Lib/TLibCommon/TComPic.h	Wed Jan 15 19:18:53 2014 +0530
@@ -81,8 +81,11 @@ public:
     volatile uint32_t     m_countRefEncoders;   // count of FrameEncoder threads monitoring m_reconRowCount
     Event                 m_reconRowWait;       // event triggered m_countRefEncoders times each time a recon row is completed
     void*                 m_userData;           // user provided pointer passed in with this picture
+    
     int64_t               m_pts;                // user provided presentation time stamp
-
+    int64_t               m_reorderedPts;
+    int64_t               m_dts;
+    
     Lowres                m_lowres;
 
     TComPic*              m_next;
diff -r 37b4ca796088 -r 1d7ea03e1a38 source/Lib/TLibCommon/TComTrQuant.h
--- a/source/Lib/TLibCommon/TComTrQuant.h	Wed Jan 15 17:51:11 2014 +0800
+++ b/source/Lib/TLibCommon/TComTrQuant.h	Wed Jan 15 19:18:53 2014 +0530
@@ -68,7 +68,7 @@ typedef struct
     int greaterOneBits[NUM_ONE_FLAG_CTX][2];
     int levelAbsBits[NUM_ABS_FLAG_CTX][2];
 
-    int blockCbpBits[3 * NUM_QT_CBF_CTX][2];
+    int blockCbpBits[2 * NUM_QT_CBF_CTX][2];
     int blockRootCbpBits[4][2];
 } estBitsSbacStruct;
 
diff -r 37b4ca796088 -r 1d7ea03e1a38 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Wed Jan 15 17:51:11 2014 +0800
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Wed Jan 15 19:18:53 2014 +0530
@@ -2461,18 +2461,18 @@ void TEncSbac::estCBFBit(estBitsSbacStru
 {
     ContextModel *ctx = &m_contextModels[OFF_QT_CBF_CTX];
 
-    for (uint32_t uiCtxInc = 0; uiCtxInc < 3 * NUM_QT_CBF_CTX; uiCtxInc++)
+    for (uint32_t ctxInc = 0; ctxInc < 2 * NUM_QT_CBF_CTX; ctxInc++)
     {
-        estBitsSbac->blockCbpBits[uiCtxInc][0] = sbacGetEntropyBits(ctx[uiCtxInc].m_state, 0);
-        estBitsSbac->blockCbpBits[uiCtxInc][1] = sbacGetEntropyBits(ctx[uiCtxInc].m_state, 1);
+        estBitsSbac->blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc].m_state, 0);
+        estBitsSbac->blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc].m_state, 1);
     }
 
     ctx = &m_contextModels[OFF_QT_ROOT_CBF_CTX];
 
-    for (uint32_t uiCtxInc = 0; uiCtxInc < 4; uiCtxInc++)
+    for (uint32_t ctxInc = 0; ctxInc < 4; ctxInc++)
     {
-        estBitsSbac->blockRootCbpBits[uiCtxInc][0] = sbacGetEntropyBits(ctx[uiCtxInc].m_state, 0);
-        estBitsSbac->blockRootCbpBits[uiCtxInc][1] = sbacGetEntropyBits(ctx[uiCtxInc].m_state, 1);
+        estBitsSbac->blockRootCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc].m_state, 0);
+        estBitsSbac->blockRootCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc].m_state, 1);
     }
 }
 
diff -r 37b4ca796088 -r 1d7ea03e1a38 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Wed Jan 15 17:51:11 2014 +0800
+++ b/source/common/x86/asm-primitives.cpp	Wed Jan 15 19:18:53 2014 +0530
@@ -554,6 +554,9 @@ extern "C" {
 #define SETUP_INTRA_ANG16(mode, fno, cpu) \
     p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## cpu;
 
+#define SETUP_INTRA_ANG32(mode, fno, cpu) \
+    p.intra_pred[BLOCK_32x32][mode] = x265_intra_pred_ang32_ ## fno ## _ ## cpu;
+
 namespace x265 {
 // private x265 namespace
 void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
@@ -896,6 +899,8 @@ void Setup_Assembly_Primitives(EncoderPr
         SETUP_INTRA_ANG8(34, 2, ssse3);
         SETUP_INTRA_ANG16(2, 2, ssse3);
         SETUP_INTRA_ANG16(34, 2, ssse3);
+        SETUP_INTRA_ANG32(2, 2, ssse3);
+        SETUP_INTRA_ANG32(34, 2, ssse3);
 
         p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
diff -r 37b4ca796088 -r 1d7ea03e1a38 source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Wed Jan 15 17:51:11 2014 +0800
+++ b/source/common/x86/intrapred.h	Wed Jan 15 19:18:53 2014 +0530
@@ -95,6 +95,25 @@ DECL_ANG(16, 17, sse4);
 DECL_ANG(16, 18, sse4);
 DECL_ANG(16, 26, sse4);
 
+DECL_ANG(32, 2, ssse3);
+DECL_ANG(32, 3, sse4);
+DECL_ANG(32, 4, sse4);
+DECL_ANG(32, 5, sse4);
+DECL_ANG(32, 6, sse4);
+DECL_ANG(32, 7, sse4);
+DECL_ANG(32, 8, sse4);
+DECL_ANG(32, 9, sse4);
+DECL_ANG(32, 10, sse4);
+DECL_ANG(32, 11, sse4);
+DECL_ANG(32, 12, sse4);
+DECL_ANG(32, 13, sse4);
+DECL_ANG(32, 14, sse4);
+DECL_ANG(32, 15, sse4);
+DECL_ANG(32, 16, sse4);
+DECL_ANG(32, 17, sse4);
+DECL_ANG(32, 18, sse4);
+DECL_ANG(32, 26, sse4);
+
 #undef DECL_ANG
 void x265_all_angs_pred_4x4_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
 void x265_all_angs_pred_8x8_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
diff -r 37b4ca796088 -r 1d7ea03e1a38 source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Wed Jan 15 17:51:11 2014 +0800
+++ b/source/common/x86/intrapred8.asm	Wed Jan 15 19:18:53 2014 +0530
@@ -1182,6 +1182,168 @@ cglobal intra_pred_ang16_2, 3,3,3
     movu            [r0 + r1], m2
     RET
 
+
+;---------------------------------------------------------------------------------------------------------------
+; void intraPredAng32(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;---------------------------------------------------------------------------------------------------------------
+INIT_XMM ssse3
+cglobal intra_pred_ang32_2, 3,4,4
+    cmp             r4m, byte 34
+    cmove           r2, r3mp
+    movu            m0, [r2 + 2]
+    movu            m1, [r2 + 18]
+    movu            m3, [r2 + 34]
+
+    lea             r3, [r1 * 3]
+
+    movu            [r0], m0
+    movu            [r0 + 16], m1
+    palignr         m2, m1, m0, 1
+    movu            [r0 + r1], m2
+    palignr         m2, m3, m1, 1
+    movu            [r0 + r1 + 16], m2
+    palignr         m2, m1, m0, 2
+    movu            [r0 + r1 * 2], m2
+    palignr         m2, m3, m1, 2
+    movu            [r0 + r1 * 2 + 16], m2
+    palignr         m2, m1, m0, 3
+    movu            [r0 + r3], m2
+    palignr         m2, m3, m1, 3
+    movu            [r0 + r3 + 16], m2
+
+    lea             r0, [r0 + r1 * 4]
+
+    palignr         m2, m1, m0, 4
+    movu            [r0], m2
+    palignr         m2, m3, m1, 4
+    movu            [r0 + 16], m2
+    palignr         m2, m1, m0, 5
+    movu            [r0 + r1], m2
+    palignr         m2, m3, m1, 5
+    movu            [r0 + r1 + 16], m2
+    palignr         m2, m1, m0, 6
+    movu            [r0 + r1 * 2], m2
+    palignr         m2, m3, m1, 6
+    movu            [r0 + r1 * 2 + 16], m2
+    palignr         m2, m1, m0, 7
+    movu            [r0 + r3], m2
+    palignr         m2, m3, m1, 7
+    movu            [r0 + r3 + 16], m2
+
+    lea             r0, [r0 + r1 * 4]
+
+    palignr         m2, m1, m0, 8
+    movu            [r0], m2
+    palignr         m2, m3, m1, 8
+    movu            [r0 + 16], m2
+    palignr         m2, m1, m0, 9
+    movu            [r0 + r1], m2
+    palignr         m2, m3, m1, 9
+    movu            [r0 + r1 + 16], m2
+    palignr         m2, m1, m0, 10
+    movu            [r0 + r1 * 2], m2
+    palignr         m2, m3, m1, 10
+    movu            [r0 + r1 * 2 + 16], m2
+    palignr         m2, m1, m0, 11
+    movu            [r0 + r3], m2
+    palignr         m2, m3, m1, 11
+    movu            [r0 + r3 + 16], m2
+
+    lea             r0, [r0 + r1 * 4]
+
+    palignr         m2, m1, m0, 12
+    movu            [r0], m2
+    palignr         m2, m3, m1, 12
+    movu            [r0 + 16], m2
+    palignr         m2, m1, m0, 13
+    movu            [r0 + r1], m2
+    palignr         m2, m3, m1, 13
+    movu            [r0 + r1 + 16], m2
+    palignr         m2, m1, m0, 14
+    movu            [r0 + r1 * 2], m2
+    palignr         m2, m3, m1, 14
+    movu            [r0 + r1 * 2 + 16], m2
+    palignr         m2, m1, m0, 15
+    movu            [r0 + r3], m2
+    palignr         m2, m3, m1, 15
+    movu            [r0 + r3 + 16], m2
+
+    lea             r0, [r0 + r1 * 4]
+
+    movu            [r0], m1
+    movu            m0, [r2 + 50]
+    movu            [r0 + 16], m3
+    palignr         m2, m3, m1, 1
+    movu            [r0 + r1], m2
+    palignr         m2, m0, m3, 1
+    movu            [r0 + r1 + 16], m2
+    palignr         m2, m3, m1, 2
+    movu            [r0 + r1 * 2], m2
+    palignr         m2, m0, m3, 2
+    movu            [r0 + r1 * 2 + 16], m2
+    palignr         m2, m3, m1, 3
+    movu            [r0 + r3], m2
+    palignr         m2, m0, m3, 3
+    movu            [r0 + r3 + 16], m2
+
+    lea             r0, [r0 + r1 * 4]
+
+    palignr         m2, m3, m1, 4
+    movu            [r0], m2
+    palignr         m2, m0, m3, 4
+    movu            [r0 + 16], m2
+    palignr         m2, m3, m1, 5
+    movu            [r0 + r1], m2
+    palignr         m2, m0, m3, 5
+    movu            [r0 + r1 + 16], m2
+    palignr         m2, m3, m1, 6
+    movu            [r0 + r1 * 2], m2
+    palignr         m2, m0, m3, 6
+    movu            [r0 + r1 * 2 + 16], m2
+    palignr         m2, m3, m1, 7
+    movu            [r0 + r3], m2
+    palignr         m2, m0, m3, 7
+    movu            [r0 + r3 + 16], m2
+
+    lea             r0, [r0 + r1 * 4]
+
+    palignr         m2, m3, m1, 8
+    movu            [r0], m2
+    palignr         m2, m0, m3, 8
+    movu            [r0 + 16], m2
+    palignr         m2, m3, m1, 9
+    movu            [r0 + r1], m2
+    palignr         m2, m0, m3, 9
+    movu            [r0 + r1 + 16], m2
+    palignr         m2, m3, m1, 10
+    movu            [r0 + r1 * 2], m2
+    palignr         m2, m0, m3, 10
+    movu            [r0 + r1 * 2 + 16], m2
+    palignr         m2, m3, m1, 11
+    movu            [r0 + r3], m2
+    palignr         m2, m0, m3, 11
+    movu            [r0 + r3 + 16], m2
+
+    lea             r0, [r0 + r1 * 4]
+
+    palignr         m2, m3, m1, 12
+    movu            [r0], m2
+    palignr         m2, m0, m3, 12
+    movu            [r0 + 16], m2
+    palignr         m2, m3, m1, 13
+    movu            [r0 + r1], m2
+    palignr         m2, m0, m3, 13
+    movu            [r0 + r1 + 16], m2
+    palignr         m2, m3, m1, 14
+    movu            [r0 + r1 * 2], m2
+    palignr         m2, m0, m3, 14
+    movu            [r0 + r1 * 2 + 16], m2
+    palignr         m2, m3, m1, 15
+    movu            [r0 + r3], m2
+    palignr         m2, m0, m3, 15
+    movu            [r0 + r3 + 16], m2
+    RET
+
 ;-----------------------------------------------------------------------------
 ; void all_angs_pred_4x4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma)
 ;-----------------------------------------------------------------------------
diff -r 37b4ca796088 -r 1d7ea03e1a38 source/encoder/compress.cpp
--- a/source/encoder/compress.cpp	Wed Jan 15 17:51:11 2014 +0800
+++ b/source/encoder/compress.cpp	Wed Jan 15 19:18:53 2014 +0530
@@ -230,15 +230,11 @@ void TEncCu::xComputeCostMerge2Nx2N(TCom
     outTempCU->setCUTransquantBypassSubParts(m_cfg->getCUTransquantBypassFlagValue(), 0, depth);
     outTempCU->getInterMergeCandidates(0, 0, mvFieldNeighbours, interDirNeighbours, numValidMergeCand);
     outTempCU->setPredModeSubParts(MODE_INTER, 0, depth);
-    outTempCU->setCUTransquantBypassSubParts(m_cfg->getCUTransquantBypassFlagValue(), 0, depth);
-    outTempCU->setPartSizeSubParts(SIZE_2Nx2N, 0, depth);


More information about the x265-commits mailing list