[x265-commits] [x265] fix for hash mismatch in new weightp

Shazeb Nawaz Khan shazeb at multicorewareinc.com
Mon Jan 13 18:21:18 CET 2014


details:   http://hg.videolan.org/x265/rev/cd6c34bb4172
branches:  stable
changeset: 5817:cd6c34bb4172
user:      Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
date:      Thu Jan 09 17:48:13 2014 +0530
description:
fix for hash mismatch in new weightp
Subject: [x265] HM: Fix for #501: Decoding part_mode with inter_4x4 can use CNU context

details:   http://hg.videolan.org/x265/rev/de98453fa608
branches:  stable
changeset: 5818:de98453fa608
user:      Min Chen <chenm003 at 163.com>
date:      Mon Jan 13 13:25:46 2014 +0800
description:
HM: Fix for #501: Decoding part_mode with inter_4x4 can use CNU context
Subject: [x265] HM: Fix for #576: Context table for CBF

details:   http://hg.videolan.org/x265/rev/c9cefa67691c
branches:  stable
changeset: 5819:c9cefa67691c
user:      Min Chen <chenm003 at 163.com>
date:      Mon Jan 13 13:26:11 2014 +0800
description:
HM: Fix for #576: Context table for CBF
Subject: [x265] Merge with stable

details:   http://hg.videolan.org/x265/rev/5a607dd446ea
branches:  
changeset: 5820:5a607dd446ea
user:      Steve Borho <steve at borho.org>
date:      Mon Jan 13 11:01:16 2014 -0600
description:
Merge with stable

diffstat:

 source/Lib/TLibCommon/ContextTables.h |  16 ++--
 source/Lib/TLibCommon/TComSlice.h     |   6 +-
 source/common/x86/asm-primitives.cpp  |  10 ++-
 source/common/x86/intrapred.h         |  38 +++++++++++++-
 source/common/x86/intrapred8.asm      |  76 ++++++++++++++++++++++++++++
 source/common/x86/ipfilter8.asm       |  94 +++++++++++++++++++++++++++-------
 source/encoder/weightPrediction.cpp   |  78 +++++++++++++++++-----------
 source/encoder/weightPrediction.h     |   9 +-
 8 files changed, 256 insertions(+), 71 deletions(-)

diffs (truncated from 547 to 300 lines):

diff -r a03cc8c4d739 -r 5a607dd446ea source/Lib/TLibCommon/ContextTables.h
--- a/source/Lib/TLibCommon/ContextTables.h	Sat Dec 28 10:22:04 2013 +0800
+++ b/source/Lib/TLibCommon/ContextTables.h	Mon Jan 13 11:01:16 2014 -0600
@@ -56,7 +56,7 @@
 #define NUM_MERGE_FLAG_EXT_CTX        1       ///< number of context models for merge flag of merge extended
 #define NUM_MERGE_IDX_EXT_CTX         1       ///< number of context models for merge index of merge extended
 
-#define NUM_PART_SIZE_CTX             4       ///< number of context models for partition size
+#define NUM_PART_SIZE_CTX             3       ///< number of context models for partition size
 #define NUM_CU_AMP_CTX                1       ///< number of context models for partition size (AMP)
 #define NUM_PRED_MODE_CTX             1       ///< number of context models for prediction mode
 
@@ -68,7 +68,7 @@
 
 #define NUM_REF_NO_CTX                2       ///< number of context models for reference index
 #define NUM_TRANS_SUBDIV_FLAG_CTX     3       ///< number of context models for transform subdivision flags
-#define NUM_QT_CBF_CTX                5       ///< number of context models for QT CBF
+#define NUM_QT_CBF_CTX                4       ///< number of context models for QT CBF
 #define NUM_QT_ROOT_CBF_CTX           1       ///< number of context models for QT ROOT CBF
 #define NUM_DELTA_QP_CTX              3       ///< number of context models for dQP
 
@@ -203,9 +203,9 @@ static const uint8_t
 static const uint8_t
     INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
 {
-    { 154,  139,  CNU,  CNU, },
-    { 154,  139,  CNU,  CNU, },
-    { 184,  CNU,  CNU,  CNU, },
+    { 154,  139,  CNU, },
+    { 154,  139,  CNU, },
+    { 184,  CNU,  CNU, },
 };
 
 static const uint8_t
@@ -275,9 +275,9 @@ static const uint8_t
 static const uint8_t
     INIT_QT_CBF[3][2 * NUM_QT_CBF_CTX] =
 {
-    { 153,  111,  CNU,  CNU,  CNU,  149,   92,  167,  CNU,  CNU, },
-    { 153,  111,  CNU,  CNU,  CNU,  149,  107,  167,  CNU,  CNU, },
-    { 111,  141,  CNU,  CNU,  CNU,   94,  138,  182,  CNU,  CNU, },
+    { 153,  111,  CNU,  CNU,  149,   92,  167,  CNU, },
+    { 153,  111,  CNU,  CNU,  149,  107,  167,  CNU, },
+    { 111,  141,  CNU,  CNU,   94,  138,  182,  CNU, },
 };
 
 static const uint8_t
diff -r a03cc8c4d739 -r 5a607dd446ea source/Lib/TLibCommon/TComSlice.h
--- a/source/Lib/TLibCommon/TComSlice.h	Sat Dec 28 10:22:04 2013 +0800
+++ b/source/Lib/TLibCommon/TComSlice.h	Mon Jan 13 11:01:16 2014 -0600
@@ -1260,10 +1260,10 @@ struct WpScalingParam
     int         w, o, offset, shift, round;
 
     /* makes a non-h265 weight (i.e. fix7), into an h265 weight */
-    void setFromWeightAndOffset(int weight, int _offset)
+    void setFromWeightAndOffset(int weight, int _offset, int denom = 7)
     {
         inputOffset = _offset;
-        log2WeightDenom = 7;
+        log2WeightDenom = denom;
         inputWeight = weight;
         while (log2WeightDenom > 0 && (inputWeight > 127))
         {
@@ -1557,7 +1557,7 @@ public:
     void  getWpAcDcParam(wpACDCParam *&wp);
     void  initWpAcDcParam();
 
-    void setTileOffstForMultES(uint32_t offset){ m_tileOffstForMultES = offset; }
+    void setTileOffstForMultES(uint32_t offset) { m_tileOffstForMultES = offset; }
 
     uint32_t getTileOffstForMultES()           { return m_tileOffstForMultES; }
 
diff -r a03cc8c4d739 -r 5a607dd446ea source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sat Dec 28 10:22:04 2013 +0800
+++ b/source/common/x86/asm-primitives.cpp	Mon Jan 13 11:01:16 2014 -0600
@@ -549,10 +549,13 @@ extern "C" {
 
 #define SETUP_INTRA_ANG4(mode, fno, cpu) \
     p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
+#define SETUP_INTRA_ANG8(mode, fno, cpu) \
+    p.intra_pred[BLOCK_8x8][mode] = x265_intra_pred_ang8_ ## fno ## _ ## cpu;
+#define SETUP_INTRA_ANG16(mode, fno, cpu) \
+    p.intra_pred[BLOCK_16x16][mode] = x265_intra_pred_ang16_ ## fno ## _ ## cpu;
 
 namespace x265 {
 // private x265 namespace
-
 void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask)
 {
 #if HIGH_BIT_DEPTH
@@ -889,10 +892,13 @@ void Setup_Assembly_Primitives(EncoderPr
 
         SETUP_INTRA_ANG4(2, 2, ssse3);
         SETUP_INTRA_ANG4(34, 2, ssse3);
+        SETUP_INTRA_ANG8(2, 2, ssse3);
+        SETUP_INTRA_ANG8(34, 2, ssse3);
+        SETUP_INTRA_ANG16(2, 2, ssse3);
+        SETUP_INTRA_ANG16(34, 2, ssse3);
 
         p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
         p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
-
         SAD_X3(ssse3);
         SAD_X4(ssse3);
         p.sad_x4[LUMA_8x4] = x265_pixel_sad_x4_8x4_ssse3;
diff -r a03cc8c4d739 -r 5a607dd446ea source/common/x86/intrapred.h
--- a/source/common/x86/intrapred.h	Sat Dec 28 10:22:04 2013 +0800
+++ b/source/common/x86/intrapred.h	Mon Jan 13 11:01:16 2014 -0600
@@ -57,9 +57,45 @@ DECL_ANG(4, 16, sse4);
 DECL_ANG(4, 17, sse4);
 DECL_ANG(4, 18, sse4);
 DECL_ANG(4, 26, sse4);
+DECL_ANG(8, 2, ssse3);
+DECL_ANG(8, 3, sse4);
+DECL_ANG(8, 4, sse4);
+DECL_ANG(8, 5, sse4);
+DECL_ANG(8, 6, sse4);
+DECL_ANG(8, 7, sse4);
+DECL_ANG(8, 8, sse4);
+DECL_ANG(8, 9, sse4);
+DECL_ANG(8, 10, sse4);
+DECL_ANG(8, 11, sse4);
+DECL_ANG(8, 12, sse4);
+DECL_ANG(8, 13, sse4);
+DECL_ANG(8, 14, sse4);
+DECL_ANG(8, 15, sse4);
+DECL_ANG(8, 16, sse4);
+DECL_ANG(8, 17, sse4);
+DECL_ANG(8, 18, sse4);
+DECL_ANG(8, 26, sse4);
+
+DECL_ANG(16, 2, ssse3);
+DECL_ANG(16, 3, sse4);
+DECL_ANG(16, 4, sse4);
+DECL_ANG(16, 5, sse4);
+DECL_ANG(16, 6, sse4);
+DECL_ANG(16, 7, sse4);
+DECL_ANG(16, 8, sse4);
+DECL_ANG(16, 9, sse4);
+DECL_ANG(16, 10, sse4);
+DECL_ANG(16, 11, sse4);
+DECL_ANG(16, 12, sse4);
+DECL_ANG(16, 13, sse4);
+DECL_ANG(16, 14, sse4);
+DECL_ANG(16, 15, sse4);
+DECL_ANG(16, 16, sse4);
+DECL_ANG(16, 17, sse4);
+DECL_ANG(16, 18, sse4);
+DECL_ANG(16, 26, sse4);
 
 #undef DECL_ANG
-
 void x265_all_angs_pred_4x4_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
 void x265_all_angs_pred_8x8_sse4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma);
 
diff -r a03cc8c4d739 -r 5a607dd446ea source/common/x86/intrapred8.asm
--- a/source/common/x86/intrapred8.asm	Sat Dec 28 10:22:04 2013 +0800
+++ b/source/common/x86/intrapred8.asm	Mon Jan 13 11:01:16 2014 -0600
@@ -1105,6 +1105,82 @@ cglobal intra_pred_ang4_18, 4,4,1
     psrldq      m0, 1
     movd        [r0], m0
     RET
+;-----------------------------------------------------------------------------
+; void intraPredAng(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;-----------------------------------------------------------------------------
+INIT_XMM ssse3
+cglobal intra_pred_ang8_2, 3,5,2
+    cmp         r4m,            byte 34
+    cmove       r2,             r3mp
+    movu        m0,             [r2 + 2]
+    lea         r4,             [r1 * 3]
+
+    movh        [r0],           m0
+    palignr     m1,             m0, 1
+    movh        [r0 + r1],      m1
+    palignr     m1,             m0, 2
+    movh        [r0 + r1 * 2],  m1
+    palignr     m1,             m0, 3
+    movh        [r0 + r4],      m1
+    palignr     m1,             m0, 4
+    lea         r0,             [r0 + r1 * 4]
+    movh        [r0],           m1
+    palignr     m1,             m0, 5
+    movh        [r0 + r1],      m1
+    palignr     m1,             m0, 6
+    movh        [r0 + r1 * 2],  m1
+    palignr     m1,             m0, 7
+    movh        [r0 + r4],      m1
+    RET
+
+;-----------------------------------------------------------------------------
+; void intraPredAng16(pixel* dst, intptr_t dstStride, pixel *refLeft, pixel *refAbove, int dirMode, int bFilter)
+;-----------------------------------------------------------------------------
+INIT_XMM ssse3
+cglobal intra_pred_ang16_2, 3,3,3
+    cmp             r4m, byte 34
+    cmove           r2, r3mp
+    movu            m0, [r2 + 2]
+    movu            m1, [r2 + 18]
+    movu            [r0], m0
+    palignr         m2, m1, m0, 1
+    movu            [r0 + r1], m2
+    lea             r0, [r0 + r1 * 2]
+    palignr         m2, m1, m0, 2
+    movu            [r0], m2
+    palignr         m2, m1, m0, 3
+    movu            [r0 + r1], m2
+    lea             r0, [r0 + r1 * 2]
+    palignr         m2, m1, m0, 4
+    movu            [r0], m2
+    palignr         m2, m1, m0, 5
+    movu            [r0 + r1], m2
+    lea             r0, [r0 + r1 * 2]
+    palignr         m2, m1, m0, 6
+    movu            [r0], m2
+    palignr         m2, m1, m0, 7
+    movu            [r0 + r1], m2
+    lea             r0, [r0 + r1 * 2]
+    palignr         m2, m1, m0, 8
+    movu            [r0], m2
+    palignr         m2, m1, m0, 9
+    movu            [r0 + r1], m2
+    lea             r0, [r0 + r1 * 2]
+    palignr         m2, m1, m0, 10
+    movu            [r0], m2
+    palignr         m2, m1, m0, 11
+    movu            [r0 + r1], m2
+    lea             r0, [r0 + r1 * 2]
+    palignr         m2, m1, m0, 12
+    movu            [r0], m2
+    palignr         m2, m1, m0, 13
+    movu            [r0 + r1], m2
+    lea             r0, [r0 + r1 * 2]
+    palignr         m2, m1, m0, 14
+    movu            [r0], m2
+    palignr         m2, m1, m0, 15
+    movu            [r0 + r1], m2
+    RET
 
 ;-----------------------------------------------------------------------------
 ; void all_angs_pred_4x4(pixel *dest, pixel *above0, pixel *left0, pixel *above1, pixel *left1, bool bLuma)
diff -r a03cc8c4d739 -r 5a607dd446ea source/common/x86/ipfilter8.asm
--- a/source/common/x86/ipfilter8.asm	Sat Dec 28 10:22:04 2013 +0800
+++ b/source/common/x86/ipfilter8.asm	Mon Jan 13 11:01:16 2014 -0600
@@ -29,6 +29,7 @@
 SECTION_RODATA 32
 tab_Tm:    db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
            db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
+           db 8, 9,10,11, 9,10,11,12,10,11,12,13,11,12,13, 14
 
 tab_Lm:    db 0, 1, 2, 3, 4,  5,  6,  7,  1, 2, 3, 4,  5,  6,  7,  8
            db 2, 3, 4, 5, 6,  7,  8,  9,  3, 4, 5, 6,  7,  8,  9,  10
@@ -127,6 +128,7 @@ tab_c_64_n64:   times 8 db 64, -64
 
 SECTION .text
 
+cextern pw_512
 cextern pw_2000
 
 %macro FILTER_H4_w2_2 3
@@ -688,30 +690,80 @@ cglobal interp_8tap_horiz_%3_%1x%2, 4,7,
 ; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
 ;--------------------------------------------------------------------------------------------------------------
     IPFILTER_LUMA 4, 4, pp
-    IPFILTER_LUMA 8, 8, pp
-    IPFILTER_LUMA 8, 4, pp
     IPFILTER_LUMA 4, 8, pp
-    IPFILTER_LUMA 16, 16, pp
-    IPFILTER_LUMA 16, 8, pp
-    IPFILTER_LUMA 8, 16, pp
-    IPFILTER_LUMA 16, 12, pp
     IPFILTER_LUMA 12, 16, pp
-    IPFILTER_LUMA 16, 4, pp
     IPFILTER_LUMA 4, 16, pp
-    IPFILTER_LUMA 32, 32, pp
-    IPFILTER_LUMA 32, 16, pp
-    IPFILTER_LUMA 16, 32, pp
-    IPFILTER_LUMA 32, 24, pp
-    IPFILTER_LUMA 24, 32, pp
-    IPFILTER_LUMA 32, 8, pp
-    IPFILTER_LUMA 8, 32, pp
-    IPFILTER_LUMA 64, 64, pp
-    IPFILTER_LUMA 64, 32, pp
-    IPFILTER_LUMA 32, 64, pp
-    IPFILTER_LUMA 64, 48, pp
-    IPFILTER_LUMA 48, 64, pp
-    IPFILTER_LUMA 64, 16, pp
-    IPFILTER_LUMA 16, 64, pp
+
+
+;--------------------------------------------------------------------------------------------------------------
+; void interp_8tap_horiz_pp_%1x%2(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
+;--------------------------------------------------------------------------------------------------------------
+%macro IPFILTER_LUMA_PP_W8 2
+INIT_XMM sse4
+cglobal interp_8tap_horiz_pp_%1x%2, 4,6,7
+    mov         r4d, r4m
+
+%ifdef PIC
+    lea         r5, [tab_LumaCoeff]


More information about the x265-commits mailing list