[x265-commits] [x265] slicetype: move lastNonB set outside if expression for bo...

Steve Borho steve at borho.org
Wed Nov 6 10:04:06 CET 2013


details:   http://hg.videolan.org/x265/rev/412d2f3a2bd2
branches:  
changeset: 4873:412d2f3a2bd2
user:      Steve Borho <steve at borho.org>
date:      Wed Nov 06 02:12:44 2013 -0600
description:
slicetype: move lastNonB set outside if expression for both I and P clauses
Subject: [x265] motion: simplify lowres subpel refine

details:   http://hg.videolan.org/x265/rev/72520485725e
branches:  
changeset: 4874:72520485725e
user:      Steve Borho <steve at borho.org>
date:      Tue Nov 05 22:40:08 2013 -0600
description:
motion: simplify lowres subpel refine
Subject: [x265] motion: simplify subpel refine, drop height+1 interpolation

details:   http://hg.videolan.org/x265/rev/a1d576fbd0b0
branches:  
changeset: 4875:a1d576fbd0b0
user:      Steve Borho <steve at borho.org>
date:      Wed Nov 06 02:38:37 2013 -0600
description:
motion: simplify subpel refine, drop height+1 interpolation

This is in preparation for enabling assembly versions of the interpolation functions
Subject: [x265] tcomdatacu: remove unused set functions

details:   http://hg.videolan.org/x265/rev/9368bfd107b8
branches:  
changeset: 4876:9368bfd107b8
user:      Gopu Govindaswamy <gopu at multicorewareinc.com>
date:      Wed Nov 06 11:36:36 2013 +0530
description:
tcomdatacu: remove unused set functions
Subject: [x265] tcomdatacu: remove memset in initEstData()

details:   http://hg.videolan.org/x265/rev/d044314537ad
branches:  
changeset: 4877:d044314537ad
user:      Gopu Govindaswamy <gopu at multicorewareinc.com>
date:      Wed Nov 06 11:51:45 2013 +0530
description:
tcomdatacu: remove memset in initEstData()

The m_trCoeffY, m_trCoeffCb, m_trCoeffCr, m_iPCMSampleY, m_iPCMSampleCb and m_iPCMSampleCr
buffers are already initialized in initCU(), so it is not required to set them to 0 again in initEstData()
Subject: [x265] tcomdatacu: remove memset in initSubCU()

details:   http://hg.videolan.org/x265/rev/1b913b8f7f19
branches:  
changeset: 4878:1b913b8f7f19
user:      Gopu Govindaswamy <gopu at multicorewareinc.com>
date:      Wed Nov 06 11:59:44 2013 +0530
description:
tcomdatacu: remove memset in initSubCU()

The m_trCoeffY, m_trCoeffCb, m_trCoeffCr, m_iPCMSampleY, m_iPCMSampleCb and m_iPCMSampleCr
buffers are already initialized in initCU(), so it is not required to set them to 0 again in initSubCU()
Subject: [x265] Adding function pointer array and C primitive for luma hps filter functions.

details:   http://hg.videolan.org/x265/rev/e31319dfb866
branches:  
changeset: 4879:e31319dfb866
user:      Nabajit Deka
date:      Wed Nov 06 12:25:45 2013 +0530
description:
Adding function pointer array and C primitive for luma hps filter functions.
Subject: [x265] Adding test bench code for luma hps filter functions.

details:   http://hg.videolan.org/x265/rev/cb323bec7d06
branches:  
changeset: 4880:cb323bec7d06
user:      Nabajit Deka
date:      Wed Nov 06 12:28:46 2013 +0530
description:
Adding test bench code for luma hps filter functions.
Subject: [x265] asm code for blockcopy_sp, 8xN blocks

details:   http://hg.videolan.org/x265/rev/73b4015984fd
branches:  
changeset: 4881:73b4015984fd
user:      Praveen Tiwari
date:      Wed Nov 06 12:04:35 2013 +0530
description:
asm code for blockcopy_sp, 8xN blocks
Subject: [x265] asm code for blockcopy_sp, 4xN blocks

details:   http://hg.videolan.org/x265/rev/bab35592e71c
branches:  
changeset: 4882:bab35592e71c
user:      Praveen Tiwari
date:      Wed Nov 06 13:06:15 2013 +0530
description:
asm code for blockcopy_sp, 4xN blocks

diffstat:

 source/Lib/TLibCommon/TComDataCU.cpp |   28 --
 source/Lib/TLibCommon/TComDataCU.h   |   28 --
 source/common/ipfilter.cpp           |   38 +++
 source/common/primitives.h           |    1 +
 source/common/x86/asm-primitives.cpp |   15 +
 source/common/x86/blockcopy8.asm     |  350 +++++++++++++++++++++++++++++++++++
 source/encoder/motion.cpp            |  277 ++++++++++++---------------
 source/encoder/motion.h              |    2 -
 source/encoder/slicetype.cpp         |    2 +-
 source/test/ipfilterharness.cpp      |   17 +
 10 files changed, 544 insertions(+), 214 deletions(-)

diffs (truncated from 1025 to 300 lines):

diff -r bc99537483f1 -r bab35592e71c source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp	Tue Nov 05 22:21:55 2013 -0600
+++ b/source/Lib/TLibCommon/TComDataCU.cpp	Wed Nov 06 13:06:15 2013 +0530
@@ -438,16 +438,6 @@ void TComDataCU::initEstData(uint32_t de
 
     m_cuMvField[0].clearMvField();
     m_cuMvField[1].clearMvField();
-
-    uint32_t tmp = width * height;
-    memset(m_trCoeffY,    0, tmp * sizeof(*m_trCoeffY));
-    memset(m_iPCMSampleY, 0, tmp * sizeof(*m_iPCMSampleY));
-
-    tmp = (width >> m_hChromaShift) * (height >> m_vChromaShift);
-    memset(m_trCoeffCb,    0, tmp * sizeof(*m_trCoeffCb));
-    memset(m_trCoeffCr,    0, tmp * sizeof(*m_trCoeffCr));
-    memset(m_iPCMSampleCb, 0, tmp * sizeof(*m_iPCMSampleCb));
-    memset(m_iPCMSampleCr, 0, tmp * sizeof(*m_iPCMSampleCr));
 }
 
 // initialize Sub partition
@@ -513,16 +503,6 @@ void TComDataCU::initSubCU(TComDataCU* c
         m_mvpNum[1][i] = -1;
     }
 
-    uint32_t tmp = width * heigth;
-    memset(m_trCoeffY, 0, sizeof(TCoeff) * tmp);
-    memset(m_iPCMSampleY, 0, sizeof(Pel) * tmp);
-
-    tmp = (width >> m_hChromaShift) * (heigth >> m_vChromaShift);
-    memset(m_trCoeffCb, 0, sizeof(TCoeff) * tmp);
-    memset(m_trCoeffCr, 0, sizeof(TCoeff) * tmp);
-    memset(m_iPCMSampleCb, 0, sizeof(Pel) * tmp);
-    memset(m_iPCMSampleCr, 0, sizeof(Pel) * tmp);
-
     m_cuMvField[0].clearMvField();
     m_cuMvField[1].clearMvField();
 
@@ -1597,14 +1577,6 @@ void TComDataCU::setTransformSkipSubPart
     memset(m_transformSkip[g_convertTxtTypeToIdx[ttype]] + absPartIdx, useTransformSkip, sizeof(UChar) * curPartNum);
 }
 
-void TComDataCU::setSizeSubParts(uint32_t width, uint32_t height, uint32_t absPartIdx, uint32_t depth)
-{
-    uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
-
-    memset(m_width  + absPartIdx, width,  sizeof(UChar) * curPartNum);
-    memset(m_height + absPartIdx, height, sizeof(UChar) * curPartNum);
-}
-
 UChar TComDataCU::getNumPartInter()
 {
     UChar numPart = 0;
diff -r bc99537483f1 -r bab35592e71c source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h	Tue Nov 05 22:21:55 2013 -0600
+++ b/source/Lib/TLibCommon/TComDataCU.h	Wed Nov 06 13:06:15 2013 +0530
@@ -224,8 +224,6 @@ public:
 
     UChar         getDepth(uint32_t idx)           { return m_depth[idx]; }
 
-    void          setDepth(uint32_t idx, UChar h)  { m_depth[idx] = h; }
-
     void          setDepthSubParts(uint32_t depth, uint32_t absPartIdx);
 
     // -------------------------------------------------------------------------------------------------------------------
@@ -234,12 +232,8 @@ public:
 
     char*         getPartitionSize()                      { return m_partSizes; }
 
-    int           getUnitSize()                           { return m_unitSize; }
-
     PartSize      getPartitionSize(uint32_t idx)              { return static_cast<PartSize>(m_partSizes[idx]); }
 
-    void          setPartitionSize(uint32_t idx, PartSize uh) { m_partSizes[idx] = (char)uh; }
-
     void          setPartSizeSubParts(PartSize eMode, uint32_t absPartIdx, uint32_t depth);
     void          setCUTransquantBypassSubParts(bool flag, uint32_t absPartIdx, uint32_t depth);
 
@@ -247,8 +241,6 @@ public:
 
     bool         getSkipFlag(uint32_t idx)            { return m_skipFlag[idx]; }
 
-    void         setSkipFlag(uint32_t idx, bool skip) { m_skipFlag[idx] = skip; }
-
     void         setSkipFlagSubParts(bool skip, uint32_t absPartIdx, uint32_t depth);
 
     char*         getPredictionMode()                 { return m_predModes; }
@@ -259,24 +251,16 @@ public:
 
     bool          getCUTransquantBypass(uint32_t idx)     { return m_cuTransquantBypass[idx]; }
 
-    void          setPredictionMode(uint32_t idx, PredMode uh) { m_predModes[idx] = (char)uh; }
-
     void          setPredModeSubParts(PredMode eMode, uint32_t absPartIdx, uint32_t depth);
 
     UChar*        getWidth()                     { return m_width; }
 
     UChar         getWidth(uint32_t idx)             { return m_width[idx]; }
 
-    void          setWidth(uint32_t idx, UChar  uh)  { m_width[idx] = uh; }
-
     UChar*        getHeight()                    { return m_height; }
 
     UChar         getHeight(uint32_t idx)            { return m_height[idx]; }
 
-    void          setHeight(uint32_t idx, UChar  uh) { m_height[idx] = uh; }
-
-    void          setSizeSubParts(uint32_t width, uint32_t height, uint32_t absPartIdx, uint32_t depth);
-
     char*         getQP()                        { return m_qp; }
 
     char          getQP(uint32_t idx)                { return m_qp[idx]; }
@@ -342,16 +326,12 @@ public:
 
     bool          getMergeFlag(uint32_t idx)            { return m_bMergeFlags[idx]; }
 
-    void          setMergeFlag(uint32_t idx, bool b)    { m_bMergeFlags[idx] = b; }
-
     void          setMergeFlagSubParts(bool bMergeFlag, uint32_t absPartIdx, uint32_t partIdx, uint32_t depth);
 
     UChar*        getMergeIndex()                   { return m_mergeIndex; }
 
     UChar         getMergeIndex(uint32_t idx)           { return m_mergeIndex[idx]; }
 
-    void          setMergeIndex(uint32_t idx, uint32_t mergeIndex) { m_mergeIndex[idx] = (UChar)mergeIndex; }
-
     void          setMergeIndexSubParts(uint32_t mergeIndex, uint32_t absPartIdx, uint32_t partIdx, uint32_t depth);
     template<typename T>
     void          setSubPart(T bParameter, T* pbBaseLCU, uint32_t cuAddr, uint32_t cuDepth, uint32_t puIdx);
@@ -364,24 +344,18 @@ public:
 
     UChar         getLumaIntraDir(uint32_t idx) { return m_lumaIntraDir[idx]; }
 
-    void          setLumaIntraDir(uint32_t idx, UChar uh) { m_lumaIntraDir[idx] = uh; }
-
     void          setLumaIntraDirSubParts(uint32_t dir, uint32_t absPartIdx, uint32_t depth);
 
     UChar*        getChromaIntraDir()                 { return m_chromaIntraDir; }
 
     UChar         getChromaIntraDir(uint32_t idx)         { return m_chromaIntraDir[idx]; }
 
-    void          setChromaIntraDir(uint32_t idx, UChar  uh) { m_chromaIntraDir[idx] = uh; }
-
     void          setChromIntraDirSubParts(uint32_t dir, uint32_t absPartIdx, uint32_t depth);
 
     UChar*        getInterDir()                    { return m_interDir; }
 
     UChar         getInterDir(uint32_t idx)            { return m_interDir[idx]; }
 
-    void          setInterDir(uint32_t idx, UChar  uh) { m_interDir[idx] = uh; }
-
     void          setInterDirSubParts(uint32_t dir,  uint32_t absPartIdx, uint32_t partIdx, uint32_t depth);
     bool*         getIPCMFlag()                     { return m_iPCMFlags; }
 
@@ -414,8 +388,6 @@ public:
 
     char*         getMVPIdx(int picList)                       { return m_mvpIdx[picList]; }
 
-    void          setMVPNum(int picList, uint32_t idx, int mvpNum) { m_mvpNum[picList][idx] = (char)mvpNum; }
-
     int           getMVPNum(int picList, uint32_t idx)             { return m_mvpNum[picList][idx]; }
 
     char*         getMVPNum(int picList)                       { return m_mvpNum[picList]; }
diff -r bc99537483f1 -r bab35592e71c source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp	Tue Nov 05 22:21:55 2013 -0600
+++ b/source/common/ipfilter.cpp	Wed Nov 06 13:06:15 2013 +0530
@@ -391,6 +391,43 @@ void interp_horiz_pp_c(pixel *src, intpt
 }
 
 template<int N, int width, int height>
+void interp_horiz_ps_c(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
+{
+    int16_t const * coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+    int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+    int shift = IF_FILTER_PREC - headRoom;
+    int offset = -IF_INTERNAL_OFFS << shift;
+    src -= N / 2 - 1;
+
+    int row, col;
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col++)
+        {
+            int sum;
+
+            sum  = src[col + 0] * coeff[0];
+            sum += src[col + 1] * coeff[1];
+            sum += src[col + 2] * coeff[2];
+            sum += src[col + 3] * coeff[3];
+            if (N == 8)
+            {
+                sum += src[col + 4] * coeff[4];
+                sum += src[col + 5] * coeff[5];
+                sum += src[col + 6] * coeff[6];
+                sum += src[col + 7] * coeff[7];
+            }
+
+            int16_t val = (int16_t)(sum + offset) >> shift;
+            dst[col] = val;
+        }
+
+        src += srcStride;
+        dst += dstStride;
+    }
+}
+
+template<int N, int width, int height>
 void interp_vert_pp_c(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
 {
     int16_t const * c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
@@ -491,6 +528,7 @@ namespace x265 {
 
 #define LUMA(W, H) \
     p.luma_hpp[LUMA_ ## W ## x ## H]     = interp_horiz_pp_c<8, W, H>; \
+    p.luma_hps[LUMA_ ## W ## x ## H]     = interp_horiz_ps_c<8, W, H>;\
     p.luma_vpp[LUMA_ ## W ## x ## H]     = interp_vert_pp_c<8, W, H>; \
     p.luma_vps[LUMA_ ## W ## x ## H]     = interp_vert_ps_c<8, W, H>; \
     p.luma_hvpp[LUMA_ ## W ## x ## H]    = interp_hv_pp_c<8, W, H>;
diff -r bc99537483f1 -r bab35592e71c source/common/primitives.h
--- a/source/common/primitives.h	Tue Nov 05 22:21:55 2013 -0600
+++ b/source/common/primitives.h	Wed Nov 06 13:06:15 2013 +0530
@@ -254,6 +254,7 @@ struct EncoderPrimitives
     extendCURowBorder_t extendRowBorder;
     filter_pp_t     chroma_hpp[NUM_CHROMA_PARTITIONS];
     filter_pp_t     luma_hpp[NUM_LUMA_PARTITIONS];
+    filter_ps_t     luma_hps[NUM_LUMA_PARTITIONS];
     filter_pp_t     chroma_vpp[NUM_CHROMA_PARTITIONS];
     filter_pp_t     luma_vpp[NUM_LUMA_PARTITIONS];
     filter_ps_t     luma_vps[NUM_LUMA_PARTITIONS];
diff -r bc99537483f1 -r bab35592e71c source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Nov 05 22:21:55 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp	Wed Nov 06 13:06:15 2013 +0530
@@ -326,6 +326,21 @@ void Setup_Assembly_Primitives(EncoderPr
 
         CHROMA_BLOCKCOPY(_sse2);
         LUMA_BLOCKCOPY(_sse2);
+
+        // This function pointer initialization is temporary will be removed
+        // later with macro definitions.  It is used to avoid linker errors
+        // until all partitions are coded and commit smaller patches, easier to
+        // review.
+
+        p.chroma_copy_sp[CHROMA_4x2] = x265_blockcopy_sp_4x2_sse2;
+        p.chroma_copy_sp[CHROMA_4x4] = x265_blockcopy_sp_4x4_sse2;
+        p.chroma_copy_sp[CHROMA_4x8] = x265_blockcopy_sp_4x8_sse2;
+        p.chroma_copy_sp[CHROMA_4x16] = x265_blockcopy_sp_4x16_sse2;
+        p.chroma_copy_sp[CHROMA_8x2] = x265_blockcopy_sp_8x2_sse2;
+        p.chroma_copy_sp[CHROMA_8x4] = x265_blockcopy_sp_8x4_sse2;
+        p.chroma_copy_sp[CHROMA_8x6] = x265_blockcopy_sp_8x6_sse2;
+        p.chroma_copy_sp[CHROMA_8x8] = x265_blockcopy_sp_8x8_sse2;
+        p.chroma_copy_sp[CHROMA_8x16] = x265_blockcopy_sp_8x16_sse2;
 #if X86_64
         p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
         p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
diff -r bc99537483f1 -r bab35592e71c source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Tue Nov 05 22:21:55 2013 -0600
+++ b/source/common/x86/blockcopy8.asm	Wed Nov 06 13:06:15 2013 +0530
@@ -27,6 +27,8 @@
 
 SECTION_RODATA 32
 
+tab_Vm:    db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -796,3 +798,351 @@ BLOCKCOPY_PP_W64_H2 64, 16
 BLOCKCOPY_PP_W64_H2 64, 32
 BLOCKCOPY_PP_W64_H2 64, 48
 BLOCKCOPY_PP_W64_H2 64, 64
+
+
+;-----------------------------------------------------------------------------
+; void blockcopy_sp_4x2(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal blockcopy_sp_4x2, 4, 4, 3, dest, destStride, src, srcStride
+
+add        r3,        r3
+
+mova       m0,        [tab_Vm]
+
+movh       m1,        [r2]
+movh       m2,        [r2 + r3]
+
+pshufb     m1,        m0
+pshufb     m2,        m0
+
+movd       [r0],      m1
+movd       [r0 + r1], m2
+
+RET
+
+;-----------------------------------------------------------------------------
+; void blockcopy_sp_4x4(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal blockcopy_sp_4x4, 4, 5, 5, dest, destStride, src, srcStride
+
+add        r3,     r3
+


More information about the x265-commits mailing list