[x265-commits] [x265] slicetype: move lastNonB set outside if expression for both I and P clauses
Steve Borho
steve at borho.org
Wed Nov 6 10:04:06 CET 2013
details: http://hg.videolan.org/x265/rev/412d2f3a2bd2
branches:
changeset: 4873:412d2f3a2bd2
user: Steve Borho <steve at borho.org>
date: Wed Nov 06 02:12:44 2013 -0600
description:
slicetype: move lastNonB set outside if expression for both I and P clauses
Subject: [x265] motion: simplify lowres subpel refine
details: http://hg.videolan.org/x265/rev/72520485725e
branches:
changeset: 4874:72520485725e
user: Steve Borho <steve at borho.org>
date: Tue Nov 05 22:40:08 2013 -0600
description:
motion: simplify lowres subpel refine
Subject: [x265] motion: simplify subpel refine, drop height+1 interpolation
details: http://hg.videolan.org/x265/rev/a1d576fbd0b0
branches:
changeset: 4875:a1d576fbd0b0
user: Steve Borho <steve at borho.org>
date: Wed Nov 06 02:38:37 2013 -0600
description:
motion: simplify subpel refine, drop height+1 interpolation
This is in preparation for enabling assembly versions of the interpolation functions.
Subject: [x265] tcomdatacu: remove unused set functions
details: http://hg.videolan.org/x265/rev/9368bfd107b8
branches:
changeset: 4876:9368bfd107b8
user: Gopu Govindaswamy <gopu at multicorewareinc.com>
date: Wed Nov 06 11:36:36 2013 +0530
description:
tcomdatacu: remove unused set functions
Subject: [x265] tcomdatacu: remove memset in initEstData()
details: http://hg.videolan.org/x265/rev/d044314537ad
branches:
changeset: 4877:d044314537ad
user: Gopu Govindaswamy <gopu at multicorewareinc.com>
date: Wed Nov 06 11:51:45 2013 +0530
description:
tcomdatacu: remove memset in initEstData()
The m_trCoeffY, m_trCoeffCb, m_trCoeffCr, m_iPCMSampleY, m_iPCMSampleCb and m_iPCMSampleCr
buffers are initialized in initCU(), so it is not necessary to set them to 0 in initEstData().
Subject: [x265] tcomdatacu: remove memset in initSubCU()
details: http://hg.videolan.org/x265/rev/1b913b8f7f19
branches:
changeset: 4878:1b913b8f7f19
user: Gopu Govindaswamy <gopu at multicorewareinc.com>
date: Wed Nov 06 11:59:44 2013 +0530
description:
tcomdatacu: remove memset in initSubCU()
The m_trCoeffY, m_trCoeffCb, m_trCoeffCr, m_iPCMSampleY, m_iPCMSampleCb and m_iPCMSampleCr
buffers are initialized in initCU(), so it is not necessary to set them to 0 in initSubCU().
Subject: [x265] Adding function pointer array and C primitive for luma hps filter functions.
details: http://hg.videolan.org/x265/rev/e31319dfb866
branches:
changeset: 4879:e31319dfb866
user: Nabajit Deka
date: Wed Nov 06 12:25:45 2013 +0530
description:
Adding function pointer array and C primitive for luma hps filter functions.
Subject: [x265] Adding test bench code for luma hps filter functions.
details: http://hg.videolan.org/x265/rev/cb323bec7d06
branches:
changeset: 4880:cb323bec7d06
user: Nabajit Deka
date: Wed Nov 06 12:28:46 2013 +0530
description:
Adding test bench code for luma hps filter functions.
Subject: [x265] asm code for blockcopy_sp, 8xN blocks
details: http://hg.videolan.org/x265/rev/73b4015984fd
branches:
changeset: 4881:73b4015984fd
user: Praveen Tiwari
date: Wed Nov 06 12:04:35 2013 +0530
description:
asm code for blockcopy_sp, 8xN blocks
Subject: [x265] asm code for blockcopy_sp, 4xN blocks
details: http://hg.videolan.org/x265/rev/bab35592e71c
branches:
changeset: 4882:bab35592e71c
user: Praveen Tiwari
date: Wed Nov 06 13:06:15 2013 +0530
description:
asm code for blockcopy_sp, 4xN blocks
diffstat:
source/Lib/TLibCommon/TComDataCU.cpp | 28 --
source/Lib/TLibCommon/TComDataCU.h | 28 --
source/common/ipfilter.cpp | 38 +++
source/common/primitives.h | 1 +
source/common/x86/asm-primitives.cpp | 15 +
source/common/x86/blockcopy8.asm | 350 +++++++++++++++++++++++++++++++++++
source/encoder/motion.cpp | 277 ++++++++++++---------------
source/encoder/motion.h | 2 -
source/encoder/slicetype.cpp | 2 +-
source/test/ipfilterharness.cpp | 17 +
10 files changed, 544 insertions(+), 214 deletions(-)
diffs (truncated from 1025 to 300 lines):
diff -r bc99537483f1 -r bab35592e71c source/Lib/TLibCommon/TComDataCU.cpp
--- a/source/Lib/TLibCommon/TComDataCU.cpp Tue Nov 05 22:21:55 2013 -0600
+++ b/source/Lib/TLibCommon/TComDataCU.cpp Wed Nov 06 13:06:15 2013 +0530
@@ -438,16 +438,6 @@ void TComDataCU::initEstData(uint32_t de
m_cuMvField[0].clearMvField();
m_cuMvField[1].clearMvField();
-
- uint32_t tmp = width * height;
- memset(m_trCoeffY, 0, tmp * sizeof(*m_trCoeffY));
- memset(m_iPCMSampleY, 0, tmp * sizeof(*m_iPCMSampleY));
-
- tmp = (width >> m_hChromaShift) * (height >> m_vChromaShift);
- memset(m_trCoeffCb, 0, tmp * sizeof(*m_trCoeffCb));
- memset(m_trCoeffCr, 0, tmp * sizeof(*m_trCoeffCr));
- memset(m_iPCMSampleCb, 0, tmp * sizeof(*m_iPCMSampleCb));
- memset(m_iPCMSampleCr, 0, tmp * sizeof(*m_iPCMSampleCr));
}
// initialize Sub partition
@@ -513,16 +503,6 @@ void TComDataCU::initSubCU(TComDataCU* c
m_mvpNum[1][i] = -1;
}
- uint32_t tmp = width * heigth;
- memset(m_trCoeffY, 0, sizeof(TCoeff) * tmp);
- memset(m_iPCMSampleY, 0, sizeof(Pel) * tmp);
-
- tmp = (width >> m_hChromaShift) * (heigth >> m_vChromaShift);
- memset(m_trCoeffCb, 0, sizeof(TCoeff) * tmp);
- memset(m_trCoeffCr, 0, sizeof(TCoeff) * tmp);
- memset(m_iPCMSampleCb, 0, sizeof(Pel) * tmp);
- memset(m_iPCMSampleCr, 0, sizeof(Pel) * tmp);
-
m_cuMvField[0].clearMvField();
m_cuMvField[1].clearMvField();
@@ -1597,14 +1577,6 @@ void TComDataCU::setTransformSkipSubPart
memset(m_transformSkip[g_convertTxtTypeToIdx[ttype]] + absPartIdx, useTransformSkip, sizeof(UChar) * curPartNum);
}
-void TComDataCU::setSizeSubParts(uint32_t width, uint32_t height, uint32_t absPartIdx, uint32_t depth)
-{
- uint32_t curPartNum = m_pic->getNumPartInCU() >> (depth << 1);
-
- memset(m_width + absPartIdx, width, sizeof(UChar) * curPartNum);
- memset(m_height + absPartIdx, height, sizeof(UChar) * curPartNum);
-}
-
UChar TComDataCU::getNumPartInter()
{
UChar numPart = 0;
diff -r bc99537483f1 -r bab35592e71c source/Lib/TLibCommon/TComDataCU.h
--- a/source/Lib/TLibCommon/TComDataCU.h Tue Nov 05 22:21:55 2013 -0600
+++ b/source/Lib/TLibCommon/TComDataCU.h Wed Nov 06 13:06:15 2013 +0530
@@ -224,8 +224,6 @@ public:
UChar getDepth(uint32_t idx) { return m_depth[idx]; }
- void setDepth(uint32_t idx, UChar h) { m_depth[idx] = h; }
-
void setDepthSubParts(uint32_t depth, uint32_t absPartIdx);
// -------------------------------------------------------------------------------------------------------------------
@@ -234,12 +232,8 @@ public:
char* getPartitionSize() { return m_partSizes; }
- int getUnitSize() { return m_unitSize; }
-
PartSize getPartitionSize(uint32_t idx) { return static_cast<PartSize>(m_partSizes[idx]); }
- void setPartitionSize(uint32_t idx, PartSize uh) { m_partSizes[idx] = (char)uh; }
-
void setPartSizeSubParts(PartSize eMode, uint32_t absPartIdx, uint32_t depth);
void setCUTransquantBypassSubParts(bool flag, uint32_t absPartIdx, uint32_t depth);
@@ -247,8 +241,6 @@ public:
bool getSkipFlag(uint32_t idx) { return m_skipFlag[idx]; }
- void setSkipFlag(uint32_t idx, bool skip) { m_skipFlag[idx] = skip; }
-
void setSkipFlagSubParts(bool skip, uint32_t absPartIdx, uint32_t depth);
char* getPredictionMode() { return m_predModes; }
@@ -259,24 +251,16 @@ public:
bool getCUTransquantBypass(uint32_t idx) { return m_cuTransquantBypass[idx]; }
- void setPredictionMode(uint32_t idx, PredMode uh) { m_predModes[idx] = (char)uh; }
-
void setPredModeSubParts(PredMode eMode, uint32_t absPartIdx, uint32_t depth);
UChar* getWidth() { return m_width; }
UChar getWidth(uint32_t idx) { return m_width[idx]; }
- void setWidth(uint32_t idx, UChar uh) { m_width[idx] = uh; }
-
UChar* getHeight() { return m_height; }
UChar getHeight(uint32_t idx) { return m_height[idx]; }
- void setHeight(uint32_t idx, UChar uh) { m_height[idx] = uh; }
-
- void setSizeSubParts(uint32_t width, uint32_t height, uint32_t absPartIdx, uint32_t depth);
-
char* getQP() { return m_qp; }
char getQP(uint32_t idx) { return m_qp[idx]; }
@@ -342,16 +326,12 @@ public:
bool getMergeFlag(uint32_t idx) { return m_bMergeFlags[idx]; }
- void setMergeFlag(uint32_t idx, bool b) { m_bMergeFlags[idx] = b; }
-
void setMergeFlagSubParts(bool bMergeFlag, uint32_t absPartIdx, uint32_t partIdx, uint32_t depth);
UChar* getMergeIndex() { return m_mergeIndex; }
UChar getMergeIndex(uint32_t idx) { return m_mergeIndex[idx]; }
- void setMergeIndex(uint32_t idx, uint32_t mergeIndex) { m_mergeIndex[idx] = (UChar)mergeIndex; }
-
void setMergeIndexSubParts(uint32_t mergeIndex, uint32_t absPartIdx, uint32_t partIdx, uint32_t depth);
template<typename T>
void setSubPart(T bParameter, T* pbBaseLCU, uint32_t cuAddr, uint32_t cuDepth, uint32_t puIdx);
@@ -364,24 +344,18 @@ public:
UChar getLumaIntraDir(uint32_t idx) { return m_lumaIntraDir[idx]; }
- void setLumaIntraDir(uint32_t idx, UChar uh) { m_lumaIntraDir[idx] = uh; }
-
void setLumaIntraDirSubParts(uint32_t dir, uint32_t absPartIdx, uint32_t depth);
UChar* getChromaIntraDir() { return m_chromaIntraDir; }
UChar getChromaIntraDir(uint32_t idx) { return m_chromaIntraDir[idx]; }
- void setChromaIntraDir(uint32_t idx, UChar uh) { m_chromaIntraDir[idx] = uh; }
-
void setChromIntraDirSubParts(uint32_t dir, uint32_t absPartIdx, uint32_t depth);
UChar* getInterDir() { return m_interDir; }
UChar getInterDir(uint32_t idx) { return m_interDir[idx]; }
- void setInterDir(uint32_t idx, UChar uh) { m_interDir[idx] = uh; }
-
void setInterDirSubParts(uint32_t dir, uint32_t absPartIdx, uint32_t partIdx, uint32_t depth);
bool* getIPCMFlag() { return m_iPCMFlags; }
@@ -414,8 +388,6 @@ public:
char* getMVPIdx(int picList) { return m_mvpIdx[picList]; }
- void setMVPNum(int picList, uint32_t idx, int mvpNum) { m_mvpNum[picList][idx] = (char)mvpNum; }
-
int getMVPNum(int picList, uint32_t idx) { return m_mvpNum[picList][idx]; }
char* getMVPNum(int picList) { return m_mvpNum[picList]; }
diff -r bc99537483f1 -r bab35592e71c source/common/ipfilter.cpp
--- a/source/common/ipfilter.cpp Tue Nov 05 22:21:55 2013 -0600
+++ b/source/common/ipfilter.cpp Wed Nov 06 13:06:15 2013 +0530
@@ -391,6 +391,43 @@ void interp_horiz_pp_c(pixel *src, intpt
}
template<int N, int width, int height>
+void interp_horiz_ps_c(pixel *src, intptr_t srcStride, int16_t *dst, intptr_t dstStride, int coeffIdx)
+{
+ int16_t const * coeff = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
+ int headRoom = IF_INTERNAL_PREC - X265_DEPTH;
+ int shift = IF_FILTER_PREC - headRoom;
+ int offset = -IF_INTERNAL_OFFS << shift;
+ src -= N / 2 - 1;
+
+ int row, col;
+ for (row = 0; row < height; row++)
+ {
+ for (col = 0; col < width; col++)
+ {
+ int sum;
+
+ sum = src[col + 0] * coeff[0];
+ sum += src[col + 1] * coeff[1];
+ sum += src[col + 2] * coeff[2];
+ sum += src[col + 3] * coeff[3];
+ if (N == 8)
+ {
+ sum += src[col + 4] * coeff[4];
+ sum += src[col + 5] * coeff[5];
+ sum += src[col + 6] * coeff[6];
+ sum += src[col + 7] * coeff[7];
+ }
+
+ int16_t val = (int16_t)(sum + offset) >> shift;
+ dst[col] = val;
+ }
+
+ src += srcStride;
+ dst += dstStride;
+ }
+}
+
+template<int N, int width, int height>
void interp_vert_pp_c(pixel *src, intptr_t srcStride, pixel *dst, intptr_t dstStride, int coeffIdx)
{
int16_t const * c = (N == 4) ? g_chromaFilter[coeffIdx] : g_lumaFilter[coeffIdx];
@@ -491,6 +528,7 @@ namespace x265 {
#define LUMA(W, H) \
p.luma_hpp[LUMA_ ## W ## x ## H] = interp_horiz_pp_c<8, W, H>; \
+ p.luma_hps[LUMA_ ## W ## x ## H] = interp_horiz_ps_c<8, W, H>;\
p.luma_vpp[LUMA_ ## W ## x ## H] = interp_vert_pp_c<8, W, H>; \
p.luma_vps[LUMA_ ## W ## x ## H] = interp_vert_ps_c<8, W, H>; \
p.luma_hvpp[LUMA_ ## W ## x ## H] = interp_hv_pp_c<8, W, H>;
diff -r bc99537483f1 -r bab35592e71c source/common/primitives.h
--- a/source/common/primitives.h Tue Nov 05 22:21:55 2013 -0600
+++ b/source/common/primitives.h Wed Nov 06 13:06:15 2013 +0530
@@ -254,6 +254,7 @@ struct EncoderPrimitives
extendCURowBorder_t extendRowBorder;
filter_pp_t chroma_hpp[NUM_CHROMA_PARTITIONS];
filter_pp_t luma_hpp[NUM_LUMA_PARTITIONS];
+ filter_ps_t luma_hps[NUM_LUMA_PARTITIONS];
filter_pp_t chroma_vpp[NUM_CHROMA_PARTITIONS];
filter_pp_t luma_vpp[NUM_LUMA_PARTITIONS];
filter_ps_t luma_vps[NUM_LUMA_PARTITIONS];
diff -r bc99537483f1 -r bab35592e71c source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Nov 05 22:21:55 2013 -0600
+++ b/source/common/x86/asm-primitives.cpp Wed Nov 06 13:06:15 2013 +0530
@@ -326,6 +326,21 @@ void Setup_Assembly_Primitives(EncoderPr
CHROMA_BLOCKCOPY(_sse2);
LUMA_BLOCKCOPY(_sse2);
+
+ // This function pointer initialization is temporary will be removed
+ // later with macro definitions. It is used to avoid linker errors
+ // until all partitions are coded and commit smaller patches, easier to
+ // review.
+
+ p.chroma_copy_sp[CHROMA_4x2] = x265_blockcopy_sp_4x2_sse2;
+ p.chroma_copy_sp[CHROMA_4x4] = x265_blockcopy_sp_4x4_sse2;
+ p.chroma_copy_sp[CHROMA_4x8] = x265_blockcopy_sp_4x8_sse2;
+ p.chroma_copy_sp[CHROMA_4x16] = x265_blockcopy_sp_4x16_sse2;
+ p.chroma_copy_sp[CHROMA_8x2] = x265_blockcopy_sp_8x2_sse2;
+ p.chroma_copy_sp[CHROMA_8x4] = x265_blockcopy_sp_8x4_sse2;
+ p.chroma_copy_sp[CHROMA_8x6] = x265_blockcopy_sp_8x6_sse2;
+ p.chroma_copy_sp[CHROMA_8x8] = x265_blockcopy_sp_8x8_sse2;
+ p.chroma_copy_sp[CHROMA_8x16] = x265_blockcopy_sp_8x16_sse2;
#if X86_64
p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2;
p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2;
diff -r bc99537483f1 -r bab35592e71c source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Tue Nov 05 22:21:55 2013 -0600
+++ b/source/common/x86/blockcopy8.asm Wed Nov 06 13:06:15 2013 +0530
@@ -27,6 +27,8 @@
SECTION_RODATA 32
+tab_Vm: db 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
+
SECTION .text
;-----------------------------------------------------------------------------
@@ -796,3 +798,351 @@ BLOCKCOPY_PP_W64_H2 64, 16
BLOCKCOPY_PP_W64_H2 64, 32
BLOCKCOPY_PP_W64_H2 64, 48
BLOCKCOPY_PP_W64_H2 64, 64
+
+
+;-----------------------------------------------------------------------------
+; void blockcopy_sp_4x2(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal blockcopy_sp_4x2, 4, 4, 3, dest, destStride, src, srcStride
+
+add r3, r3
+
+mova m0, [tab_Vm]
+
+movh m1, [r2]
+movh m2, [r2 + r3]
+
+pshufb m1, m0
+pshufb m2, m0
+
+movd [r0], m1
+movd [r0 + r1], m2
+
+RET
+
+;-----------------------------------------------------------------------------
+; void blockcopy_sp_4x4(pixel *dest, intptr_t destStride, int16_t *src, intptr_t srcStride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal blockcopy_sp_4x4, 4, 5, 5, dest, destStride, src, srcStride
+
+add r3, r3
+
More information about the x265-commits
mailing list