[x265-commits] [x265] entropy: remove m_ prefix from ContextModel.state, other ...
Steve Borho
steve at borho.org
Fri Aug 8 02:33:40 CEST 2014
details: http://hg.videolan.org/x265/rev/029563495b6e
branches:
changeset: 7732:029563495b6e
user: Steve Borho <steve at borho.org>
date: Thu Aug 07 02:23:40 2014 -0500
description:
entropy: remove m_ prefix from ContextModel.state, other nits
Subject: [x265] asm: cleanup unused sub_ps and add_ps
details: http://hg.videolan.org/x265/rev/c29e37317d46
branches:
changeset: 7733:c29e37317d46
user: Satoshi Nakagawa <nakagawa424 at oki.com>
date: Thu Aug 07 19:23:38 2014 +0900
description:
asm: cleanup unused sub_ps and add_ps
sub_ps and add_ps are used at the CU or TU level, not at the PU level.
Subject: [x265] rc: set rdlevel to 2 in fast first pass for multipass encode.
details: http://hg.videolan.org/x265/rev/5a0e2a7f9ad3
branches:
changeset: 7734:5a0e2a7f9ad3
user: Aarthi Thirumalai
date: Thu Aug 07 19:12:04 2014 +0530
description:
rc: set rdlevel to 2 in fast first pass for multipass encode.
This increases the speed of the first pass by over 70% in the slower presets, with
almost the same quality in the final pass.
Subject: [x265] analysis: cleanup
details: http://hg.videolan.org/x265/rev/8e45fc7c5521
branches:
changeset: 7735:8e45fc7c5521
user: Ashok Kumar Mishra <ashok at multicorewareinc.com>
date: Thu Aug 07 19:49:42 2014 +0530
description:
analysis: cleanup
diffstat:
source/Lib/TLibCommon/ContextTables.h | 14 +-
source/Lib/TLibCommon/TComYuv.cpp | 18 +-
source/Lib/TLibCommon/TComYuv.h | 4 +-
source/Lib/TLibEncoder/TEncSearch.cpp | 27 +-
source/common/param.cpp | 1 +
source/common/pixel.cpp | 48 +-
source/common/primitives.cpp | 6 +-
source/common/primitives.h | 10 +-
source/common/shortyuv.cpp | 8 +-
source/common/x86/asm-primitives.cpp | 66 +-
source/common/x86/ipfilter16.asm | 15 +-
source/common/x86/pixel-util.h | 69 +-
source/common/x86/pixel-util8.asm | 2229 +++++++++-----------------------
source/common/x86/pixeladd8.asm | 2025 ++++++++---------------------
source/common/x86/x86util.asm | 7 +
source/encoder/analysis.cpp | 21 +-
source/encoder/entropy.cpp | 56 +-
source/test/pixelharness.cpp | 122 +-
18 files changed, 1396 insertions(+), 3350 deletions(-)
diffs (truncated from 5704 to 300 lines):
diff -r 619633a933f6 -r 8e45fc7c5521 source/Lib/TLibCommon/ContextTables.h
--- a/source/Lib/TLibCommon/ContextTables.h Wed Aug 06 17:03:38 2014 -0500
+++ b/source/Lib/TLibCommon/ContextTables.h Thu Aug 07 19:49:42 2014 +0530
@@ -127,14 +127,11 @@
namespace x265 {
// private namespace
-// ====================================================================================================================
-// Sbac interface
-// ====================================================================================================================
-typedef struct ContextModel
+struct ContextModel
{
- uint8_t m_state; ///< internal state variable
+ uint8_t state;
uint8_t bBinsCoded;
-} ContextModel;
+};
extern const uint32_t g_entropyBits[128];
extern const uint8_t g_nextState[128][2];
@@ -144,9 +141,8 @@ extern const uint8_t g_nextState[128][2]
#define sbacNext(S, V) (g_nextState[(S)][(V)])
#define sbacGetEntropyBits(S, V) (g_entropyBits[(S) ^ (V)])
#define sbacGetEntropyBitsTrm(V) (g_entropyBits[126 ^ (V)])
-#define CHANNEL_TYPE_LUMA 0
-#define CHANNEL_TYPE_CHROMA 1
-#define MAX_NUM_CHANNEL_TYPE 2
+
+#define MAX_NUM_CHANNEL_TYPE 2
// ====================================================================================================================
// Tables
diff -r 619633a933f6 -r 8e45fc7c5521 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Wed Aug 06 17:03:38 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.cpp Thu Aug 07 19:49:42 2014 +0530
@@ -167,13 +167,11 @@ void TComYuv::copyPartToYuv(TComYuv* dst
void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size)
{
- int part = partitionFromLog2Size(log2Size);
-
- addClipLuma(srcYuv0, srcYuv1, part);
- addClipChroma(srcYuv0, srcYuv1, part);
+ addClipLuma(srcYuv0, srcYuv1, log2Size);
+ addClipChroma(srcYuv0, srcYuv1, log2Size);
}
-void TComYuv::addClipLuma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t part)
+void TComYuv::addClipLuma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size)
{
pixel* src0 = srcYuv0->getLumaAddr();
int16_t* src1 = srcYuv1->getLumaAddr();
@@ -183,14 +181,14 @@ void TComYuv::addClipLuma(TComYuv* srcYu
uint32_t src1Stride = srcYuv1->m_width;
uint32_t dststride = getStride();
- primitives.luma_add_ps[part](dst, dststride, src0, src1, src0Stride, src1Stride);
+ primitives.luma_add_ps[log2Size - 2](dst, dststride, src0, src1, src0Stride, src1Stride);
}
-void TComYuv::addClipChroma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t part)
+void TComYuv::addClipChroma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size)
{
pixel* srcU0 = srcYuv0->getCbAddr();
+ pixel* srcV0 = srcYuv0->getCrAddr();
int16_t* srcU1 = srcYuv1->getCbAddr();
- pixel* srcV0 = srcYuv0->getCrAddr();
int16_t* srcV1 = srcYuv1->getCrAddr();
pixel* dstU = getCbAddr();
pixel* dstV = getCrAddr();
@@ -199,8 +197,8 @@ void TComYuv::addClipChroma(TComYuv* src
uint32_t src1Stride = srcYuv1->m_cwidth;
uint32_t dststride = getCStride();
- primitives.chroma[m_csp].add_ps[part](dstU, dststride, srcU0, srcU1, src0Stride, src1Stride);
- primitives.chroma[m_csp].add_ps[part](dstV, dststride, srcV0, srcV1, src0Stride, src1Stride);
+ primitives.chroma[m_csp].add_ps[log2Size - 2](dstU, dststride, srcU0, srcU1, src0Stride, src1Stride);
+ primitives.chroma[m_csp].add_ps[log2Size - 2](dstV, dststride, srcV0, srcV1, src0Stride, src1Stride);
}
void TComYuv::addAvg(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
diff -r 619633a933f6 -r 8e45fc7c5521 source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h Wed Aug 06 17:03:38 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.h Thu Aug 07 19:49:42 2014 +0530
@@ -136,8 +136,8 @@ public:
// Clip(srcYuv0 + srcYuv1) -> m_apiBuf
void addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size);
- void addClipLuma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t part);
- void addClipChroma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t part);
+ void addClipLuma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size);
+ void addClipChroma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size);
// (srcYuv0 + srcYuv1)/2 for YUV partition
void addAvg(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
diff -r 619633a933f6 -r 8e45fc7c5521 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Aug 06 17:03:38 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Aug 07 19:49:42 2014 +0530
@@ -864,16 +864,14 @@ void TEncSearch::residualTransformQuantI
//--- set coded block flag ---
cu->setCbfSubParts((numSig ? 1 : 0) << trDepth, TEXT_LUMA, absPartIdx, fullDepth);
- int part = partitionFromLog2Size(log2TrSize);
-
if (numSig)
{
//--- inverse transform ---
m_quant.invtransformNxN(cu->getCUTransquantBypass(absPartIdx), residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTransformSkip, numSig);
// Generate Recon
- primitives.luma_add_ps[part](recon, stride, pred, residual, stride, stride);
- primitives.luma_copy_pp[part](reconIPred, reconIPredStride, recon, stride);
+ primitives.luma_add_ps[sizeIdx](recon, stride, pred, residual, stride, stride);
+ primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, recon, stride);
}
else
{
@@ -882,8 +880,8 @@ void TEncSearch::residualTransformQuantI
#endif
// Generate Recon
- primitives.luma_copy_pp[part](recon, stride, pred, stride);
- primitives.luma_copy_pp[part](reconIPred, reconIPredStride, pred, stride);
+ primitives.square_copy_pp[sizeIdx](recon, stride, pred, stride);
+ primitives.square_copy_pp[sizeIdx](reconIPred, reconIPredStride, pred, stride);
}
}
@@ -1283,8 +1281,7 @@ void TEncSearch::residualQTIntrachroma(T
uint32_t tuSize = 1 << log2TrSizeC;
uint32_t stride = fencYuv->getCStride();
const bool splitIntoSubTUs = (m_csp == X265_CSP_I422);
- int sizeIdxC = log2TrSizeC - 2;
- int part = partitionFromLog2Size(log2TrSizeC);
+ const int sizeIdxC = log2TrSizeC - 2;
for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
{
@@ -1344,7 +1341,7 @@ void TEncSearch::residualQTIntrachroma(T
//===== reconstruction =====
// use square primitives
- primitives.chroma[X265_CSP_I444].add_ps[part](recon, stride, pred, residual, stride, stride);
+ primitives.chroma[X265_CSP_I444].add_ps[sizeIdxC](recon, stride, pred, residual, stride, stride);
primitives.square_copy_pp[sizeIdxC](reconIPred, reconIPredStride, recon, stride);
}
else
@@ -2820,7 +2817,7 @@ void TEncSearch::xEstimateResidualQT(TCo
uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();
uint32_t stride = fencYuv->getStride();
//===== reconstruction =====
- primitives.luma_add_ps[partSize](reconIPred, reconIPredStride, pred, curResiY, stride, strideResiY);
+ primitives.luma_add_ps[sizeIdx](reconIPred, reconIPredStride, pred, curResiY, stride, strideResiY);
int size = log2TrSize - 2;
nonZeroPsyEnergyY = m_rdCost.psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getStride());
@@ -2919,8 +2916,8 @@ void TEncSearch::xEstimateResidualQT(TCo
uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();
uint32_t stride = fencYuv->getCStride();
//===== reconstruction =====
- primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiU, stride, strideResiC);
int size = log2TrSizeC - 2;
+ primitives.luma_add_ps[size](reconIPred, reconIPredStride, pred, curResiU, stride, strideResiC);
nonZeroPsyEnergyU = m_rdCost.psyCost(size, fencYuv->getCbAddr(absPartIdxC), fencYuv->getCStride(),
cu->m_pic->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder),
cu->m_pic->getPicYuvRec()->getCStride());
@@ -3001,8 +2998,8 @@ void TEncSearch::xEstimateResidualQT(TCo
uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();
uint32_t stride = fencYuv->getCStride();
//===== reconstruction =====
- primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, curResiV, stride, strideResiC);
int size = log2TrSizeC - 2;
+ primitives.luma_add_ps[size](reconIPred, reconIPredStride, pred, curResiV, stride, strideResiC);
nonZeroPsyEnergyV = m_rdCost.psyCost(size, fencYuv->getCrAddr(absPartIdxC), fencYuv->getCStride(),
cu->m_pic->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder),
cu->m_pic->getPicYuvRec()->getCStride());
@@ -3107,8 +3104,8 @@ void TEncSearch::xEstimateResidualQT(TCo
uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getStride();
uint32_t stride = fencYuv->getStride();
//===== reconstruction =====
- primitives.luma_add_ps[partSize](reconIPred, reconIPredStride, pred, tsResiY, stride, trSize);
int size = log2TrSize - 2;
+ primitives.luma_add_ps[size](reconIPred, reconIPredStride, pred, tsResiY, stride, trSize);
nonZeroPsyEnergyY = m_rdCost.psyCost(size, fencYuv->getLumaAddr(absPartIdx), fencYuv->getStride(),
cu->m_pic->getPicYuvRec()->getLumaAddr(cu->getAddr(), zorder),
cu->m_pic->getPicYuvRec()->getStride());
@@ -3195,8 +3192,8 @@ void TEncSearch::xEstimateResidualQT(TCo
uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();
uint32_t stride = fencYuv->getCStride();
//===== reconstruction =====
- primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, tsResiU, stride, trSizeC);
int size = log2TrSizeC - 2;
+ primitives.luma_add_ps[size](reconIPred, reconIPredStride, pred, tsResiU, stride, trSizeC);
nonZeroPsyEnergyU = m_rdCost.psyCost(size, fencYuv->getCbAddr(absPartIdxC), fencYuv->getCStride(),
cu->m_pic->getPicYuvRec()->getCbAddr(cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getCStride());
singleCostU = m_rdCost.calcPsyRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.section], nonZeroPsyEnergyU);
@@ -3236,8 +3233,8 @@ void TEncSearch::xEstimateResidualQT(TCo
uint32_t reconIPredStride = cu->m_pic->getPicYuvRec()->getCStride();
uint32_t stride = fencYuv->getCStride();
//===== reconstruction =====
- primitives.luma_add_ps[partSizeC](reconIPred, reconIPredStride, pred, tsResiV, stride, trSizeC);
int size = log2TrSizeC - 2;
+ primitives.luma_add_ps[size](reconIPred, reconIPredStride, pred, tsResiV, stride, trSizeC);
nonZeroPsyEnergyV = m_rdCost.psyCost(size, fencYuv->getCrAddr(absPartIdxC), fencYuv->getCStride(),
cu->m_pic->getPicYuvRec()->getCrAddr(cu->getAddr(), zorder), cu->m_pic->getPicYuvRec()->getCStride());
singleCostV = m_rdCost.calcPsyRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.section], nonZeroPsyEnergyV);
diff -r 619633a933f6 -r 8e45fc7c5521 source/common/param.cpp
--- a/source/common/param.cpp Wed Aug 06 17:03:38 2014 -0500
+++ b/source/common/param.cpp Thu Aug 07 19:49:42 2014 +0530
@@ -1035,6 +1035,7 @@ void x265_param_apply_fastfirstpass(x265
param->searchMethod = X265_DIA_SEARCH;
param->subpelRefine = X265_MIN(2, param->subpelRefine);
param->bEnableEarlySkip = 1;
+ param->rdLevel = 2;
}
}
diff -r 619633a933f6 -r 8e45fc7c5521 source/common/pixel.cpp
--- a/source/common/pixel.cpp Wed Aug 06 17:03:38 2014 -0500
+++ b/source/common/pixel.cpp Thu Aug 07 19:49:42 2014 +0530
@@ -1002,37 +1002,47 @@ void Setup_C_PixelPrimitives(EncoderPrim
p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.chroma[X265_CSP_I420].copy_ss[CHROMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>; \
- p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+ p.chroma[X265_CSP_I420].copy_ss[CHROMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
#define CHROMA_422(W, H) \
p.chroma[X265_CSP_I422].addAvg[CHROMA422_ ## W ## x ## H] = addAvg<W, H>; \
p.chroma[X265_CSP_I422].copy_pp[CHROMA422_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.chroma[X265_CSP_I422].copy_sp[CHROMA422_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
p.chroma[X265_CSP_I422].copy_ps[CHROMA422_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.chroma[X265_CSP_I422].copy_ss[CHROMA422_ ## W ## x ## H] = blockcopy_ss_c<W, H>; \
- p.chroma[X265_CSP_I422].sub_ps[CHROMA422_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.chroma[X265_CSP_I422].add_ps[CHROMA422_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+ p.chroma[X265_CSP_I422].copy_ss[CHROMA422_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
#define CHROMA_444(W, H) \
p.chroma[X265_CSP_I444].addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
p.chroma[X265_CSP_I444].copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.chroma[X265_CSP_I444].copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
p.chroma[X265_CSP_I444].copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.chroma[X265_CSP_I444].copy_ss[LUMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>; \
- p.chroma[X265_CSP_I444].sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
- p.chroma[X265_CSP_I444].add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+ p.chroma[X265_CSP_I444].copy_ss[LUMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
#define LUMA(W, H) \
p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.luma_copy_ss[LUMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>; \
+ p.luma_copy_ss[LUMA_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
+
+#define LUMA_PIXELSUB(W, H) \
p.luma_sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
p.luma_add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+#define CHROMA_PIXELSUB_420(W, H) \
+ p.chroma[X265_CSP_I420].sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
+ p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+
+#define CHROMA_PIXELSUB_422(W, H) \
+ p.chroma[X265_CSP_I422].sub_ps[CHROMA422_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
+ p.chroma[X265_CSP_I422].add_ps[CHROMA422_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+
+#define CHROMA_PIXELSUB_444(W, H) \
+ p.chroma[X265_CSP_I444].sub_ps[LUMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; \
+ p.chroma[X265_CSP_I444].add_ps[LUMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
+
+
+
LUMA(4, 4);
LUMA(8, 8);
CHROMA_420(4, 4);
@@ -1083,6 +1093,24 @@ void Setup_C_PixelPrimitives(EncoderPrim
LUMA(16, 64);
CHROMA_420(8, 32);
+ LUMA_PIXELSUB(4, 4);
+ LUMA_PIXELSUB(8, 8);
+ LUMA_PIXELSUB(16, 16);
+ LUMA_PIXELSUB(32, 32);
+ LUMA_PIXELSUB(64, 64);
+ CHROMA_PIXELSUB_420(4, 4)
+ CHROMA_PIXELSUB_420(8, 8)
+ CHROMA_PIXELSUB_420(16, 16)
+ CHROMA_PIXELSUB_420(32, 32)
+ CHROMA_PIXELSUB_422(4, 8)
+ CHROMA_PIXELSUB_422(8, 16)
+ CHROMA_PIXELSUB_422(16, 32)
+ CHROMA_PIXELSUB_422(32, 64)
+ CHROMA_PIXELSUB_444(8, 8)
+ CHROMA_PIXELSUB_444(16, 16)
+ CHROMA_PIXELSUB_444(32, 32)
+ CHROMA_PIXELSUB_444(64, 64)
More information about the x265-commits mailing list