[x265] refine partition size related
Satoshi Nakagawa
nakagawa424 at oki.com
Tue Jul 22 08:57:52 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1406011990 -32400
# Tue Jul 22 15:53:10 2014 +0900
# Node ID b2ad081e4bfc20bbc84e8bfbab59ed52aeac2a73
# Parent d303b4d860e9f06396a156726dd518d0f41fe796
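refine partition-size-related code
- reorder LumaPartitions (and the chroma partition enums, which are indexed by the luma partition enum) so the square sizes come first; this simplifies partitionFromLog2Size() to `log2Size - 2`
- remove unused code: TComYuv::copyPartToPartYuv(), TComYuv::copyPartToPartLuma(), the TComYuv::addAvg(TComYuv*, TComYuv*, ...) overload, ShortYuv::addClip(), ShortYuv::copyPartToPartShortChroma(), partitionFromSize(), lumaSquarePartitionMapTable, lumaPartitionsFromSquareBlocksTable, and the extra trailing copy_sp/copy_ps table slot
- add TComYuv::copyFromYuv() and make TComYuv::addClip() take log2Size instead of the size in pixels
- export Setup_Alias_Primitives() so the test bench can call it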
diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Jul 21 22:43:38 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.cpp Tue Jul 22 15:53:10 2014 +0900
@@ -127,6 +127,15 @@
primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], getCStride(), srcV, srcPicYuv->getCStride());
}
+void TComYuv::copyFromYuv(TComYuv* srcYuv)
+{
+ X265_CHECK(m_width <= srcYuv->m_width && m_height <= srcYuv->m_height, "invalid size\n");
+
+ primitives.luma_copy_pp[m_part](m_buf[0], m_width, srcYuv->m_buf[0], srcYuv->m_width);
+ primitives.chroma[m_csp].copy_pp[m_part](m_buf[1], m_cwidth, srcYuv->m_buf[1], srcYuv->m_cwidth);
+ primitives.chroma[m_csp].copy_pp[m_part](m_buf[2], m_cwidth, srcYuv->m_buf[2], srcYuv->m_cwidth);
+}
+
void TComYuv::copyToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx)
{
pixel* dstY = dstPicYuv->getLumaAddr(partIdx);
@@ -156,50 +165,9 @@
primitives.chroma[m_csp].copy_pp[part](dstV, dstPicYuv->getCStride(), srcV, getCStride());
}
-void TComYuv::copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
+void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size)
{
- int part = partitionFromSizes(width, height);
-
- X265_CHECK(width != 4 || height != 4, "4x4 partition detected\n");
-
- if (bLuma)
- {
- pixel* src = getLumaAddr(partIdx);
- pixel* dst = dstPicYuv->getLumaAddr(partIdx);
-
- uint32_t srcstride = getStride();
- uint32_t dststride = dstPicYuv->getStride();
-
- primitives.luma_copy_pp[part](dst, dststride, src, srcstride);
- }
- if (bChroma)
- {
- pixel* srcU = getCbAddr(partIdx);
- pixel* srcV = getCrAddr(partIdx);
- pixel* dstU = dstPicYuv->getCbAddr(partIdx);
- pixel* dstV = dstPicYuv->getCrAddr(partIdx);
-
- uint32_t srcstride = getCStride();
- uint32_t dststride = dstPicYuv->getCStride();
-
- primitives.chroma[m_csp].copy_pp[part](dstU, dststride, srcU, srcstride);
- primitives.chroma[m_csp].copy_pp[part](dstV, dststride, srcV, srcstride);
- }
-}
-
-void TComYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize)
-{
- int part = partitionFromSize(lumaSize);
-
- int16_t* dst = dstPicYuv->getLumaAddr(partIdx);
- uint32_t dststride = dstPicYuv->m_width;
-
- primitives.luma_copy_ps[part](dst, dststride, getLumaAddr(partIdx), getStride());
-}
-
-void TComYuv::addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
-{
- int part = partitionFromSize(partSize);
+ int part = partitionFromLog2Size(log2Size);
addClipLuma(srcYuv0, srcYuv1, part);
addClipChroma(srcYuv0, srcYuv1, part);
@@ -235,113 +203,32 @@
primitives.chroma[m_csp].add_ps[part](dstV, dststride, srcV0, srcV1, src0Stride, src1Stride);
}
-void TComYuv::addAvg(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
-{
- int x, y;
- uint32_t src0Stride, src1Stride, dststride;
- int shiftNum, offset;
-
- pixel* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
- pixel* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
- pixel* srcV0 = srcYuv0->getCrAddr(partUnitIdx);
-
- pixel* srcY1 = srcYuv1->getLumaAddr(partUnitIdx);
- pixel* srcU1 = srcYuv1->getCbAddr(partUnitIdx);
- pixel* srcV1 = srcYuv1->getCrAddr(partUnitIdx);
-
- pixel* dstY = getLumaAddr(partUnitIdx);
- pixel* dstU = getCbAddr(partUnitIdx);
- pixel* dstV = getCrAddr(partUnitIdx);
-
- if (bLuma)
- {
- src0Stride = srcYuv0->getStride();
- src1Stride = srcYuv1->getStride();
- dststride = getStride();
- shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
- offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
- for (y = 0; y < height; y++)
- {
- for (x = 0; x < width; x += 4)
- {
- dstY[x + 0] = Clip((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum);
- dstY[x + 1] = Clip((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum);
- dstY[x + 2] = Clip((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum);
- dstY[x + 3] = Clip((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum);
- }
-
- srcY0 += src0Stride;
- srcY1 += src1Stride;
- dstY += dststride;
- }
- }
- if (bChroma)
- {
- shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
- offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
- src0Stride = srcYuv0->getCStride();
- src1Stride = srcYuv1->getCStride();
- dststride = getCStride();
-
- width >>= m_hChromaShift;
- height >>= m_vChromaShift;
-
- for (y = height - 1; y >= 0; y--)
- {
- for (x = width - 1; x >= 0; )
- {
- // note: chroma min width is 2
- dstU[x] = Clip((srcU0[x] + srcU1[x] + offset) >> shiftNum);
- dstV[x] = Clip((srcV0[x] + srcV1[x] + offset) >> shiftNum);
- x--;
- dstU[x] = Clip((srcU0[x] + srcU1[x] + offset) >> shiftNum);
- dstV[x] = Clip((srcV0[x] + srcV1[x] + offset) >> shiftNum);
- x--;
- }
-
- srcU0 += src0Stride;
- srcU1 += src1Stride;
- srcV0 += src0Stride;
- srcV1 += src1Stride;
- dstU += dststride;
- dstV += dststride;
- }
- }
-}
-
void TComYuv::addAvg(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
{
- uint32_t src0Stride, src1Stride, dststride;
-
- int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
- int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
- int16_t* srcV0 = srcYuv0->getCrAddr(partUnitIdx);
-
- int16_t* srcY1 = srcYuv1->getLumaAddr(partUnitIdx);
- int16_t* srcU1 = srcYuv1->getCbAddr(partUnitIdx);
- int16_t* srcV1 = srcYuv1->getCrAddr(partUnitIdx);
-
- pixel* dstY = getLumaAddr(partUnitIdx);
- pixel* dstU = getCbAddr(partUnitIdx);
- pixel* dstV = getCrAddr(partUnitIdx);
-
int part = partitionFromSizes(width, height);
if (bLuma)
{
- src0Stride = srcYuv0->m_width;
- src1Stride = srcYuv1->m_width;
- dststride = getStride();
+ int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
+ int16_t* srcY1 = srcYuv1->getLumaAddr(partUnitIdx);
+ pixel* dstY = getLumaAddr(partUnitIdx);
+ uint32_t src0Stride = srcYuv0->m_width;
+ uint32_t src1Stride = srcYuv1->m_width;
+ uint32_t dststride = getStride();
primitives.luma_addAvg[part](srcY0, srcY1, dstY, src0Stride, src1Stride, dststride);
}
if (bChroma)
{
- src0Stride = srcYuv0->m_cwidth;
- src1Stride = srcYuv1->m_cwidth;
- dststride = getCStride();
+ int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
+ int16_t* srcV0 = srcYuv0->getCrAddr(partUnitIdx);
+ int16_t* srcU1 = srcYuv1->getCbAddr(partUnitIdx);
+ int16_t* srcV1 = srcYuv1->getCrAddr(partUnitIdx);
+ pixel* dstU = getCbAddr(partUnitIdx);
+ pixel* dstV = getCrAddr(partUnitIdx);
+ uint32_t src0Stride = srcYuv0->m_cwidth;
+ uint32_t src1Stride = srcYuv1->m_cwidth;
+ uint32_t dststride = getCStride();
primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, src0Stride, src1Stride, dststride);
primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, src0Stride, src1Stride, dststride);
diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibCommon/TComYuv.h
--- a/source/Lib/TLibCommon/TComYuv.h Mon Jul 21 22:43:38 2014 -0500
+++ b/source/Lib/TLibCommon/TComYuv.h Tue Jul 22 15:53:10 2014 +0900
@@ -121,28 +121,25 @@
// Copy YUV buffer from picture buffer
void copyFromPicYuv(TComPicYuv* srcPicYuv, uint32_t cuAddr, uint32_t absZOrderIdx);
+ // Copy from same size YUV buffer
+ void copyFromYuv(TComYuv* srcYuv);
+
// Copy Small YUV buffer to the part of other Big YUV buffer
void copyToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx);
// Copy the part of Big YUV buffer to other Small YUV buffer
void copyPartToYuv(TComYuv* dstPicYuv, uint32_t srcPartIdx);
- // Copy YUV partition buffer to other YUV partition buffer
- void copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
-
- void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize);
-
// ------------------------------------------------------------------------------------------------------------------
// Algebraic operation for YUV buffer
// ------------------------------------------------------------------------------------------------------------------
// Clip(srcYuv0 + srcYuv1) -> m_apiBuf
- void addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize);
+ void addClip(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t log2Size);
void addClipLuma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t part);
void addClipChroma(TComYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t part);
// (srcYuv0 + srcYuv1)/2 for YUV partition
- void addAvg(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
void addAvg(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma);
// ------------------------------------------------------------------------------------------------------------------
diff -r d303b4d860e9 -r b2ad081e4bfc source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 21 22:43:38 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jul 22 15:53:10 2014 +0900
@@ -2297,7 +2297,7 @@
{
cu->setSkipFlagSubParts(true, 0, depth);
- predYuv->copyToPartYuv(outReconYuv, 0);
+ outReconYuv->copyFromYuv(predYuv);
// Luma
int part = partitionFromLog2Size(log2CUSize);
distortion = primitives.sse_pp[part](fencYuv->getLumaAddr(), fencYuv->getStride(), outReconYuv->getLumaAddr(), outReconYuv->getStride());
@@ -2426,9 +2426,9 @@
X265_CHECK(bestCost != MAX_INT64, "no best cost\n");
if (cu->getQtRootCbf(0))
- outReconYuv->addClip(predYuv, outBestResiYuv, cuSize);
+ outReconYuv->addClip(predYuv, outBestResiYuv, log2CUSize);
else
- predYuv->copyToPartYuv(outReconYuv, 0);
+ outReconYuv->copyFromYuv(predYuv);
// update with clipped distortion and cost (qp estimation loop uses unclipped values)
int part = partitionFromLog2Size(log2CUSize);
@@ -2457,7 +2457,7 @@
{
if (skipRes && cu->getPredictionMode(0) == MODE_INTER && cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)
{
- predYuv->copyToPartYuv(reconYuv, 0);
+ reconYuv->copyFromYuv(predYuv);
cu->clearCbf(0, cu->getDepth(0));
return;
}
@@ -2467,12 +2467,11 @@
if (cu->getPredictionMode(0) == MODE_INTER)
{
residualTransformQuantInter(cu, 0, resiYuv, cu->getDepth(0), true);
- uint32_t cuSize = 1 << cu->getLog2CUSize(0);
if (cu->getQtRootCbf(0))
- reconYuv->addClip(predYuv, resiYuv, cuSize);
+ reconYuv->addClip(predYuv, resiYuv, cu->getLog2CUSize(0));
else
{
- predYuv->copyToPartYuv(reconYuv, 0);
+ reconYuv->copyFromYuv(predYuv);
if (cu->getMergeFlag(0) && cu->getPartitionSize(0) == SIZE_2Nx2N)
cu->setSkipFlagSubParts(true, 0, cu->getDepth(0));
}
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/pixel.cpp
--- a/source/common/pixel.cpp Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/pixel.cpp Tue Jul 22 15:53:10 2014 +0900
@@ -1020,9 +1020,7 @@
p.chroma[X265_CSP_I422].copy_pp[CHROMA422X_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.chroma[X265_CSP_I422].copy_sp[CHROMA422X_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
p.chroma[X265_CSP_I422].copy_ps[CHROMA422X_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
- p.chroma[X265_CSP_I422].copy_ss[CHROMA422X_ ## W ## x ## H] = blockcopy_ss_c<W, H>; \
- p.chroma[X265_CSP_I422].copy_sp[NUM_CHROMA_PARTITIONS422] = blockcopy_sp_c<W, (H >> 1)>; \
- p.chroma[X265_CSP_I422].copy_ps[NUM_CHROMA_PARTITIONS422] = blockcopy_ps_c<W, (H >> 1)>;
+ p.chroma[X265_CSP_I422].copy_ss[CHROMA422X_ ## W ## x ## H] = blockcopy_ss_c<W, H>;
#define CHROMA_444(W, H) \
p.chroma[X265_CSP_I444].addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/primitives.cpp
--- a/source/common/primitives.cpp Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/primitives.cpp Tue Jul 22 15:53:10 2014 +0900
@@ -49,16 +49,6 @@
255, 255, 255, LUMA_64x16, 255, 255, 255, LUMA_64x32, 255, 255, 255, LUMA_64x48, 255, 255, 255, LUMA_64x64 // 64
};
-extern const uint8_t lumaSquarePartitionMapTable[] =
-{
- LUMA_4x4, LUMA_8x8, 255, LUMA_16x16, 255, 255, 255, LUMA_32x32, 255, 255, 255, 255, 255, 255, 255, LUMA_64x64
-};
-
-extern const uint8_t lumaPartitionsFromSquareBlocksTable[] =
-{
- LUMA_4x4, LUMA_8x8, LUMA_16x16, LUMA_32x32, LUMA_64x64
-};
-
/* the "authoritative" set of encoder primitives */
EncoderPrimitives primitives;
@@ -77,7 +67,7 @@
Setup_C_LoopFilterPrimitives(p); // loopfilter.cpp
}
-static void Setup_Alias_Primitives(EncoderPrimitives &p)
+void Setup_Alias_Primitives(EncoderPrimitives &p)
{
/* copy reusable luma primitives to chroma 4:4:4 */
for (int i = 0; i < NUM_LUMA_PARTITIONS; i++)
@@ -93,7 +83,7 @@
for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
{
- int partL = lumaPartitionsFromSquareBlocksTable[i];
+ int partL = partitionFromLog2Size(i + 2);
p.square_copy_pp[i] = p.luma_copy_pp[partL];
p.square_copy_ps[i] = p.luma_copy_ps[partL];
p.square_copy_sp[i] = p.luma_copy_sp[partL];
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/primitives.h
--- a/source/common/primitives.h Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/primitives.h Tue Jul 22 15:53:10 2014 +0900
@@ -36,12 +36,18 @@
// x265 private namespace
enum LumaPartitions
-{ // Square Rectangular Asymmetrical (0.75, 0.25)
- LUMA_4x4,
- LUMA_8x8, LUMA_8x4, LUMA_4x8,
- LUMA_16x16, LUMA_16x8, LUMA_8x16, LUMA_16x12, LUMA_12x16, LUMA_16x4, LUMA_4x16,
- LUMA_32x32, LUMA_32x16, LUMA_16x32, LUMA_32x24, LUMA_24x32, LUMA_32x8, LUMA_8x32,
- LUMA_64x64, LUMA_64x32, LUMA_32x64, LUMA_64x48, LUMA_48x64, LUMA_64x16, LUMA_16x64,
+{
+ // Square
+ LUMA_4x4, LUMA_8x8, LUMA_16x16, LUMA_32x32, LUMA_64x64,
+ // Rectangular
+ LUMA_8x4, LUMA_4x8,
+ LUMA_16x8, LUMA_8x16,
+ LUMA_32x16, LUMA_16x32,
+ LUMA_64x32, LUMA_32x64,
+ // Asymmetrical (0.75, 0.25)
+ LUMA_16x12, LUMA_12x16, LUMA_16x4, LUMA_4x16,
+ LUMA_32x24, LUMA_24x32, LUMA_32x8, LUMA_8x32,
+ LUMA_64x48, LUMA_48x64, LUMA_64x16, LUMA_16x64,
NUM_LUMA_PARTITIONS
};
@@ -50,21 +56,27 @@
// be indexed by the luma partition enum
enum Chroma420Partitions
{
- CHROMA_2x2, // never used by HEVC
- CHROMA_4x4, CHROMA_4x2, CHROMA_2x4,
- CHROMA_8x8, CHROMA_8x4, CHROMA_4x8, CHROMA_8x6, CHROMA_6x8, CHROMA_8x2, CHROMA_2x8,
- CHROMA_16x16, CHROMA_16x8, CHROMA_8x16, CHROMA_16x12, CHROMA_12x16, CHROMA_16x4, CHROMA_4x16,
- CHROMA_32x32, CHROMA_32x16, CHROMA_16x32, CHROMA_32x24, CHROMA_24x32, CHROMA_32x8, CHROMA_8x32,
+ CHROMA_2x2, CHROMA_4x4, CHROMA_8x8, CHROMA_16x16, CHROMA_32x32,
+ CHROMA_4x2, CHROMA_2x4,
+ CHROMA_8x4, CHROMA_4x8,
+ CHROMA_16x8, CHROMA_8x16,
+ CHROMA_32x16, CHROMA_16x32,
+ CHROMA_8x6, CHROMA_6x8, CHROMA_8x2, CHROMA_2x8,
+ CHROMA_16x12, CHROMA_12x16, CHROMA_16x4, CHROMA_4x16,
+ CHROMA_32x24, CHROMA_24x32, CHROMA_32x8, CHROMA_8x32,
NUM_CHROMA_PARTITIONS
};
enum Chroma422Partitions
{
- CHROMA422X_4x8,
- CHROMA422_4x8, CHROMA422_4x4, CHROMA422_2x8,
- CHROMA422_8x16, CHROMA422_8x8, CHROMA422_4x16, CHROMA422_8x12, CHROMA422_6x16, CHROMA422_8x4, CHROMA422_2x16,
- CHROMA422_16x32, CHROMA422_16x16, CHROMA422_8x32, CHROMA422_16x24, CHROMA422_12x32, CHROMA422_16x8, CHROMA422_4x32,
- CHROMA422_32x64, CHROMA422_32x32, CHROMA422_16x64, CHROMA422_32x48, CHROMA422_24x64, CHROMA422_32x16, CHROMA422_8x64,
+ CHROMA422X_4x8, CHROMA422_4x8, CHROMA422_8x16, CHROMA422_16x32, CHROMA422_32x64,
+ CHROMA422_4x4, CHROMA422_2x8,
+ CHROMA422_8x8, CHROMA422_4x16,
+ CHROMA422_16x16, CHROMA422_8x32,
+ CHROMA422_32x32, CHROMA422_16x64,
+ CHROMA422_8x12, CHROMA422_6x16, CHROMA422_8x4, CHROMA422_2x16,
+ CHROMA422_16x24, CHROMA422_12x32, CHROMA422_16x8, CHROMA422_4x32,
+ CHROMA422_32x48, CHROMA422_24x64, CHROMA422_32x16, CHROMA422_8x64,
NUM_CHROMA_PARTITIONS422
};
@@ -111,20 +123,10 @@
return part;
}
-inline int partitionFromSize(int size)
-{
- X265_CHECK((size & ~(4 | 8 | 16 | 32 | 64)) == 0, "Invalid block size\n");
- extern const uint8_t lumaSquarePartitionMapTable[];
- int part = (int)lumaSquarePartitionMapTable[(size >> 2) - 1];
- X265_CHECK(part != 255, "Invalid block size %d\n", size);
- return part;
-}
-
inline int partitionFromLog2Size(int log2Size)
{
X265_CHECK(2 <= log2Size && log2Size <= 6, "Invalid block size\n");
- extern const uint8_t lumaPartitionsFromSquareBlocksTable[];
- return (int)lumaPartitionsFromSquareBlocksTable[log2Size - 2];
+ return log2Size - 2;
}
typedef int (*pixelcmp_t)(pixel *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride); // fenc is aligned
@@ -284,8 +286,8 @@
filter_pp_t filter_hpp[NUM_LUMA_PARTITIONS];
filter_hps_t filter_hps[NUM_LUMA_PARTITIONS];
copy_pp_t copy_pp[NUM_LUMA_PARTITIONS];
- copy_sp_t copy_sp[NUM_LUMA_PARTITIONS + 1];
- copy_ps_t copy_ps[NUM_LUMA_PARTITIONS + 1];
+ copy_sp_t copy_sp[NUM_LUMA_PARTITIONS];
+ copy_ps_t copy_ps[NUM_LUMA_PARTITIONS];
copy_ss_t copy_ss[NUM_LUMA_PARTITIONS];
pixel_sub_ps_t sub_ps[NUM_LUMA_PARTITIONS];
pixel_add_ps_t add_ps[NUM_LUMA_PARTITIONS];
@@ -302,6 +304,7 @@
void Setup_C_Primitives(EncoderPrimitives &p);
void Setup_Instrinsic_Primitives(EncoderPrimitives &p, int cpuMask);
void Setup_Assembly_Primitives(EncoderPrimitives &p, int cpuMask);
+void Setup_Alias_Primitives(EncoderPrimitives &p);
}
#endif // ifndef X265_PRIMITIVES_H
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/shortyuv.cpp
--- a/source/common/shortyuv.cpp Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/shortyuv.cpp Tue Jul 22 15:53:10 2014 +0900
@@ -103,22 +103,6 @@
primitives.chroma[m_csp].sub_ps[part](getCrAddr(), m_cwidth, srcV0, srcV1, srcYuv0->getCStride(), srcYuv1->getCStride());
}
-void ShortYuv::addClip(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize)
-{
- int16_t* srcY0 = srcYuv0->getLumaAddr();
- int16_t* srcY1 = srcYuv1->getLumaAddr();
-
- primitives.pixeladd_ss(partSize, partSize, getLumaAddr(), m_width, srcY0, srcY1, srcYuv0->m_width, srcYuv1->m_width);
-
- uint32_t cpartSize = partSize >> m_hChromaShift;
- int16_t* srcU0 = srcYuv0->getCbAddr();
- int16_t* srcU1 = srcYuv1->getCbAddr();
- int16_t* srcV0 = srcYuv0->getCrAddr();
- int16_t* srcV1 = srcYuv1->getCrAddr();
- primitives.pixeladd_ss(cpartSize, cpartSize, getCbAddr(), m_cwidth, srcU0, srcU1, srcYuv0->m_cwidth, srcYuv1->m_cwidth);
- primitives.pixeladd_ss(cpartSize, cpartSize, getCrAddr(), m_cwidth, srcV0, srcV1, srcYuv0->m_cwidth, srcYuv1->m_cwidth);
-}
-
void ShortYuv::copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size)
{
int16_t* src = getLumaAddr(partIdx);
@@ -161,16 +145,3 @@
primitives.chroma[m_csp].copy_sp[part](dstU, dstStride, srcU, srcStride);
primitives.chroma[m_csp].copy_sp[part](dstV, dstStride, srcV, srcStride);
}
-
-void ShortYuv::copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId)
-{
- X265_CHECK(chromaId == 1 || chromaId == 2, "invalid chroma id");
-
- int part = partitionFromSize(lumaSize);
-
- int16_t* src = getChromaAddr(chromaId, partIdx);
- int16_t* dst = dstPicYuv->getChromaAddr(chromaId, partIdx);
- uint32_t srcStride = m_cwidth;
- uint32_t dstStride = dstPicYuv->m_cwidth;
- primitives.chroma[m_csp].copy_ss[part](dst, dstStride, src, srcStride);
-}
diff -r d303b4d860e9 -r b2ad081e4bfc source/common/shortyuv.h
--- a/source/common/shortyuv.h Mon Jul 21 22:43:38 2014 -0500
+++ b/source/common/shortyuv.h Tue Jul 22 15:53:10 2014 +0900
@@ -90,12 +90,9 @@
int16_t* getChromaAddr(uint32_t chromaId, uint32_t partUnitIdx) { return m_buf[chromaId] + getChromaAddrOffset(partUnitIdx, m_cwidth); }
void subtract(TComYuv* srcYuv0, TComYuv* srcYuv1, uint32_t log2Size);
- void addClip(ShortYuv* srcYuv0, ShortYuv* srcYuv1, uint32_t partSize);
void copyPartToPartLuma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size);
void copyPartToPartChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t log2SizeL);
- void copyPartToPartShortChroma(ShortYuv* dstPicYuv, uint32_t partIdx, uint32_t lumaSize, uint32_t chromaId);
-
void copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t log2Size);
void copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t log2SizeL);
diff -r d303b4d860e9 -r b2ad081e4bfc source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Mon Jul 21 22:43:38 2014 -0500
+++ b/source/encoder/analysis.cpp Tue Jul 22 15:53:10 2014 +0900
@@ -1847,6 +1847,7 @@
if (lcu->getPredictionMode(absPartIdx) == MODE_INTER)
{
+ int part = partitionFromLog2Size(cu->getLog2CUSize(0));
if (!lcu->getSkipFlag(absPartIdx))
{
// Calculate Residue
@@ -1856,7 +1857,6 @@
uint32_t src2stride = m_bestPredYuv[0]->getStride();
uint32_t src1stride = m_origYuv[0]->getStride();
uint32_t dststride = m_tmpResiYuv[depth]->m_width;
- int part = partitionFromLog2Size(cu->getLog2CUSize(0));
primitives.luma_sub_ps[part](dst, dststride, src1, src2, src1stride, src2stride);
src2 = m_bestPredYuv[0]->getCbAddr(absPartIdx);
@@ -1915,7 +1915,6 @@
// Generate Recon
TComPicYuv* rec = pic->getPicYuvRec();
- int part = partitionFromLog2Size(cu->getLog2CUSize(0));
pixel* src = m_bestPredYuv[0]->getLumaAddr(absPartIdx);
pixel* dst = rec->getLumaAddr(cu->getAddr(), absPartIdx);
uint32_t srcstride = m_bestPredYuv[0]->getStride();
diff -r d303b4d860e9 -r b2ad081e4bfc source/test/testbench.cpp
--- a/source/test/testbench.cpp Mon Jul 21 22:43:38 2014 -0500
+++ b/source/test/testbench.cpp Tue Jul 22 15:53:10 2014 +0900
@@ -127,6 +127,7 @@
EncoderPrimitives cprim;
memset(&cprim, 0, sizeof(EncoderPrimitives));
Setup_C_Primitives(cprim);
+ Setup_Alias_Primitives(cprim);
struct test_arch_t
{
@@ -186,6 +187,7 @@
memset(&optprim, 0, sizeof(optprim));
Setup_Instrinsic_Primitives(optprim, cpuid);
Setup_Assembly_Primitives(optprim, cpuid);
+ Setup_Alias_Primitives(optprim);
printf("\nTest performance improvement with full optimizations\n");
More information about the x265-devel mailing list