[x265] [PATCH] primitives: add sa8d and sse_pp aliases for chroma square blocks
Steve Borho
steve at borho.org
Mon Jan 5 05:23:48 CET 2015
# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1420431805 -19800
# Mon Jan 05 09:53:25 2015 +0530
# Node ID c781e15eb4d5146efa115f1ab170ca673440baf6
# Parent f255e8d06423231cb8c58ab5d3b10de7fb27b424
primitives: add sa8d and sse_pp aliases for chroma square blocks
This avoids the need to call partitionFromSizes() in some key analysis
functions.
diff -r f255e8d06423 -r c781e15eb4d5 source/common/primitives.cpp
--- a/source/common/primitives.cpp Fri Jan 02 18:22:38 2015 +0530
+++ b/source/common/primitives.cpp Mon Jan 05 09:53:25 2015 +0530
@@ -81,8 +81,10 @@
for (int i = 0; i < NUM_SQUARE_BLOCKS; i++)
{
- p.chroma[X265_CSP_I444].add_ps[i] = p.luma_add_ps[i];
- p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];
+ p.chroma[X265_CSP_I444].sa8d[i] = p.sa8d[i];
+ p.chroma[X265_CSP_I444].sse_pp[i] = p.sse_pp[i];
+ p.chroma[X265_CSP_I444].add_ps[i] = p.luma_add_ps[i];
+ p.chroma[X265_CSP_I444].sub_ps[i] = p.luma_sub_ps[i];
}
primitives.sa8d[BLOCK_4x4] = primitives.sa8d_inter[LUMA_4x4];
@@ -145,6 +147,28 @@
//p.chroma[X265_CSP_I422].satd[CHROMA422_24x64] = satd8<24, 64>;
p.chroma[X265_CSP_I422].satd[CHROMA422_32x16] = primitives.satd[LUMA_32x16];
//p.chroma[X265_CSP_I422].satd[CHROMA422_8x64] = satd8<8, 64>;
+
+ p.chroma[X265_CSP_I420].sa8d[BLOCK_4x4] = NULL;
+ p.chroma[X265_CSP_I422].sa8d[BLOCK_4x4] = NULL;
+ p.chroma[X265_CSP_I420].sa8d[BLOCK_8x8] = p.satd[LUMA_4x4];
+ p.chroma[X265_CSP_I422].sa8d[BLOCK_8x8] = p.satd[LUMA_4x8];
+ p.chroma[X265_CSP_I420].sa8d[BLOCK_16x16] = p.sa8d[LUMA_8x8];
+ p.chroma[X265_CSP_I422].sa8d[BLOCK_16x16] = p.sa8d_inter[LUMA_8x16];
+ p.chroma[X265_CSP_I420].sa8d[BLOCK_32x32] = p.sa8d[LUMA_16x16];
+ p.chroma[X265_CSP_I422].sa8d[BLOCK_32x32] = p.sa8d_inter[LUMA_16x32];
+ p.chroma[X265_CSP_I420].sa8d[BLOCK_64x64] = p.sa8d[LUMA_32x32];
+ p.chroma[X265_CSP_I422].sa8d[BLOCK_64x64] = p.sa8d_inter[LUMA_32x64];
+
+ p.chroma[X265_CSP_I420].sse_pp[BLOCK_4x4] = NULL;
+ p.chroma[X265_CSP_I422].sse_pp[BLOCK_4x4] = NULL;
+ p.chroma[X265_CSP_I420].sse_pp[BLOCK_8x8] = p.sse_pp[LUMA_4x4];
+ p.chroma[X265_CSP_I422].sse_pp[BLOCK_8x8] = p.sse_pp[LUMA_4x8];
+ p.chroma[X265_CSP_I420].sse_pp[BLOCK_16x16] = p.sse_pp[LUMA_8x8];
+ p.chroma[X265_CSP_I422].sse_pp[BLOCK_16x16] = p.sse_pp[LUMA_8x16];
+ p.chroma[X265_CSP_I420].sse_pp[BLOCK_32x32] = p.sse_pp[LUMA_16x16];
+ p.chroma[X265_CSP_I422].sse_pp[BLOCK_32x32] = p.sse_pp[LUMA_16x32];
+ p.chroma[X265_CSP_I420].sse_pp[BLOCK_64x64] = p.sse_pp[LUMA_32x32];
+ p.chroma[X265_CSP_I422].sse_pp[BLOCK_64x64] = p.sse_pp[LUMA_32x64];
}
}
using namespace x265;
diff -r f255e8d06423 -r c781e15eb4d5 source/common/primitives.h
--- a/source/common/primitives.h Fri Jan 02 18:22:38 2015 +0530
+++ b/source/common/primitives.h Mon Jan 05 09:53:25 2015 +0530
@@ -274,6 +274,12 @@
struct
{
+ pixelcmp_t sa8d[NUM_SQUARE_BLOCKS];
+ pixelcmp_t sse_pp[NUM_SQUARE_BLOCKS];
+ pixel_sub_ps_t sub_ps[NUM_SQUARE_BLOCKS];
+ pixel_add_ps_t add_ps[NUM_SQUARE_BLOCKS];
+ filter_p2s_t p2s;
+
pixelcmp_t satd[NUM_LUMA_PARTITIONS];
filter_pp_t filter_vpp[NUM_LUMA_PARTITIONS];
filter_ps_t filter_vps[NUM_LUMA_PARTITIONS];
@@ -286,9 +292,6 @@
copy_sp_t copy_sp[NUM_LUMA_PARTITIONS];
copy_ps_t copy_ps[NUM_LUMA_PARTITIONS];
copy_ss_t copy_ss[NUM_LUMA_PARTITIONS];
- pixel_sub_ps_t sub_ps[NUM_SQUARE_BLOCKS];
- pixel_add_ps_t add_ps[NUM_SQUARE_BLOCKS];
- filter_p2s_t p2s;
} chroma[X265_CSP_COUNT];
};
diff -r f255e8d06423 -r c781e15eb4d5 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Fri Jan 02 18:22:38 2015 +0530
+++ b/source/encoder/analysis.cpp Mon Jan 05 09:53:25 2015 +0530
@@ -1228,12 +1228,8 @@
bestPred->sa8dCost = MAX_INT64;
int bestSadCand = -1;
- int cpart, sizeIdx = cuGeom.log2CUSize - 2;
- if (m_bChromaSa8d)
- {
- int cuSize = 1 << cuGeom.log2CUSize;
- cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
- }
+ int sizeIdx = cuGeom.log2CUSize - 2;
+
for (uint32_t i = 0; i < maxNumMergeCand; ++i)
{
if (m_bFrameParallel &&
@@ -1255,8 +1251,8 @@
tempPred->distortion = primitives.sa8d[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
if (m_bChromaSa8d)
{
- tempPred->distortion += primitives.sa8d_inter[cpart](fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
- tempPred->distortion += primitives.sa8d_inter[cpart](fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
+ tempPred->distortion += primitives.chroma[m_csp].sa8d[sizeIdx](fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
+ tempPred->distortion += primitives.chroma[m_csp].sa8d[sizeIdx](fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
}
tempPred->sa8dCost = m_rdCost.calcRdSADCost(tempPred->distortion, tempPred->sa8dBits);
@@ -1450,10 +1446,8 @@
interMode.distortion = primitives.sa8d[part](fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
if (m_bChromaSa8d)
{
- uint32_t cuSize = 1 << cuGeom.log2CUSize;
- int cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
- interMode.distortion += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize);
- interMode.distortion += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize);
+ interMode.distortion += primitives.chroma[m_csp].sa8d[part](fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize);
+ interMode.distortion += primitives.chroma[m_csp].sa8d[part](fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize);
}
interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
@@ -1534,13 +1528,7 @@
const Yuv& fencYuv = *bidir2Nx2N.fencYuv;
MV mvzero(0, 0);
- int cpart, partEnum = cuGeom.log2CUSize - 2;
-
- if (m_bChromaSa8d)
- {
- int cuSize = 1 << cuGeom.log2CUSize;
- cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
- }
+ int partEnum = cuGeom.log2CUSize - 2;
bidir2Nx2N.bestME[0][0] = inter2Nx2N.bestME[0][0];
bidir2Nx2N.bestME[0][1] = inter2Nx2N.bestME[0][1];
@@ -1576,8 +1564,8 @@
if (m_bChromaSa8d)
{
/* Add in chroma distortion */
- sa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize);
- sa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[2], bidir2Nx2N.predYuv.m_csize);
+ sa8d += primitives.chroma[m_csp].sa8d[partEnum](fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize);
+ sa8d += primitives.chroma[m_csp].sa8d[partEnum](fencYuv.m_buf[2], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[2], bidir2Nx2N.predYuv.m_csize);
}
bidir2Nx2N.sa8dBits = bestME[0].bits + bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
bidir2Nx2N.sa8dCost = sa8d + m_rdCost.getCost(bidir2Nx2N.sa8dBits);
@@ -1613,8 +1601,8 @@
motionCompensation(tmpPredYuv, true, true);
zsa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
- zsa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, tmpPredYuv.m_buf[1], tmpPredYuv.m_csize);
- zsa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, tmpPredYuv.m_buf[2], tmpPredYuv.m_csize);
+ zsa8d += primitives.chroma[m_csp].sa8d[partEnum](fencYuv.m_buf[1], fencYuv.m_csize, tmpPredYuv.m_buf[1], tmpPredYuv.m_csize);
+ zsa8d += primitives.chroma[m_csp].sa8d[partEnum](fencYuv.m_buf[2], fencYuv.m_csize, tmpPredYuv.m_buf[2], tmpPredYuv.m_csize);
}
else
{
diff -r f255e8d06423 -r c781e15eb4d5 source/encoder/search.cpp
--- a/source/encoder/search.cpp Fri Jan 02 18:22:38 2015 +0530
+++ b/source/encoder/search.cpp Mon Jan 05 09:53:25 2015 +0530
@@ -2510,11 +2510,8 @@
X265_CHECK(!cu.isIntra(0), "intra CU not expected\n");
uint32_t log2CUSize = cu.m_log2CUSize[0];
- uint32_t cuSize = 1 << log2CUSize;
- uint32_t depth = cu.m_cuDepth[0];
-
- int part = partitionFromLog2Size(log2CUSize);
- int cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
+ uint32_t depth = cu.m_cuDepth[0];
+ int sizeIdx = log2CUSize - 2;
m_quant.setQPforQuant(interMode.cu);
@@ -2530,9 +2527,9 @@
if (!cu.m_tqBypass[0])
{
- uint32_t cbf0Dist = primitives.sse_pp[part](fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
- cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.sse_pp[cpart](fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
- cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.sse_pp[cpart](fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
+ uint32_t cbf0Dist = primitives.sse_pp[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
+ cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].sse_pp[sizeIdx](fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
+ cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].sse_pp[sizeIdx](fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
/* Consider the RD cost of not signaling any residual */
m_entropyCoder.load(m_rqt[depth].cur);
@@ -2603,9 +2600,9 @@
reconYuv->copyFromYuv(*predYuv);
// update with clipped distortion and cost (qp estimation loop uses unclipped values)
- uint32_t bestDist = primitives.sse_pp[part](fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
- bestDist += m_rdCost.scaleChromaDist(1, primitives.sse_pp[cpart](fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
- bestDist += m_rdCost.scaleChromaDist(2, primitives.sse_pp[cpart](fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+ uint32_t bestDist = primitives.sse_pp[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
+ bestDist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].sse_pp[sizeIdx](fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+ bestDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].sse_pp[sizeIdx](fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
if (m_rdCost.m_psyRd)
interMode.psyEnergy = m_rdCost.psyCost(log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
More information about the x265-devel mailing list