[x265] [PATCH 2 of 5] Refactor EncoderPrimitives under encoder
Kevin Wu
kevin at multicorewareinc.com
Fri Jan 9 07:53:49 CET 2015
# HG changeset patch
# User Kevin Wu <kevin at multicorewareinc.com>
# Date 1420752626 21600
# Thu Jan 08 15:30:26 2015 -0600
# Node ID e5ad493e60de85e58193d49218997ace8fa43c00
# Parent c6ca0fd54aa7c50119c9e5bdbbd02d49abb45559
Refactor EncoderPrimitives under encoder: replace the per-primitive function-pointer
arrays indexed by partition/block size (e.g. primitives.sad[part], primitives.sa8d[size],
primitives.luma_copy_pp[part]) with per-partition primitive structs, so call sites become
primitives.pu[part].sad, primitives.cu[size].sa8d, primitives.pu[part].luma_copy_pp, etc.
The chroma primitive tables are reorganized the same way
(primitives.chroma[csp].pu[part].* and primitives.chroma[csp].cu[size].*).
No functional change intended; all hunks are mechanical call-site conversions.
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/analysis.cpp Thu Jan 08 15:30:26 2015 -0600
@@ -1254,11 +1254,11 @@
motionCompensation(tempPred->predYuv, true, m_bChromaSa8d);
tempPred->sa8dBits = getTUBits(i, maxNumMergeCand);
- tempPred->distortion = primitives.sa8d[sizeIdx](fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
+ tempPred->distortion = primitives.cu[sizeIdx].sa8d(fencYuv->m_buf[0], fencYuv->m_size, tempPred->predYuv.m_buf[0], tempPred->predYuv.m_size);
if (m_bChromaSa8d)
{
- tempPred->distortion += primitives.sa8d_inter[cpart](fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
- tempPred->distortion += primitives.sa8d_inter[cpart](fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
+ tempPred->distortion += primitives.pu[cpart].sa8d_inter(fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
+ tempPred->distortion += primitives.pu[cpart].sa8d_inter(fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
}
tempPred->sa8dCost = m_rdCost.calcRdSADCost(tempPred->distortion, tempPred->sa8dBits);
@@ -1449,13 +1449,13 @@
const Yuv& fencYuv = *interMode.fencYuv;
Yuv& predYuv = interMode.predYuv;
int part = partitionFromLog2Size(cuGeom.log2CUSize);
- interMode.distortion = primitives.sa8d[part](fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
+ interMode.distortion = primitives.cu[part].sa8d(fencYuv.m_buf[0], fencYuv.m_size, predYuv.m_buf[0], predYuv.m_size);
if (m_bChromaSa8d)
{
uint32_t cuSize = 1 << cuGeom.log2CUSize;
int cpart = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
- interMode.distortion += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize);
- interMode.distortion += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize);
+ interMode.distortion += primitives.pu[cpart].sa8d_inter(fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize);
+ interMode.distortion += primitives.pu[cpart].sa8d_inter(fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize);
}
interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
@@ -1574,12 +1574,12 @@
prepMotionCompensation(cu, cuGeom, 0);
motionCompensation(bidir2Nx2N.predYuv, true, m_bChromaSa8d);
- int sa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size);
+ int sa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, bidir2Nx2N.predYuv.m_buf[0], bidir2Nx2N.predYuv.m_size);
if (m_bChromaSa8d)
{
/* Add in chroma distortion */
- sa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize);
- sa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[2], bidir2Nx2N.predYuv.m_csize);
+ sa8d += primitives.pu[cpart].sa8d_inter(fencYuv.m_buf[1], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[1], bidir2Nx2N.predYuv.m_csize);
+ sa8d += primitives.pu[cpart].sa8d_inter(fencYuv.m_buf[2], fencYuv.m_csize, bidir2Nx2N.predYuv.m_buf[2], bidir2Nx2N.predYuv.m_csize);
}
bidir2Nx2N.sa8dBits = bestME[0].bits + bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
bidir2Nx2N.sa8dCost = sa8d + m_rdCost.getCost(bidir2Nx2N.sa8dBits);
@@ -1614,9 +1614,9 @@
prepMotionCompensation(cu, cuGeom, 0);
motionCompensation(tmpPredYuv, true, true);
- zsa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
- zsa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[1], fencYuv.m_csize, tmpPredYuv.m_buf[1], tmpPredYuv.m_csize);
- zsa8d += primitives.sa8d_inter[cpart](fencYuv.m_buf[2], fencYuv.m_csize, tmpPredYuv.m_buf[2], tmpPredYuv.m_csize);
+ zsa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
+ zsa8d += primitives.pu[cpart].sa8d_inter(fencYuv.m_buf[1], fencYuv.m_csize, tmpPredYuv.m_buf[1], tmpPredYuv.m_csize);
+ zsa8d += primitives.pu[cpart].sa8d_inter(fencYuv.m_buf[2], fencYuv.m_csize, tmpPredYuv.m_buf[2], tmpPredYuv.m_csize);
}
else
{
@@ -1624,8 +1624,8 @@
pixel *fref1 = m_slice->m_mref[1][ref1].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx);
intptr_t refStride = m_slice->m_mref[0][0].lumaStride;
- primitives.pixelavg_pp[partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, fref0, refStride, fref1, refStride, 32);
- zsa8d = primitives.sa8d[partEnum](fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
+ primitives.pu[partEnum].pixelavg_pp(tmpPredYuv.m_buf[0], tmpPredYuv.m_size, fref0, refStride, fref1, refStride, 32);
+ zsa8d = primitives.cu[partEnum].sa8d(fencYuv.m_buf[0], fencYuv.m_size, tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
}
uint32_t bits0 = bestME[0].bits - m_me.bitcost(bestME[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
@@ -1721,15 +1721,15 @@
pixel* predU = predYuv.getCbAddr(absPartIdx);
pixel* predV = predYuv.getCrAddr(absPartIdx);
- primitives.luma_sub_ps[sizeIdx](resiYuv.m_buf[0], resiYuv.m_size,
+ primitives.pu[sizeIdx].luma_sub_ps(resiYuv.m_buf[0], resiYuv.m_size,
fencYuv.m_buf[0], predY,
fencYuv.m_size, predYuv.m_size);
- primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[1], resiYuv.m_csize,
+ primitives.chroma[m_csp].cu[sizeIdx].sub_ps(resiYuv.m_buf[1], resiYuv.m_csize,
fencYuv.m_buf[1], predU,
fencYuv.m_csize, predYuv.m_csize);
- primitives.chroma[m_csp].sub_ps[sizeIdx](resiYuv.m_buf[2], resiYuv.m_csize,
+ primitives.chroma[m_csp].cu[sizeIdx].sub_ps(resiYuv.m_buf[2], resiYuv.m_csize,
fencYuv.m_buf[2], predV,
fencYuv.m_csize, predYuv.m_csize);
@@ -1746,24 +1746,24 @@
PicYuv& reconPic = *m_frame->m_reconPic;
if (cu.m_cbf[0][0])
- primitives.luma_add_ps[sizeIdx](reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
+ primitives.pu[sizeIdx].luma_add_ps(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
predY, resiYuv.m_buf[0], predYuv.m_size, resiYuv.m_size);
else
- primitives.luma_copy_pp[sizeIdx](reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
+ primitives.pu[sizeIdx].luma_copy_pp(reconPic.getLumaAddr(cu.m_cuAddr, absPartIdx), reconPic.m_stride,
predY, predYuv.m_size);
if (cu.m_cbf[1][0])
- primitives.chroma[m_csp].add_ps[sizeIdx](reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ primitives.chroma[m_csp].cu[sizeIdx].add_ps(reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predU, resiYuv.m_buf[1], predYuv.m_csize, resiYuv.m_csize);
else
- primitives.chroma[m_csp].copy_pp[sizeIdx](reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ primitives.chroma[m_csp].pu[sizeIdx].copy_pp(reconPic.getCbAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predU, predYuv.m_csize);
if (cu.m_cbf[2][0])
- primitives.chroma[m_csp].add_ps[sizeIdx](reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ primitives.chroma[m_csp].cu[sizeIdx].add_ps(reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predV, resiYuv.m_buf[2], predYuv.m_csize, resiYuv.m_csize);
else
- primitives.chroma[m_csp].copy_pp[sizeIdx](reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
+ primitives.chroma[m_csp].pu[sizeIdx].copy_pp(reconPic.getCrAddr(cu.m_cuAddr, absPartIdx), reconPic.m_strideC,
predV, predYuv.m_csize);
}
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/encoder.cpp Thu Jan 08 15:30:26 2015 -0600
@@ -85,7 +85,7 @@
void Encoder::create()
{
- if (!primitives.sad[0])
+ if (!primitives.pu[0].sad)
{
// this should be an impossible condition when using our public API, and indicates a serious bug.
x265_log(m_param, X265_LOG_ERROR, "Primitives must be initialized before encoder is created\n");
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/framefilter.cpp Thu Jan 08 15:30:26 2015 -0600
@@ -330,18 +330,18 @@
if (!(stride & 31))
for (; x + 64 <= width; x += 64)
- ssd += primitives.sse_pp[LUMA_64x64](fenc + x, stride, rec + x, stride);
+ ssd += primitives.pu[LUMA_64x64].sse_pp(fenc + x, stride, rec + x, stride);
if (!(stride & 15))
for (; x + 16 <= width; x += 16)
- ssd += primitives.sse_pp[LUMA_16x64](fenc + x, stride, rec + x, stride);
+ ssd += primitives.pu[LUMA_16x64].sse_pp(fenc + x, stride, rec + x, stride);
for (; x + 4 <= width; x += 4)
{
- ssd += primitives.sse_pp[LUMA_4x16](fenc + x, stride, rec + x, stride);
- ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 16 * stride, stride, rec + x + 16 * stride, stride);
- ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 32 * stride, stride, rec + x + 32 * stride, stride);
- ssd += primitives.sse_pp[LUMA_4x16](fenc + x + 48 * stride, stride, rec + x + 48 * stride, stride);
+ ssd += primitives.pu[LUMA_4x16].sse_pp(fenc + x, stride, rec + x, stride);
+ ssd += primitives.pu[LUMA_4x16].sse_pp(fenc + x + 16 * stride, stride, rec + x + 16 * stride, stride);
+ ssd += primitives.pu[LUMA_4x16].sse_pp(fenc + x + 32 * stride, stride, rec + x + 32 * stride, stride);
+ ssd += primitives.pu[LUMA_4x16].sse_pp(fenc + x + 48 * stride, stride, rec + x + 48 * stride, stride);
}
fenc += stride * 64;
@@ -355,14 +355,14 @@
if (!(stride & 31))
for (; x + 64 <= width; x += 64)
- ssd += primitives.sse_pp[LUMA_64x16](fenc + x, stride, rec + x, stride);
+ ssd += primitives.pu[LUMA_64x16].sse_pp(fenc + x, stride, rec + x, stride);
if (!(stride & 15))
for (; x + 16 <= width; x += 16)
- ssd += primitives.sse_pp[LUMA_16x16](fenc + x, stride, rec + x, stride);
+ ssd += primitives.pu[LUMA_16x16].sse_pp(fenc + x, stride, rec + x, stride);
for (; x + 4 <= width; x += 4)
- ssd += primitives.sse_pp[LUMA_4x16](fenc + x, stride, rec + x, stride);
+ ssd += primitives.pu[LUMA_4x16].sse_pp(fenc + x, stride, rec + x, stride);
fenc += stride * 16;
rec += stride * 16;
@@ -375,10 +375,10 @@
if (!(stride & 15))
for (; x + 16 <= width; x += 16)
- ssd += primitives.sse_pp[LUMA_16x4](fenc + x, stride, rec + x, stride);
+ ssd += primitives.pu[LUMA_16x4].sse_pp(fenc + x, stride, rec + x, stride);
for (; x + 4 <= width; x += 4)
- ssd += primitives.sse_pp[LUMA_4x4](fenc + x, stride, rec + x, stride);
+ ssd += primitives.pu[LUMA_4x4].sse_pp(fenc + x, stride, rec + x, stride);
fenc += stride * 4;
rec += stride * 4;
@@ -427,7 +427,7 @@
pixel* dst = reconPic->getLumaAddr(cu->m_cuAddr, absPartIdx);
pixel* src = fencPic->getLumaAddr(cu->m_cuAddr, absPartIdx);
- primitives.luma_copy_pp[part](dst, reconPic->m_stride, src, fencPic->m_stride);
+ primitives.pu[part].luma_copy_pp(dst, reconPic->m_stride, src, fencPic->m_stride);
pixel* dstCb = reconPic->getCbAddr(cu->m_cuAddr, absPartIdx);
pixel* srcCb = fencPic->getCbAddr(cu->m_cuAddr, absPartIdx);
@@ -436,8 +436,8 @@
pixel* srcCr = fencPic->getCrAddr(cu->m_cuAddr, absPartIdx);
int csp = fencPic->m_picCsp;
- primitives.chroma[csp].copy_pp[part](dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
- primitives.chroma[csp].copy_pp[part](dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC);
+ primitives.chroma[csp].pu[part].copy_pp(dstCb, reconPic->m_strideC, srcCb, fencPic->m_strideC);
+ primitives.chroma[csp].pu[part].copy_pp(dstCr, reconPic->m_strideC, srcCr, fencPic->m_strideC);
}
/* Original YUV restoration for CU in lossless coding */
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/motion.cpp Thu Jan 08 15:30:26 2015 -0600
@@ -160,17 +160,17 @@
{
partEnum = partitionFromSizes(pwidth, pheight);
X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");
- sad = primitives.sad[partEnum];
- satd = primitives.satd[partEnum];
- sad_x3 = primitives.sad_x3[partEnum];
- sad_x4 = primitives.sad_x4[partEnum];
+ sad = primitives.pu[partEnum].sad;
+ satd = primitives.pu[partEnum].satd;
+ sad_x3 = primitives.pu[partEnum].sad_x3;
+ sad_x4 = primitives.pu[partEnum].sad_x4;
blockwidth = pwidth;
blockOffset = offset;
absPartIdx = ctuAddr = -1;
/* copy PU block into cache */
- primitives.luma_copy_pp[partEnum](fencPUYuv.m_buf[0], FENC_STRIDE, fencY + offset, stride);
+ primitives.pu[partEnum].luma_copy_pp(fencPUYuv.m_buf[0], FENC_STRIDE, fencY + offset, stride);
X265_CHECK(!bChromaSATD, "chroma distortion measurements impossible in this code path\n");
}
@@ -179,11 +179,11 @@
{
partEnum = partitionFromSizes(pwidth, pheight);
X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");
- sad = primitives.sad[partEnum];
- satd = primitives.satd[partEnum];
- sad_x3 = primitives.sad_x3[partEnum];
- sad_x4 = primitives.sad_x4[partEnum];
- chromaSatd = primitives.chroma[fencPUYuv.m_csp].satd[partEnum];
+ sad = primitives.pu[partEnum].sad;
+ satd = primitives.pu[partEnum].satd;
+ sad_x3 = primitives.pu[partEnum].sad_x3;
+ sad_x4 = primitives.pu[partEnum].sad_x4;
+ chromaSatd = primitives.chroma[fencPUYuv.m_csp].pu[partEnum].satd;
/* Enable chroma residual cost if subpelRefine level is greater than 2 and chroma block size
* is an even multiple of 4x4 pixels (indicated by non-null chromaSatd pointer) */
@@ -1203,11 +1203,11 @@
* accurate but good enough for fast qpel ME */
ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
if (!yFrac)
- primitives.luma_hpp[partEnum](fref, refStride, subpelbuf, lclStride, xFrac);
+ primitives.pu[partEnum].luma_hpp(fref, refStride, subpelbuf, lclStride, xFrac);
else if (!xFrac)
- primitives.luma_vpp[partEnum](fref, refStride, subpelbuf, lclStride, yFrac);
+ primitives.pu[partEnum].luma_vpp(fref, refStride, subpelbuf, lclStride, yFrac);
else
- primitives.luma_hvpp[partEnum](fref, refStride, subpelbuf, lclStride, xFrac, yFrac);
+ primitives.pu[partEnum].luma_hvpp(fref, refStride, subpelbuf, lclStride, xFrac, yFrac);
cost = cmp(fencPUYuv.m_buf[0], lclStride, subpelbuf, lclStride);
}
@@ -1240,18 +1240,18 @@
ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
if (!yFrac)
{
- primitives.chroma[csp].filter_hpp[partEnum](refCb, refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
+ primitives.chroma[csp].pu[partEnum].filter_hpp(refCb, refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
- primitives.chroma[csp].filter_hpp[partEnum](refCr, refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
+ primitives.chroma[csp].pu[partEnum].filter_hpp(refCr, refStrideC, subpelbuf, lclStride, xFrac << (1 - hshift));
cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
}
else if (!xFrac)
{
- primitives.chroma[csp].filter_vpp[partEnum](refCb, refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
+ primitives.chroma[csp].pu[partEnum].filter_vpp(refCb, refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
- primitives.chroma[csp].filter_vpp[partEnum](refCr, refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
+ primitives.chroma[csp].pu[partEnum].filter_vpp(refCr, refStrideC, subpelbuf, lclStride, yFrac << (1 - vshift));
cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
}
else
@@ -1262,12 +1262,12 @@
int filterSize = NTAPS_CHROMA;
int halfFilterSize = (filterSize >> 1);
- primitives.chroma[csp].filter_hps[partEnum](refCb, refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
- primitives.chroma[csp].filter_vsp[partEnum](immed + (halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1 - vshift));
+ primitives.chroma[csp].pu[partEnum].filter_hps(refCb, refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
+ primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1 - vshift));
cost += chromaSatd(fencPUYuv.m_buf[1], lclStride, subpelbuf, lclStride);
- primitives.chroma[csp].filter_hps[partEnum](refCr, refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
- primitives.chroma[csp].filter_vsp[partEnum](immed + (halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1 - vshift));
+ primitives.chroma[csp].pu[partEnum].filter_hps(refCr, refStrideC, immed, extStride, xFrac << (1 - hshift), 1);
+ primitives.chroma[csp].pu[partEnum].filter_vsp(immed + (halfFilterSize - 1) * extStride, extStride, subpelbuf, lclStride, yFrac << (1 - vshift));
cost += chromaSatd(fencPUYuv.m_buf[2], lclStride, subpelbuf, lclStride);
}
}
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/ratecontrol.cpp Thu Jan 08 15:30:26 2015 -0600
@@ -162,11 +162,11 @@
if ((colorFormat != X265_CSP_I444) && bChroma)
{
ALIGN_VAR_8(pixel, pix[8 * 8]);
- primitives.luma_copy_pp[LUMA_8x8](pix, 8, src, srcStride);
- return acEnergyVar(curFrame, primitives.var[BLOCK_8x8](pix, 8), 6, bChroma);
+ primitives.pu[LUMA_8x8].luma_copy_pp(pix, 8, src, srcStride);
+ return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(pix, 8), 6, bChroma);
}
else
- return acEnergyVar(curFrame, primitives.var[BLOCK_16x16](src, srcStride), 8, bChroma);
+ return acEnergyVar(curFrame, primitives.cu[BLOCK_16x16].var(src, srcStride), 8, bChroma);
}
/* Find the total AC energy of each block in all planes */
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/rdcost.h
--- a/source/encoder/rdcost.h Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/rdcost.h Thu Jan 08 15:30:26 2015 -0600
@@ -86,13 +86,13 @@
/* return the difference in energy between the source block and the recon block */
inline int psyCost(int size, const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride) const
{
- return primitives.psy_cost_pp[size](source, sstride, recon, rstride);
+ return primitives.cu[size].psy_cost_pp(source, sstride, recon, rstride);
}
/* return the difference in energy between the source block and the recon block */
inline int psyCost(int size, const int16_t* source, intptr_t sstride, const int16_t* recon, intptr_t rstride) const
{
- return primitives.psy_cost_ss[size](source, sstride, recon, rstride);
+ return primitives.cu[size].psy_cost_ss(source, sstride, recon, rstride);
}
/* return the RD cost of this prediction, including the effect of psy-rd */
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/search.cpp
--- a/source/encoder/search.cpp Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/search.cpp Thu Jan 08 15:30:26 2015 -0600
@@ -289,21 +289,21 @@
if (m_bEnableRDOQ)
m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSize, true);
- primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
+ primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
if (numSig)
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
- primitives.luma_add_ps[sizeIdx](reconQt, reconQtStride, pred, residual, stride, stride);
+ primitives.pu[sizeIdx].luma_add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
}
else
// no coded residual, recon = pred
- primitives.luma_copy_pp[sizeIdx](reconQt, reconQtStride, pred, stride);
+ primitives.pu[sizeIdx].luma_copy_pp(reconQt, reconQtStride, pred, stride);
bCBF = !!numSig << tuDepth;
cu.setCbfSubParts(bCBF, TEXT_LUMA, absPartIdx, fullDepth);
- fullCost.distortion = primitives.sse_pp[sizeIdx](reconQt, reconQtStride, fenc, stride);
+ fullCost.distortion = primitives.pu[sizeIdx].sse_pp(reconQt, reconQtStride, fenc, stride);
m_entropyCoder.resetBits();
if (!absPartIdx)
@@ -423,7 +423,7 @@
// set reconstruction for next intra prediction blocks if full TU prediction won
pixel* picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
intptr_t picStride = m_frame->m_reconPic->m_stride;
- primitives.luma_copy_pp[sizeIdx](picReconY, picStride, reconQt, reconQtStride);
+ primitives.pu[sizeIdx].luma_copy_pp(picReconY, picStride, reconQt, reconQtStride);
outCost.rdcost += fullCost.rdcost;
outCost.distortion += fullCost.distortion;
@@ -490,13 +490,13 @@
pixel* tmpRecon = (useTSkip ? tsReconY : reconQt);
uint32_t tmpReconStride = (useTSkip ? MAX_TS_SIZE : reconQtStride);
- primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
+ primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
if (numSig)
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSize, TEXT_LUMA, true, useTSkip, numSig);
- primitives.luma_add_ps[sizeIdx](tmpRecon, tmpReconStride, pred, residual, stride, stride);
+ primitives.pu[sizeIdx].luma_add_ps(tmpRecon, tmpReconStride, pred, residual, stride, stride);
}
else if (useTSkip)
{
@@ -506,9 +506,9 @@
}
else
// no residual coded, recon = pred
- primitives.luma_copy_pp[sizeIdx](tmpRecon, tmpReconStride, pred, stride);
-
- uint32_t tmpDist = primitives.sse_pp[sizeIdx](tmpRecon, tmpReconStride, fenc, stride);
+ primitives.pu[sizeIdx].luma_copy_pp(tmpRecon, tmpReconStride, pred, stride);
+
+ uint32_t tmpDist = primitives.pu[sizeIdx].sse_pp(tmpRecon, tmpReconStride, fenc, stride);
cu.setTransformSkipSubParts(useTSkip, TEXT_LUMA, absPartIdx, fullDepth);
cu.setCbfSubParts((!!numSig) << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
@@ -579,7 +579,7 @@
if (bTSkip)
{
memcpy(coeffY, tsCoeffY, sizeof(coeff_t) << (log2TrSize * 2));
- primitives.luma_copy_pp[sizeIdx](reconQt, reconQtStride, tsReconY, tuSize);
+ primitives.pu[sizeIdx].luma_copy_pp(reconQt, reconQtStride, tsReconY, tuSize);
}
else if (checkTransformSkip)
{
@@ -591,7 +591,7 @@
// set reconstruction for next intra prediction blocks
pixel* picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
intptr_t picStride = m_frame->m_reconPic->m_stride;
- primitives.luma_copy_pp[sizeIdx](picReconY, picStride, reconQt, reconQtStride);
+ primitives.pu[sizeIdx].luma_copy_pp(picReconY, picStride, reconQt, reconQtStride);
outCost.rdcost += fullCost.rdcost;
outCost.distortion += fullCost.distortion;
@@ -637,7 +637,7 @@
coeff_t* coeffY = cu.m_trCoeff[0] + coeffOffsetY;
uint32_t sizeIdx = log2TrSize - 2;
- primitives.calcresidual[sizeIdx](fenc, pred, residual, stride);
+ primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
pixel* picReconY = m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + absPartIdx);
intptr_t picStride = m_frame->m_reconPic->m_stride;
@@ -646,19 +646,19 @@
if (numSig)
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffY, log2TrSize, TEXT_LUMA, true, false, numSig);
- primitives.luma_add_ps[sizeIdx](picReconY, picStride, pred, residual, stride, stride);
+ primitives.pu[sizeIdx].luma_add_ps(picReconY, picStride, pred, residual, stride, stride);
cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, fullDepth);
}
else
{
- primitives.luma_copy_pp[sizeIdx](picReconY, picStride, pred, stride);
+ primitives.pu[sizeIdx].luma_copy_pp(picReconY, picStride, pred, stride);
cu.setCbfSubParts(0, TEXT_LUMA, absPartIdx, fullDepth);
}
}
else
{
X265_CHECK(log2TrSize > depthRange[0], "intra luma split state failure\n");
-
+
/* code split block */
uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2;
uint32_t cbf = 0;
@@ -822,27 +822,27 @@
cu.setTransformSkipPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
- primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
+ primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
if (numSig)
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
- primitives.luma_add_ps[sizeIdxC](reconQt, reconQtStride, pred, residual, stride, stride);
+ primitives.pu[sizeIdxC].luma_add_ps(reconQt, reconQtStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
else
{
// no coded residual, recon = pred
- primitives.luma_copy_pp[sizeIdxC](reconQt, reconQtStride, pred, stride);
+ primitives.pu[sizeIdxC].luma_copy_pp(reconQt, reconQtStride, pred, stride);
cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
- outDist += m_rdCost.scaleChromaDist(chromaId, primitives.sse_pp[sizeIdxC](reconQt, reconQtStride, fenc, stride));
+ outDist += m_rdCost.scaleChromaDist(chromaId, primitives.pu[sizeIdxC].sse_pp(reconQt, reconQtStride, fenc, stride));
if (m_rdCost.m_psyRd)
psyEnergy += m_rdCost.psyCost(sizeIdxC, fenc, stride, picReconC, picStride);
- primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, reconQt, reconQtStride);
+ primitives.pu[sizeIdxC].luma_copy_pp(picReconC, picStride, reconQt, reconQtStride);
}
}
while (tuIterator.isNextSection());
@@ -927,13 +927,13 @@
pixel* recon = (useTSkip ? tskipReconC : reconQt);
uint32_t reconStride = (useTSkip ? MAX_TS_SIZE : reconQtStride);
- primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
+ primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
if (numSig)
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeff, log2TrSizeC, ttype, true, useTSkip, numSig);
- primitives.luma_add_ps[sizeIdxC](recon, reconStride, pred, residual, stride, stride);
+ primitives.pu[sizeIdxC].luma_add_ps(recon, reconStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
else if (useTSkip)
@@ -943,10 +943,10 @@
}
else
{
- primitives.luma_copy_pp[sizeIdxC](recon, reconStride, pred, stride);
+ primitives.pu[sizeIdxC].luma_copy_pp(recon, reconStride, pred, stride);
cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
- uint32_t tmpDist = primitives.sse_pp[sizeIdxC](recon, reconStride, fenc, stride);
+ uint32_t tmpDist = primitives.pu[sizeIdxC].sse_pp(recon, reconStride, fenc, stride);
tmpDist = m_rdCost.scaleChromaDist(chromaId, tmpDist);
cu.setTransformSkipPartRange(useTSkip, ttype, absPartIdxC, tuIterator.absPartIdxStep);
@@ -982,7 +982,7 @@
if (bTSkip)
{
memcpy(coeffC, tskipCoeffC, sizeof(coeff_t) << (log2TrSizeC * 2));
- primitives.luma_copy_pp[sizeIdxC](reconQt, reconQtStride, tskipReconC, MAX_TS_SIZE);
+ primitives.pu[sizeIdxC].luma_copy_pp(reconQt, reconQtStride, tskipReconC, MAX_TS_SIZE);
}
cu.setCbfPartRange(bCbf << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
@@ -990,7 +990,7 @@
pixel* reconPicC = m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx + absPartIdxC);
intptr_t picStride = m_frame->m_reconPic->m_strideC;
- primitives.luma_copy_pp[sizeIdxC](reconPicC, picStride, reconQt, reconQtStride);
+ primitives.pu[sizeIdxC].luma_copy_pp(reconPicC, picStride, reconQt, reconQtStride);
outDist += bDist;
psyEnergy += bEnergy;
@@ -1118,18 +1118,18 @@
X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip not supported at low RD levels\n");
- primitives.calcresidual[sizeIdxC](fenc, pred, residual, stride);
+ primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
if (numSig)
{
m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, coeffC, log2TrSizeC, ttype, true, false, numSig);
- primitives.luma_add_ps[sizeIdxC](picReconC, picStride, pred, residual, stride, stride);
+ primitives.pu[sizeIdxC].luma_add_ps(picReconC, picStride, pred, residual, stride, stride);
cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
else
{
// no coded residual, recon = pred
- primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, pred, stride);
+ primitives.pu[sizeIdxC].luma_copy_pp(picReconC, picStride, pred, stride);
cu.setCbfPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
}
}
@@ -1253,7 +1253,7 @@
leftFiltered = leftScale;
}
- pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
+ pixelcmp_t sa8d = primitives.cu[sizeIdx].sa8d;
int predsize = scaleTuSize * scaleTuSize;
m_entropyCoder.loadIntraDirModeLuma(m_rqt[depth].cur);
@@ -1293,7 +1293,7 @@
bool allangs = true;
if (primitives.intra_pred_allangs[sizeIdx])
{
- primitives.transpose[sizeIdx](bufTrans, fenc, scaleStride);
+ primitives.cu[sizeIdx].transpose(bufTrans, fenc, scaleStride);
primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
}
else
@@ -1505,7 +1505,7 @@
uint32_t preds[3];
uint32_t rbits = getIntraRemModeBits(cu, absPartIdx, preds, mpms);
- pixelcmp_t sa8d = primitives.sa8d[sizeIdx];
+ pixelcmp_t sa8d = primitives.cu[sizeIdx].sa8d;
uint64_t modeCosts[35];
uint64_t bcost;
@@ -1533,7 +1533,7 @@
if (primitives.intra_pred_allangs[sizeIdx])
{
primitives.intra_pred_allangs[sizeIdx](tmp, above, left, aboveFiltered, leftFiltered, (scaleTuSize <= 16));
- primitives.transpose[sizeIdx](buf_trans, fenc, scaleStride);
+ primitives.cu[sizeIdx].transpose(buf_trans, fenc, scaleStride);
for (int mode = 2; mode < 35; mode++)
{
bool modeHor = (mode < 18);
@@ -1617,7 +1617,7 @@
uint32_t dststride = m_frame->m_reconPic->m_stride;
const pixel* src = reconYuv->getLumaAddr(absPartIdx);
uint32_t srcstride = reconYuv->m_size;
- primitives.luma_copy_pp[log2TrSize - 2](dst, dststride, src, srcstride);
+ primitives.pu[log2TrSize - 2].luma_copy_pp(dst, dststride, src, srcstride);
}
}
@@ -1685,7 +1685,7 @@
// get prediction signal
predIntraChromaAng(chromaPred, chromaPredMode, pred, fencYuv->m_csize, log2TrSizeC, m_csp);
- cost += primitives.sa8d[log2TrSizeC - 2](fenc, predYuv->m_csize, pred, fencYuv->m_csize) << costShift;
+ cost += primitives.cu[log2TrSizeC - 2].sa8d(fenc, predYuv->m_csize, pred, fencYuv->m_csize) << costShift;
}
if (cost < bestCost)
@@ -1783,11 +1783,11 @@
dst = m_frame->m_reconPic->getCbAddr(cu.m_cuAddr, zorder);
src = reconYuv.getCbAddr(absPartIdxC);
- primitives.chroma[m_csp].copy_pp[part](dst, dststride, src, reconYuv.m_csize);
+ primitives.chroma[m_csp].pu[part].copy_pp(dst, dststride, src, reconYuv.m_csize);
dst = m_frame->m_reconPic->getCrAddr(cu.m_cuAddr, zorder);
src = reconYuv.getCrAddr(absPartIdxC);
- primitives.chroma[m_csp].copy_pp[part](dst, dststride, src, reconYuv.m_csize);
+ primitives.chroma[m_csp].pu[part].copy_pp(dst, dststride, src, reconYuv.m_csize);
}
memcpy(cu.m_cbf[1] + absPartIdxC, m_qtTempCbf[1], tuIterator.absPartIdxStep * sizeof(uint8_t));
@@ -2232,7 +2232,7 @@
predInterLumaPixel(bidirYuv[0], *refPic0, bestME[0].mv);
predInterLumaPixel(bidirYuv[1], *refPic1, bestME[1].mv);
- primitives.pixelavg_pp[m_me.partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, bidirYuv[0].getLumaAddr(m_puAbsPartIdx), bidirYuv[0].m_size,
+ primitives.pu[m_me.partEnum].pixelavg_pp(tmpPredYuv.m_buf[0], tmpPredYuv.m_size, bidirYuv[0].getLumaAddr(m_puAbsPartIdx), bidirYuv[0].m_size,
bidirYuv[1].getLumaAddr(m_puAbsPartIdx), bidirYuv[1].m_size, 32);
satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
}
@@ -2277,7 +2277,7 @@
const pixel* ref1 = m_slice->m_mref[1][bestME[1].ref].getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + m_puAbsPartIdx);
intptr_t refStride = slice->m_mref[0][0].lumaStride;
- primitives.pixelavg_pp[m_me.partEnum](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, ref0, refStride, ref1, refStride, 32);
+ primitives.pu[m_me.partEnum].pixelavg_pp(tmpPredYuv.m_buf[0], tmpPredYuv.m_size, ref0, refStride, ref1, refStride, 32);
satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
}
@@ -2500,11 +2500,11 @@
// Luma
int part = partitionFromLog2Size(cu.m_log2CUSize[0]);
- interMode.distortion = primitives.sse_pp[part](fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
+ interMode.distortion = primitives.pu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
// Chroma
part = partitionFromSizes(cuSize >> m_hChromaShift, cuSize >> m_vChromaShift);
- interMode.distortion += m_rdCost.scaleChromaDist(1, primitives.sse_pp[part](fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
- interMode.distortion += m_rdCost.scaleChromaDist(2, primitives.sse_pp[part](fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+ interMode.distortion += m_rdCost.scaleChromaDist(1, primitives.pu[part].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+ interMode.distortion += m_rdCost.scaleChromaDist(2, primitives.pu[part].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
m_entropyCoder.load(m_rqt[depth].cur);
m_entropyCoder.resetBits();
@@ -2556,9 +2556,9 @@
if (!cu.m_tqBypass[0])
{
- uint32_t cbf0Dist = primitives.sse_pp[part](fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
- cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.sse_pp[cpart](fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
- cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.sse_pp[cpart](fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
+ uint32_t cbf0Dist = primitives.pu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
+ cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.pu[cpart].sse_pp(fencYuv->m_buf[1], predYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
+ cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.pu[cpart].sse_pp(fencYuv->m_buf[2], predYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
/* Consider the RD cost of not signaling any residual */
m_entropyCoder.load(m_rqt[depth].cur);
@@ -2629,9 +2629,9 @@
reconYuv->copyFromYuv(*predYuv);
// update with clipped distortion and cost (qp estimation loop uses unclipped values)
- uint32_t bestDist = primitives.sse_pp[part](fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
- bestDist += m_rdCost.scaleChromaDist(1, primitives.sse_pp[cpart](fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
- bestDist += m_rdCost.scaleChromaDist(2, primitives.sse_pp[cpart](fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
+ uint32_t bestDist = primitives.pu[part].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
+ bestDist += m_rdCost.scaleChromaDist(1, primitives.pu[cpart].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
+ bestDist += m_rdCost.scaleChromaDist(2, primitives.pu[cpart].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
if (m_rdCost.m_psyRd)
interMode.psyEnergy = m_rdCost.psyCost(log2CUSize - 2, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
@@ -2695,7 +2695,7 @@
}
else
{
- primitives.blockfill_s[sizeIdx](curResiY, strideResiY, 0);
+ primitives.cu[sizeIdx].blockfill_s(curResiY, strideResiY, 0);
cu.setCbfSubParts(0, TEXT_LUMA, absPartIdx, depth);
}
@@ -2728,7 +2728,7 @@
}
else
{
- primitives.blockfill_s[sizeIdxC](curResiU, strideResiC, 0);
+ primitives.cu[sizeIdxC].blockfill_s(curResiU, strideResiC, 0);
cu.setCbfPartRange(0, TEXT_CHROMA_U, absPartIdxC, tuIterator.absPartIdxStep);
}
@@ -2742,7 +2742,7 @@
}
else
{
- primitives.blockfill_s[sizeIdxC](curResiV, strideResiC, 0);
+ primitives.cu[sizeIdxC].blockfill_s(curResiV, strideResiC, 0);
cu.setCbfPartRange(0, TEXT_CHROMA_V, absPartIdxC, tuIterator.absPartIdxStep);
}
}
@@ -2874,7 +2874,7 @@
singleBits[TEXT_LUMA][0] = singleBitsPrev - fullCost.bits;
X265_CHECK(log2TrSize <= 5, "log2TrSize is too large\n");
- uint32_t distY = primitives.ssd_s[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size);
+ uint32_t distY = primitives.cu[partSize].ssd_s(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size);
uint32_t psyEnergyY = 0;
if (m_rdCost.m_psyRd)
psyEnergyY = m_rdCost.psyCost(partSize, resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, (int16_t*)zeroShort, 0);
@@ -2888,7 +2888,7 @@
// non-zero cost calculation for luma - This is an approximation
// finally we have to encode correct cbf after comparing with null cost
- const uint32_t nonZeroDistY = primitives.sse_ss[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, curResiY, strideResiY);
+ const uint32_t nonZeroDistY = primitives.pu[partSize].sse_ss(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, curResiY, strideResiY);
uint32_t nzCbfBitsY = m_entropyCoder.estimateCbfBits(cbfFlag[TEXT_LUMA][0], TEXT_LUMA, tuDepth);
uint32_t nonZeroPsyEnergyY = 0; uint64_t singleCostY = 0;
if (m_rdCost.m_psyRd)
@@ -2915,7 +2915,7 @@
{
cbfFlag[TEXT_LUMA][0] = 0;
singleBits[TEXT_LUMA][0] = 0;
- primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
+ primitives.cu[partSize].blockfill_s(curResiY, strideResiY, 0);
#if CHECKED_BUILD || _DEBUG
uint32_t numCoeffY = 1 << (log2TrSize << 1);
memset(coeffCurY, 0, sizeof(coeff_t) * numCoeffY);
@@ -2938,7 +2938,7 @@
{
if (checkTransformSkipY)
minCost[TEXT_LUMA][0] = estimateNullCbfCost(distY, psyEnergyY, tuDepth, TEXT_LUMA);
- primitives.blockfill_s[partSize](curResiY, strideResiY, 0);
+ primitives.cu[partSize].blockfill_s(curResiY, strideResiY, 0);
singleDist[TEXT_LUMA][0] = distY;
singlePsyEnergy[TEXT_LUMA][0] = psyEnergyY;
}
@@ -2979,7 +2979,7 @@
singleBitsPrev = newBits;
int16_t* curResiC = m_rqt[qtLayer].resiQtYuv.getChromaAddr(chromaId, absPartIdxC);
- distC = m_rdCost.scaleChromaDist(chromaId, primitives.ssd_s[log2TrSizeC - 2](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize));
+ distC = m_rdCost.scaleChromaDist(chromaId, primitives.cu[log2TrSizeC - 2].ssd_s(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize));
if (cbfFlag[chromaId][tuIterator.section])
{
@@ -2988,7 +2988,7 @@
// non-zero cost calculation for luma, same as luma - This is an approximation
// finally we have to encode correct cbf after comparing with null cost
- uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, curResiC, strideResiC);
+ uint32_t dist = primitives.pu[partSizeC].sse_ss(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, curResiC, strideResiC);
uint32_t nzCbfBitsC = m_entropyCoder.estimateCbfBits(cbfFlag[chromaId][tuIterator.section], (TextType)chromaId, tuDepth);
uint32_t nonZeroDistC = m_rdCost.scaleChromaDist(chromaId, dist);
uint32_t nonZeroPsyEnergyC = 0; uint64_t singleCostC = 0;
@@ -3014,7 +3014,7 @@
{
cbfFlag[chromaId][tuIterator.section] = 0;
singleBits[chromaId][tuIterator.section] = 0;
- primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
+ primitives.cu[partSizeC].blockfill_s(curResiC, strideResiC, 0);
#if CHECKED_BUILD || _DEBUG
uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
memset(coeffCurC + subTUOffset, 0, sizeof(coeff_t) * numCoeffC);
@@ -3037,7 +3037,7 @@
{
if (checkTransformSkipC)
minCost[chromaId][tuIterator.section] = estimateNullCbfCost(distC, psyEnergyC, tuDepthC, (TextType)chromaId);
- primitives.blockfill_s[partSizeC](curResiC, strideResiC, 0);
+ primitives.cu[partSizeC].blockfill_s(curResiC, strideResiC, 0);
singleDist[chromaId][tuIterator.section] = distC;
singlePsyEnergy[chromaId][tuIterator.section] = psyEnergyC;
}
@@ -3077,7 +3077,7 @@
m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], tsResiY, trSize, tsCoeffY, log2TrSize, TEXT_LUMA, false, true, numSigTSkipY);
- nonZeroDistY = primitives.sse_ss[partSize](resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, tsResiY, trSize);
+ nonZeroDistY = primitives.pu[partSize].sse_ss(resiYuv.getLumaAddr(absPartIdx), resiYuv.m_size, tsResiY, trSize);
if (m_rdCost.m_psyRd)
{
@@ -3098,7 +3098,7 @@
bestTransformMode[TEXT_LUMA][0] = 1;
uint32_t numCoeffY = 1 << (log2TrSize << 1);
memcpy(coeffCurY, tsCoeffY, sizeof(coeff_t) * numCoeffY);
- primitives.luma_copy_ss[partSize](curResiY, strideResiY, tsResiY, trSize);
+ primitives.pu[partSize].luma_copy_ss(curResiY, strideResiY, tsResiY, trSize);
}
cu.setCbfSubParts(cbfFlag[TEXT_LUMA][0] << tuDepth, TEXT_LUMA, absPartIdx, depth);
@@ -3148,7 +3148,7 @@
m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], tsResiC, trSizeC, tsCoeffC,
log2TrSizeC, (TextType)chromaId, false, true, numSigTSkipC);
- uint32_t dist = primitives.sse_ss[partSizeC](resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
+ uint32_t dist = primitives.pu[partSizeC].sse_ss(resiYuv.getChromaAddr(chromaId, absPartIdxC), resiYuv.m_csize, tsResiC, trSizeC);
nonZeroDistC = m_rdCost.scaleChromaDist(chromaId, dist);
if (m_rdCost.m_psyRd)
{
@@ -3169,7 +3169,7 @@
bestTransformMode[chromaId][tuIterator.section] = 1;
uint32_t numCoeffC = 1 << (log2TrSizeC << 1);
memcpy(coeffCurC + subTUOffset, tsCoeffC, sizeof(coeff_t) * numCoeffC);
- primitives.luma_copy_ss[partSizeC](curResiC, strideResiC, tsResiC, trSizeC);
+ primitives.pu[partSizeC].luma_copy_ss(curResiC, strideResiC, tsResiC, trSizeC);
}
cu.setCbfPartRange(cbfFlag[chromaId][tuIterator.section] << tuDepth, (TextType)chromaId, absPartIdxC, tuIterator.absPartIdxStep);
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/slicetype.cpp Thu Jan 08 15:30:26 2015 -0600
@@ -1428,7 +1428,7 @@
{
for (int x = 0; x < fenc->width; x += 8, mb++, pixoff += 8)
{
- int satd = primitives.satd[LUMA_8x8](src + pixoff, stride, fenc->fpelPlane[0] + pixoff, stride);
+ int satd = primitives.pu[LUMA_8x8].satd(src + pixoff, stride, fenc->fpelPlane[0] + pixoff, stride);
cost += X265_MIN(satd, fenc->intraCost[mb]);
}
}
@@ -1653,15 +1653,15 @@
pixel *src1 = fref1->lowresMC(pelOffset, *fenc_mvs[1], subpelbuf1, stride1);
ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
- primitives.pixelavg_pp[LUMA_8x8](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
- int bicost = primitives.satd[LUMA_8x8](fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE);
+ primitives.pu[LUMA_8x8].pixelavg_pp(ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
+ int bicost = primitives.pu[LUMA_8x8].satd(fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE);
COPY2_IF_LT(bcost, bicost, listused, 3);
// Try 0,0 candidates
src0 = wfref0->lowresPlane[0] + pelOffset;
src1 = fref1->lowresPlane[0] + pelOffset;
- primitives.pixelavg_pp[LUMA_8x8](ref, X265_LOWRES_CU_SIZE, src0, wfref0->lumaStride, src1, fref1->lumaStride, 32);
- bicost = primitives.satd[LUMA_8x8](fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE);
+ primitives.pu[LUMA_8x8].pixelavg_pp(ref, X265_LOWRES_CU_SIZE, src0, wfref0->lumaStride, src1, fref1->lumaStride, 32);
+ bicost = primitives.pu[LUMA_8x8].satd(fenc->lowresPlane[0] + pelOffset, fenc->lumaStride, ref, X265_LOWRES_CU_SIZE);
COPY2_IF_LT(bcost, bicost, listused, 3);
}
}
@@ -1704,7 +1704,7 @@
int predsize = cuSize * cuSize;
// generate 35 intra predictions into m_predictions
- pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)];
+ pixelcmp_t satd = primitives.pu[partitionFromLog2Size(X265_LOWRES_CU_BITS)].satd;
int icost = m_me.COST_MAX;
primitives.intra_pred[DC_IDX][sizeIdx](m_predictions, cuSize, left0, above0, 0, (cuSize <= 16));
int cost = m_me.bufSATD(m_predictions, cuSize);
@@ -1723,7 +1723,7 @@
ALIGN_VAR_32(pixel, buf_trans[32 * 32]);
primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize <= 16));
- primitives.transpose[sizeIdx](buf_trans, m_me.fencPUYuv.m_buf[0], FENC_STRIDE);
+ primitives.cu[sizeIdx].transpose(buf_trans, m_me.fencPUYuv.m_buf[0], FENC_STRIDE);
int acost = m_me.COST_MAX;
for (mode = 5; mode < 35; mode += 5)
diff -r c6ca0fd54aa7 -r e5ad493e60de source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Thu Jan 08 15:23:38 2015 -0600
+++ b/source/encoder/weightPrediction.cpp Thu Jan 08 15:30:26 2015 -0600
@@ -80,7 +80,7 @@
MV mv = mvs[cu];
mv = mv.clipped(mvmin, mvmax);
pixel *tmp = ref.lowresMC(pixoff, mv, buf8x8, bstride);
- primitives.luma_copy_pp[LUMA_8x8](mcout + pixoff, stride, tmp, bstride);
+ primitives.pu[LUMA_8x8].luma_copy_pp(mcout + pixoff, stride, tmp, bstride);
}
}
}
@@ -133,26 +133,26 @@
int yFrac = mv.y & 0x7;
if ((yFrac | xFrac) == 0)
{
- primitives.chroma[csp].copy_pp[LUMA_16x16](mcout + pixoff, stride, temp, stride);
+ primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout + pixoff, stride, temp, stride);
}
else if (yFrac == 0)
{
- primitives.chroma[csp].filter_hpp[LUMA_16x16](temp, stride, mcout + pixoff, stride, xFrac);
+ primitives.chroma[csp].pu[LUMA_16x16].filter_hpp(temp, stride, mcout + pixoff, stride, xFrac);
}
else if (xFrac == 0)
{
- primitives.chroma[csp].filter_vpp[LUMA_16x16](temp, stride, mcout + pixoff, stride, yFrac);
+ primitives.chroma[csp].pu[LUMA_16x16].filter_vpp(temp, stride, mcout + pixoff, stride, yFrac);
}
else
{
ALIGN_VAR_16(int16_t, imm[16 * (16 + NTAPS_CHROMA)]);
- primitives.chroma[csp].filter_hps[LUMA_16x16](temp, stride, imm, bw, xFrac, 1);
- primitives.chroma[csp].filter_vsp[LUMA_16x16](imm + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
+ primitives.chroma[csp].pu[LUMA_16x16].filter_hps(temp, stride, imm, bw, xFrac, 1);
+ primitives.chroma[csp].pu[LUMA_16x16].filter_vsp(imm + ((NTAPS_CHROMA >> 1) - 1) * bw, bw, mcout + pixoff, stride, yFrac);
}
}
else
{
- primitives.chroma[csp].copy_pp[LUMA_16x16](mcout + pixoff, stride, src + pixoff, stride);
+ primitives.chroma[csp].pu[LUMA_16x16].copy_pp(mcout + pixoff, stride, src + pixoff, stride);
}
}
}
@@ -197,7 +197,7 @@
{
for (int x = 0; x < width; x += 8, cu++)
{
- int cmp = primitives.satd[LUMA_8x8](r + x, stride, f + x, stride);
+ int cmp = primitives.pu[LUMA_8x8].satd(r + x, stride, f + x, stride);
cost += X265_MIN(cmp, cache.intraCost[cu]);
}
}
@@ -205,11 +205,11 @@
else if (cache.csp == X265_CSP_I444)
for (int y = 0; y < height; y += 16, r += 16 * stride, f += 16 * stride)
for (int x = 0; x < width; x += 16)
- cost += primitives.satd[LUMA_16x16](r + x, stride, f + x, stride);
+ cost += primitives.pu[LUMA_16x16].satd(r + x, stride, f + x, stride);
else
for (int y = 0; y < height; y += 8, r += 8 * stride, f += 8 * stride)
for (int x = 0; x < width; x += 8)
- cost += primitives.satd[LUMA_8x8](r + x, stride, f + x, stride);
+ cost += primitives.pu[LUMA_8x8].satd(r + x, stride, f + x, stride);
return cost;
}
More information about the x265-devel
mailing list