[x265] [PATCH 2 of 3] Importing x264 weight analysis to encoder
shazeb at multicorewareinc.com
shazeb at multicorewareinc.com
Mon Dec 30 13:25:51 CET 2013
# HG changeset patch
# User Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
# Date 1388406183 -19800
# Mon Dec 30 17:53:03 2013 +0530
# Node ID a172c7333c31488425ba2209fdf5cc6bd11b078e
# Parent 3cc30f74ce04aed7af82ea4ba6e17856160974f5
Importing x264 weight analysis to encoder
diff -r 3cc30f74ce04 -r a172c7333c31 source/encoder/CMakeLists.txt
--- a/source/encoder/CMakeLists.txt Mon Dec 30 17:51:24 2013 +0530
+++ b/source/encoder/CMakeLists.txt Mon Dec 30 17:53:03 2013 +0530
@@ -69,4 +69,5 @@
ratecontrol.cpp ratecontrol.h
compress.cpp
reference.cpp reference.h
- encoder.cpp encoder.h)
+ encoder.cpp encoder.h
+ weightPrediction.cpp weightPrediction.h)
diff -r 3cc30f74ce04 -r a172c7333c31 source/encoder/weightPrediction.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/encoder/weightPrediction.cpp Mon Dec 30 17:53:03 2013 +0530
@@ -0,0 +1,386 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Author: Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at multicorewareinc.com.
+ *****************************************************************************/
+
+#include "TLibCommon/TComPic.h"
+#include "lowres.h"
+#include "mv.h"
+#include "slicetype.h"
+#include "weightPrediction.h"
+
+using namespace x265;
+
+void WeightPrediction::mcChroma()
+{
+ intptr_t strd = 8;
+
+ strd = m_refStride;
+ int pixoff = 0;
+ int cu = 0;
+ pixel *temp;
+ int partEnum = CHROMA_8x8;
+
+ for (int y = 0; y < frmHeight; y += blockSize, pixoff = y * m_refStride)
+ {
+ for (int x = 0; x < frmWidth; x += blockSize, pixoff += blockSize, cu++)
+ {
+ if (m_mvCost[cu] < m_intraCost[cu])
+ {
+ MV mv(mvs[cu]);
+ int refOffset = (mv.x >> (3 - m_csp444)) + (mv.y >> (3 - m_csp444)) * (int)m_refStride;
+ temp = mcbuf + refOffset + pixoff;
+
+ int xFrac = mv.x & 0x7;
+ int yFrac = mv.y & 0x7;
+
+ if ((yFrac | xFrac) == 0)
+ {
+ primitives.chroma[m_csp].copy_pp[partEnum](buf + pixoff, m_refStride, temp, strd);
+ }
+ else if (yFrac == 0)
+ {
+ primitives.chroma[m_csp].filter_hpp[partEnum](temp, strd, buf + pixoff, m_refStride, xFrac);
+ }
+ else if (xFrac == 0)
+ {
+ primitives.chroma[m_csp].filter_vpp[partEnum](temp, strd, buf + pixoff, m_refStride, yFrac);
+ }
+ else
+ {
+ uint32_t cxWidth = blockSize;
+ uint32_t cxHeight = blockSize;
+ int16_t *immedVal = (int16_t*)X265_MALLOC(int16_t, 64 * (64 + NTAPS_LUMA - 1));
+ int extStride = cxWidth;
+ int filterSize = NTAPS_CHROMA;
+ int halfFilterSize = (filterSize >> 1);
+
+ primitives.chroma[m_csp].filter_hps[partEnum](temp, strd, immedVal, extStride, xFrac, 1);
+ primitives.chroma_vsp(immedVal + (halfFilterSize - 1) * extStride, extStride, buf + pixoff, m_refStride, cxWidth, cxHeight, yFrac);
+
+ X265_FREE(immedVal);
+ }
+ }
+ else
+ {
+ primitives.chroma[m_csp].copy_pp[partEnum](buf + pixoff, m_refStride, mcbuf + pixoff, m_refStride);
+ }
+ }
+ }
+
+ mcbuf = buf;
+}
+
+uint32_t WeightPrediction::weightCost(pixel *cur, pixel *ref, wpScalingParam *w)
+{
+ int stride = m_refStride;
+ pixel *temp = (pixel*)X265_MALLOC(pixel, frmWidth * frmHeight);
+ bool nonBorderCU;
+
+ if (w)
+ {
+ int offset = w->inputOffset << (X265_DEPTH - 8);
+ int scale = w->inputWeight;
+ int denom = w->log2WeightDenom;
+ int correction = IF_INTERNAL_PREC - X265_DEPTH;
+
+ // Adding (IF_INTERNAL_PREC - X265_DEPTH) to cancel effect of pixel to short conversion inside the primitive
+ primitives.weight_pp(ref, temp, m_refStride, m_dstStride, frmWidth, frmHeight,
+ scale, (1 << (denom - 1 + correction)), denom + correction, offset);
+ ref = temp;
+ stride = m_dstStride;
+ }
+
+ int32_t cost = 0;
+ int pixoff = 0;
+ int mb = 0;
+ int count = 0;
+ for (int y = 0; y < frmHeight; y += 8, pixoff = y * m_refStride)
+ {
+ for (int x = 0; x < frmWidth; x += 8, mb++, pixoff += 8)
+ {
+ nonBorderCU = (x > 0) && (x < frmWidth - 8 - 1) && (y > 0) && (y < frmHeight - 8 - 1);
+ if (nonBorderCU)
+ {
+ if (mvs)
+ {
+ if (m_mvCost[mb] < m_intraCost[mb])
+ {
+ int satd = primitives.satd[LUMA_8x8](ref + (stride * y) + x, stride, cur + pixoff, m_refStride);
+ cost += satd;
+ count++;
+ }
+ }
+ else
+ {
+ int satd = primitives.satd[LUMA_8x8](ref + (stride * y) + x, stride, cur + pixoff, m_refStride);
+ cost += satd;
+ }
+ }
+ }
+ }
+
+ X265_FREE(temp);
+ x265_emms();
+ return cost;
+}
+
+void WeightPrediction::weightAnalyseEnc()
+{
+ wpScalingParam w, *fw;
+ Lowres *fenc, *ref;
+
+ fenc = &slice->getPic()->m_lowres;
+ int numPredDir = slice->isInterP() ? 1 : 2;
+ int curPoc, refPoc, difPoc;
+ curPoc = slice->getPOC();
+ int check;
+ int fullCheck = 0;
+ int lumaDenom = 0;
+ int numWeighted = 0; // number of weighted references for each slice must be less than 8 as per HEVC standard
+ int width[3], height[3];
+ // Rounding the width, height to 16
+ width[0] = ((slice->getPic()->getPicYuvOrg()->getWidth() + 8) >> 4) << 4;
+ height[0] = ((slice->getPic()->getPicYuvOrg()->getHeight() + 8) >> 4) << 4;
+ width[2] = width[1] = width[0] >> 1;
+ height[2] = height[1] = height[0] >> 1;
+
+ for (int list = 0; list < numPredDir; list++)
+ {
+ for (int refIdxTemp = 0; (refIdxTemp < slice->getNumRefIdx(list)) && (numWeighted < 8); refIdxTemp++)
+ {
+ check = 0;
+ fw = m_wp[list][refIdxTemp];
+ ref = &slice->getRefPic(list, refIdxTemp)->m_lowres;
+ refPoc = slice->getRefPic(list, refIdxTemp)->getPOC();
+ difPoc = abs(curPoc - refPoc);
+ mvs = fenc->lowresMvs[list][difPoc - 1];
+ if (mvs) m_mvCost = fenc->lowresMvCosts[0][difPoc - 1];
+ const float epsilon = 1.f / 128.f;
+ float guessScale[3], fencMean[3], refMean[3];
+
+ for (int yuv = 0; yuv < 3; yuv++)
+ {
+ float fencVar = (float)fenc->wp_ssd[yuv] + !ref->wp_ssd[yuv];
+ float refVar = (float)ref->wp_ssd[yuv] + !ref->wp_ssd[yuv];
+ guessScale[yuv] = sqrtf((float)fencVar / refVar);
+ fencMean[yuv] = (float)fenc->wp_sum[yuv] / (height[yuv] * width[yuv]) / (1 << (X265_DEPTH - 8));
+ refMean[yuv] = (float)ref->wp_sum[yuv] / (height[yuv] * width[yuv]) / (1 << (X265_DEPTH - 8));
+ }
+
+ for (int yuv = 0; yuv < 3; yuv++)
+ {
+ int ic = 0;
+ SET_WEIGHT(w, 0, 1, 0, 0);
+ /* Early termination */
+ if (fabsf(refMean[yuv] - fencMean[yuv]) < 0.5f && fabsf(1.f - guessScale[yuv]) < epsilon)
+ continue;
+
+ int chromaDenom = 7;
+ if (yuv)
+ {
+ while (chromaDenom > lumaDenom)
+ {
+ float thresh = 127.f / (1 << chromaDenom);
+ if (guessScale[1] < thresh && guessScale[2] < thresh)
+ break;
+ chromaDenom--;
+ }
+ }
+
+ /* Don't check chroma in lookahead, or if there wasn't a luma weight. */
+ int minoff = 0, minscale, mindenom;
+ unsigned int minscore = 0, origscore = 1;
+ int found = 0;
+
+ if (yuv)
+ {
+ w.log2WeightDenom = chromaDenom;
+ w.inputWeight = Clip3(0, 255, (int)guessScale[yuv] * (1 << w.log2WeightDenom));
+ if (w.inputWeight > 127)
+ {
+ SET_WEIGHT(fw[1], 0, 64, 6, 0);
+ SET_WEIGHT(fw[2], 0, 64, 6, 0);
+ break;
+ }
+ }
+ else
+ w.setFromWeightAndOffset((int)(guessScale[yuv] * 128 + 0.5), 0);
+
+ if (!yuv) lumaDenom = w.log2WeightDenom;
+ mindenom = w.log2WeightDenom;
+ minscale = w.inputWeight;
+
+ switch (yuv)
+ {
+ case 0:
+
+ mcbuf = ref->fpelPlane;
+ inbuf = fenc->lowresPlane[0];
+ if (mvs)
+ {
+ pixel *tempBuf;
+ pixel buf8[8 * 8];
+ int pixoff = 0, cu = 0;
+ intptr_t strd;
+ for (int y = 0; y < frmHeight; y += 8, pixoff = y * m_refStride)
+ {
+ for (int x = 0; x < frmWidth; x += 8, pixoff += 8, cu++)
+ {
+ if (fenc->lowresMvCosts[0][difPoc - 1][cu] > fenc->intraCost[cu])
+ {
+ strd = m_refStride;
+ tempBuf = inbuf + pixoff;
+ }
+ else
+ {
+ strd = 8;
+ tempBuf = ref->lowresMC(pixoff, mvs[cu], buf8, strd);
+ ic++;
+ }
+ primitives.blockcpy_pp(8, 8, buf + (y * m_refStride) + x, m_refStride, tempBuf, strd);
+ }
+ }
+
+ mcbuf = buf;
+ }
+ break;
+
+ case 1:
+
+ mcbuf = slice->getRefPic(list, refIdxTemp)->getPicYuvOrg()->getCbAddr();
+ inbuf = slice->getPic()->getPicYuvOrg()->getCbAddr();
+ blockSize = 8;
+ if (mvs) mcChroma();
+ break;
+
+ case 2:
+
+ mcbuf = slice->getRefPic(list, refIdxTemp)->getPicYuvOrg()->getCrAddr();
+ inbuf = slice->getPic()->getPicYuvOrg()->getCrAddr();
+ blockSize = 8;
+ if (mvs) mcChroma();
+ break;
+ }
+
+ origscore = minscore = weightCost(inbuf, mcbuf, NULL);
+
+ if (!minscore)
+ continue;
+
+ int sD = 4;
+ int oD = 2;
+ unsigned int s = 0;
+
+ for (int is = minscale - sD; is <= minscale + sD; is++)
+ {
+ int deltaWeight = minscale - (1 << mindenom);
+ if (deltaWeight > 127 || deltaWeight <= -128)
+ continue;
+
+ int curScale = minscale;
+ int curOffset = (int)(fencMean[yuv] - refMean[yuv] * curScale / (1 << mindenom) + 0.5f);
+ if (curOffset < -128 || curOffset > 127)
+ {
+ /* Rescale considering the constraints on curOffset. We do it in this order
+ * because scale has a much wider range than offset (because of denom), so
+ * it should almost never need to be clamped. */
+ curOffset = Clip3(-128, 127, curOffset);
+ curScale = (int)((1 << mindenom) * (fencMean[yuv] - curOffset) / refMean[yuv] + 0.5f);
+ curScale = Clip3(0, 127, curScale);
+ }
+
+ for (int ioff = curOffset - oD; (ioff <= (curOffset + oD)) && (ioff < 127); ioff++)
+ {
+ if (yuv)
+ {
+ int pred = (128 - ((128 * curScale) >> (mindenom)));
+ int deltaOffset = Clip3(-512, 511, (ioff - pred)); // signed 10bit
+ ioff = Clip3(-128, 127, (deltaOffset + pred)); // signed 8bit
+ }
+ else
+ {
+ ioff = Clip3(-128, 127, ioff);
+ }
+
+ s = 0;
+ SET_WEIGHT(w, 1, is, mindenom, ioff);
+ s = weightCost(inbuf, mcbuf, &w);
+ COPY4_IF_LT(minscore, s, minscale, is, minoff, ioff, found, 1);
+ if (minoff == curOffset - oD && ioff != curOffset - oD)
+ break;
+ }
+ }
+
+ if (!found || (minscale == 1 << mindenom && minoff == 0) || (float)minscore / origscore > 0.998f)
+ continue;
+ else
+ {
+ SET_WEIGHT(w, 1, minscale, mindenom, minoff);
+ SET_WEIGHT(fw[yuv], 1, minscale, mindenom, minoff);
+ check++;
+ fullCheck++;
+ }
+ }
+
+ if (check)
+ {
+ numWeighted++;
+ if (fw[0].log2WeightDenom == 7)
+ {
+ fw[0].inputWeight >>= 1;
+ fw[0].log2WeightDenom--;
+ }
+
+ int maxlog = fw[0].log2WeightDenom > fw[1].log2WeightDenom ? (fw[0].log2WeightDenom > fw[2].log2WeightDenom ? fw[0].log2WeightDenom : fw[2].log2WeightDenom) :
+ (fw[1].log2WeightDenom > fw[2].log2WeightDenom ? fw[1].log2WeightDenom : fw[2].log2WeightDenom);
+ fw[0].inputWeight <<= (maxlog - fw[0].log2WeightDenom);
+ fw[0].log2WeightDenom += (maxlog - fw[0].log2WeightDenom);
+ fw[1].inputWeight <<= (maxlog - fw[1].log2WeightDenom);
+ fw[1].log2WeightDenom += (maxlog - fw[1].log2WeightDenom);
+ fw[2].inputWeight <<= (maxlog - fw[2].log2WeightDenom);
+ fw[2].log2WeightDenom += (maxlog - fw[2].log2WeightDenom);
+ fw[1].bPresentFlag = true;
+ fw[2].bPresentFlag = true;
+
+ int deltaWeight;
+ bool deltaHigh = false;
+ for (int i = 0; i < 3; i++)
+ {
+ deltaWeight = fw[i].inputWeight - (1 << fw[i].log2WeightDenom);
+ if (deltaWeight > 127 || deltaWeight <= -128)
+ deltaHigh = true;
+ }
+
+ if (deltaHigh)
+ {
+ SET_WEIGHT(fw[0], 0, 64, 6, 0);
+ SET_WEIGHT(fw[1], 0, 64, 6, 0);
+ SET_WEIGHT(fw[2], 0, 64, 6, 0);
+ fullCheck = 0;
+ }
+ }
+ }
+ }
+
+ slice->setWpScaling(m_wp);
+ slice->getPPS()->setUseWP((fullCheck > 0) ? true : false);
+}
diff -r 3cc30f74ce04 -r a172c7333c31 source/encoder/weightPrediction.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/encoder/weightPrediction.h Mon Dec 30 17:53:03 2013 +0530
@@ -0,0 +1,92 @@
+/*****************************************************************************
+ * Copyright (C) 2013 x265 project
+ *
+ * Authors: Shazeb Nawaz Khan <shazeb at multicorewareinc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing at multicorewareinc.com.
+ *****************************************************************************/
+
+#include "TLibCommon/TComPic.h"
+#include "lowres.h"
+#include "mv.h"
+
+namespace x265 {
+
+class TComSlice;
+
+class WeightPrediction
+{
+private:
+
+ int m_csp, m_csp444;
+ int blockSize, frmHeight, frmWidth;
+ int m_refStride, m_dstStride;
+ int weightType;
+ int32_t *m_mvCost;
+ TComSlice *slice;
+ wpScalingParam m_wp[2][MAX_NUM_REF][3];
+
+
+ pixel *refBuf, *mcbuf, *inbuf, *buf;
+ uint16_t *lowresCosts;
+ int32_t *m_intraCost;
+ MV *mvs;
+
+public:
+
+ WeightPrediction(TComSlice *slice)
+ {
+ this->slice = slice;
+ m_csp = slice->getPic()->getPicYuvOrg()->m_picCsp;
+ m_csp444 = (m_csp == X265_CSP_I444) ? 1: 0;
+ blockSize = 8 << m_csp444;
+ frmHeight = slice->getPic()->m_lowres.lines << m_csp444;
+ frmWidth = slice->getPic()->m_lowres.width << m_csp444;
+ m_dstStride = frmWidth;
+ m_refStride = slice->getPic()->m_lowres.lumaStride;
+ m_intraCost = slice->getPic()->m_lowres.intraCost;
+
+ mcbuf = NULL;
+ inbuf = NULL;
+ buf = (pixel *) X265_MALLOC(pixel, frmHeight * m_refStride);
+
+ int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;
+ for (int list = 0; list < numPredDir; list++)
+ {
+ for (int refIdxTemp = 0; refIdxTemp < slice->getNumRefIdx(list); refIdxTemp++)
+ {
+ for (int yuv = 0; yuv < 3; yuv++)
+ {
+ SET_WEIGHT(m_wp[list][refIdxTemp][yuv], 0, 64, 6, 0);
+ }
+ }
+ }
+
+ }
+
+ ~WeightPrediction()
+ {
+ X265_FREE(buf);
+ }
+
+ void mcChroma();
+ void weightAnalyseEnc();
+ uint32_t weightCost(pixel *cur, pixel *ref, wpScalingParam *w);
+
+};
+};
More information about the x265-devel
mailing list