[x265] [PATCH 3 of 3] reference: weight chroma planes of reference pictures if using chroma satd

Steve Borho steve at borho.org
Wed Dec 10 19:26:18 CET 2014


# HG changeset patch
# User Steve Borho <steve at borho.org>
# Date 1418196051 21600
#      Wed Dec 10 01:20:51 2014 -0600
# Node ID 6c32c8d4e0a1fa33c9b9cc1731ee32096b093546
# Parent  afd5620c77a4729f4c599f9ad69000082693a32e
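reference: weight chroma planes of reference pictures if using chroma satd

When subpelRefine > 2 (the case in which chroma SATD is possible),
MotionReference now weights the Cb and Cr planes as well as luma, and
subpelCompare() reads chroma through the ReferencePlanes accessors so that it
sees the weighted pixels instead of the unweighted reconstructed picture.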

diff -r afd5620c77a4 -r 6c32c8d4e0a1 source/common/lowres.cpp
--- a/source/common/lowres.cpp	Mon Dec 08 18:53:28 2014 -0600
+++ b/source/common/lowres.cpp	Wed Dec 10 01:20:51 2014 -0600
@@ -166,5 +166,5 @@
     extendPicBorder(lowresPlane[1], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
     extendPicBorder(lowresPlane[2], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
     extendPicBorder(lowresPlane[3], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
-    fpelPlane = lowresPlane[0];
+    fpelPlane[0] = lowresPlane[0];
 }
diff -r afd5620c77a4 -r 6c32c8d4e0a1 source/common/lowres.h
--- a/source/common/lowres.h	Mon Dec 08 18:53:28 2014 -0600
+++ b/source/common/lowres.h	Wed Dec 10 01:20:51 2014 -0600
@@ -26,28 +26,36 @@
 
 #include "primitives.h"
 #include "common.h"
+#include "picyuv.h"
 #include "mv.h"
 
 namespace x265 {
 // private namespace
 
-class PicYuv;
-
 struct ReferencePlanes
 {
     ReferencePlanes() { memset(this, 0, sizeof(ReferencePlanes)); }
 
-    pixel*   fpelPlane;
+    pixel*   fpelPlane[3];
     pixel*   lowresPlane[4];
     PicYuv*  reconPic;
 
     bool     isWeighted;
     bool     isLowres;
+
     intptr_t lumaStride;
-    int      weight;
-    int      offset;
-    int      shift;
-    int      round;
+    intptr_t chromaStride;
+
+    struct {
+        int      weight;
+        int      offset;
+        int      shift;
+        int      round;
+    } w[3];
+
+    pixel* getLumaAddr(uint32_t ctuAddr, uint32_t absPartIdx) { return fpelPlane[0] + reconPic->m_cuOffsetY[ctuAddr] + reconPic->m_buOffsetY[absPartIdx]; }
+    pixel* getCbAddr(uint32_t ctuAddr, uint32_t absPartIdx)   { return fpelPlane[1] + reconPic->m_cuOffsetC[ctuAddr] + reconPic->m_buOffsetC[absPartIdx]; }
+    pixel* getCrAddr(uint32_t ctuAddr, uint32_t absPartIdx)   { return fpelPlane[2] + reconPic->m_cuOffsetC[ctuAddr] + reconPic->m_buOffsetC[absPartIdx]; }
 
     /* lowres motion compensation, you must provide a buffer and stride for QPEL averaged pixels
      * in case QPEL is required.  Else it returns a pointer to the HPEL pixels */
diff -r afd5620c77a4 -r 6c32c8d4e0a1 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Dec 08 18:53:28 2014 -0600
+++ b/source/encoder/frameencoder.cpp	Wed Dec 10 01:20:51 2014 -0600
@@ -267,7 +267,7 @@
             WeightParam *w = NULL;
             if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag)
                 w = slice->m_weightPredTable[l][ref];
-            m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w);
+            m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);
         }
     }
 
diff -r afd5620c77a4 -r 6c32c8d4e0a1 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp	Mon Dec 08 18:53:28 2014 -0600
+++ b/source/encoder/motion.cpp	Wed Dec 10 01:20:51 2014 -0600
@@ -330,7 +330,7 @@
 {
     ALIGN_VAR_16(int, costs[16]);
     pixel* fenc = fencPUYuv.m_buf[0];
-    pixel* fref = ref->fpelPlane + blockOffset;
+    pixel* fref = ref->fpelPlane[0] + blockOffset;
     intptr_t stride = ref->lumaStride;
 
     MV omv = bmv;
@@ -575,7 +575,7 @@
         blockOffset = ref->reconPic->getLumaAddr(ctuAddr, absPartIdx) - ref->reconPic->getLumaAddr(0);
     intptr_t stride = ref->lumaStride;
     pixel* fenc = fencPUYuv.m_buf[0];
-    pixel* fref = ref->fpelPlane + blockOffset;
+    pixel* fref = ref->fpelPlane[0] + blockOffset;
 
     setMVP(qmvp);
 
@@ -1185,7 +1185,7 @@
 int MotionEstimate::subpelCompare(ReferencePlanes *ref, const MV& qmv, pixelcmp_t cmp)
 {
     intptr_t refStride = ref->lumaStride;
-    pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * refStride;
+    pixel *fref = ref->fpelPlane[0] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * refStride;
     int xFrac = qmv.x & 0x3;
     int yFrac = qmv.y & 0x3;
     int cost;
@@ -1230,8 +1230,8 @@
         intptr_t refStrideC = ref->reconPic->m_strideC;
         intptr_t refOffset = (qmv.x >> shiftHor) + (qmv.y >> shiftVer) * refStrideC;
 
-        const pixel* refCb = ref->reconPic->getCbAddr(ctuAddr, absPartIdx) + refOffset;
-        const pixel* refCr = ref->reconPic->getCrAddr(ctuAddr, absPartIdx) + refOffset;
+        const pixel* refCb = ref->getCbAddr(ctuAddr, absPartIdx) + refOffset;
+        const pixel* refCr = ref->getCrAddr(ctuAddr, absPartIdx) + refOffset;
 
         xFrac = qmv.x & ((1 << shiftHor) - 1);
         yFrac = qmv.y & ((1 << shiftVer) - 1);
diff -r afd5620c77a4 -r 6c32c8d4e0a1 source/encoder/reference.cpp
--- a/source/encoder/reference.cpp	Mon Dec 08 18:53:28 2014 -0600
+++ b/source/encoder/reference.cpp	Wed Dec 10 01:20:51 2014 -0600
@@ -33,86 +33,135 @@
 
 MotionReference::MotionReference()
 {
-    m_weightBuffer = NULL;
+    weightBuffer[0] = NULL;
+    weightBuffer[1] = NULL;
+    weightBuffer[2] = NULL;
 }
 
-int MotionReference::init(PicYuv* recPic, WeightParam *w)
+MotionReference::~MotionReference()
+{
+    X265_FREE(weightBuffer[0]);
+    X265_FREE(weightBuffer[1]);
+    X265_FREE(weightBuffer[2]);
+}
+
+int MotionReference::init(PicYuv* recPic, WeightParam *wp, const x265_param& p)
 {
     reconPic = recPic;
-    m_numWeightedRows = 0;
+    numWeightedRows = 0;
     lumaStride = recPic->m_stride;
-    intptr_t startpad = recPic->m_lumaMarginY * lumaStride + recPic->m_lumaMarginX;
+    chromaStride = recPic->m_strideC;
+    numInterpPlanes = p.subpelRefine > 2 ? 3 : 1; /* is chroma satd possible? */
 
-    /* directly reference the pre-extended integer pel plane */
-    fpelPlane = recPic->m_picBuf[0] + startpad;
+    /* directly reference the extended integer pel planes */
+    fpelPlane[0] = recPic->m_picOrg[0];
+    fpelPlane[1] = recPic->m_picOrg[1];
+    fpelPlane[2] = recPic->m_picOrg[2];
     isWeighted = false;
 
-    if (w)
+    if (wp)
     {
-        if (!m_weightBuffer)
+        uint32_t numCUinHeight = (reconPic->m_picHeight + g_maxCUSize - 1) / g_maxCUSize;
+
+        int marginX = reconPic->m_lumaMarginX;
+        int marginY = reconPic->m_lumaMarginY;
+        intptr_t stride = reconPic->m_stride;
+        int cuHeight = g_maxCUSize;
+
+        for (int c = 0; c < numInterpPlanes; c++)
         {
-            uint32_t numCUinHeight = (recPic->m_picHeight + g_maxCUSize - 1) / g_maxCUSize;
-            size_t padheight = (numCUinHeight * g_maxCUSize) + recPic->m_lumaMarginY * 2;
-            m_weightBuffer = X265_MALLOC(pixel, lumaStride * padheight);
-            if (!m_weightBuffer)
-                return -1;
+            if (c == 1)
+            {
+                marginX = reconPic->m_chromaMarginX;
+                marginY = reconPic->m_chromaMarginY;
+                stride  = reconPic->m_strideC;
+                cuHeight >>= reconPic->m_vChromaShift;
+            }
+
+            if (!weightBuffer[c])
+            {
+                size_t padheight = (numCUinHeight * cuHeight) + marginY * 2;
+                weightBuffer[c] = X265_MALLOC(pixel, stride * padheight);
+                if (!weightBuffer[c])
+                    return -1;
+            }
+
+            /* use our buffer which will have weighted pixels written to it */
+            fpelPlane[c] = weightBuffer[c] + marginY * stride + marginX;
+            X265_CHECK(recPic->m_picOrg[c] - recPic->m_picBuf[c] == marginY * stride + marginX, "PicYuv pad calculation mismatch\n");
+
+            w[c].weight = wp[c].inputWeight;
+            w[c].offset = wp[c].inputOffset * (1 << (X265_DEPTH - 8));
+            w[c].shift = wp[c].log2WeightDenom;
+            w[c].round = w[c].shift ? 1 << (w[c].shift - 1) : 0;
         }
 
         isWeighted = true;
-        weight = w->inputWeight;
-        offset = w->inputOffset * (1 << (X265_DEPTH - 8));
-        shift  = w->log2WeightDenom;
-        round  = shift ? 1 << (shift - 1) : 0;
-
-        /* use our buffer which will have weighted pixels written to it */
-        fpelPlane = m_weightBuffer + startpad;
     }
 
     return 0;
 }
 
-MotionReference::~MotionReference()
+void MotionReference::applyWeight(int finishedRows, int maxNumRows)
 {
-    X265_FREE(m_weightBuffer);
-}
+    finishedRows = X265_MIN(finishedRows, maxNumRows);
+    if (numWeightedRows >= finishedRows)
+        return;
 
-void MotionReference::applyWeight(int rows, int numRows)
-{
-    rows = X265_MIN(rows, numRows);
-    if (m_numWeightedRows >= rows)
-        return;
     int marginX = reconPic->m_lumaMarginX;
     int marginY = reconPic->m_lumaMarginY;
-    pixel* src = (pixel*)reconPic->m_picOrg[0] + (m_numWeightedRows * (int)g_maxCUSize * lumaStride);
-    pixel* dst = fpelPlane + ((m_numWeightedRows * (int)g_maxCUSize) * lumaStride);
-    int width = reconPic->m_picWidth;
-    int height = ((rows - m_numWeightedRows) * g_maxCUSize);
-    if (rows == numRows)
-        height = ((reconPic->m_picHeight % g_maxCUSize) ? (reconPic->m_picHeight % g_maxCUSize) : g_maxCUSize);
+    intptr_t stride = reconPic->m_stride;
+    int width   = reconPic->m_picWidth;
+    int height  = (finishedRows - numWeightedRows) * g_maxCUSize;
+    if (finishedRows == maxNumRows && (reconPic->m_picHeight % g_maxCUSize))
+    {
+        /* the last row may be partial height */
+        height -= g_maxCUSize;
+        height += reconPic->m_picHeight % g_maxCUSize;
+    }
+    int cuHeight = g_maxCUSize;
 
-    // Computing weighted CU rows
-    int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
-    int padwidth = (width + 15) & ~15;  // weightp assembly needs even 16 byte widths
-    primitives.weight_pp(src, dst, lumaStride, padwidth, height, weight, round << correction, shift + correction, offset);
+    for (int c = 0; c < numInterpPlanes; c++)
+    {
+        if (c == 1)
+        {
+            marginX = reconPic->m_chromaMarginX;
+            marginY = reconPic->m_chromaMarginY;
+            stride  = reconPic->m_strideC;
+            width    >>= reconPic->m_hChromaShift;
+            height   >>= reconPic->m_vChromaShift;
+            cuHeight >>= reconPic->m_vChromaShift;
+        }
 
-    // Extending Left & Right
-    primitives.extendRowBorder(dst, lumaStride, width, height, marginX);
+        const pixel* src = reconPic->m_picOrg[c] + numWeightedRows * cuHeight * stride;
+        pixel* dst = fpelPlane[c] + numWeightedRows * cuHeight * stride;
 
-    // Extending Above
-    if (m_numWeightedRows == 0)
-    {
-        pixel *pixY = fpelPlane - marginX;
-        for (int y = 0; y < marginY; y++)
-            memcpy(pixY - (y + 1) * lumaStride, pixY, lumaStride * sizeof(pixel));
+        // Computing weighted CU rows
+        int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
+        int padwidth = (width + 15) & ~15;              // weightp assembly needs even 16 byte widths
+        primitives.weight_pp(src, dst, stride, padwidth, height, w[c].weight, w[c].round << correction, w[c].shift + correction, w[c].offset);
+
+        // Extending Left & Right
+        primitives.extendRowBorder(dst, stride, width, height, marginX);
+
+        // Extending Above
+        if (numWeightedRows == 0)
+        {
+            pixel *pixY = fpelPlane[c] - marginX;
+            for (int y = 0; y < marginY; y++)
+                memcpy(pixY - (y + 1) * stride, pixY, stride * sizeof(pixel));
+        }
+
+        // Extending Bottom
+        if (finishedRows == maxNumRows)
+        {
+            int picHeight = reconPic->m_picHeight;
+            if (c) picHeight >>= reconPic->m_vChromaShift;
+            pixel *pixY = fpelPlane[c] - marginX + (picHeight - 1) * stride;
+            for (int y = 0; y < marginY; y++)
+                memcpy(pixY + (y + 1) * stride, pixY, stride * sizeof(pixel));
+        }
     }
 
-    // Extending Bottom
-    if (rows == numRows)
-    {
-        pixel *pixY = fpelPlane - marginX + (reconPic->m_picHeight - 1) * lumaStride;
-        for (int y = 0; y < marginY; y++)
-            memcpy(pixY + (y + 1) * lumaStride, pixY, lumaStride * sizeof(pixel));
-    }
-
-    m_numWeightedRows = rows;
+    numWeightedRows = finishedRows;
 }
diff -r afd5620c77a4 -r 6c32c8d4e0a1 source/encoder/reference.h
--- a/source/encoder/reference.h	Mon Dec 08 18:53:28 2014 -0600
+++ b/source/encoder/reference.h	Wed Dec 10 01:20:51 2014 -0600
@@ -40,14 +40,12 @@
 
     MotionReference();
     ~MotionReference();
-    int  init(PicYuv*, WeightParam* w = NULL);
+    int  init(PicYuv*, WeightParam* wp, const x265_param& p);
     void applyWeight(int rows, int numRows);
 
-    pixel*  m_weightBuffer;
-    int     m_numWeightedRows;
-
-    pixel*  getLumaAddr(uint32_t ctuAddr)                      { return fpelPlane + reconPic->m_cuOffsetY[ctuAddr]; }
-    pixel*  getLumaAddr(uint32_t ctuAddr, uint32_t absPartIdx) { return fpelPlane + reconPic->m_cuOffsetY[ctuAddr] + reconPic->m_buOffsetY[absPartIdx]; }
+    pixel*  weightBuffer[3];
+    int     numInterpPlanes;
+    int     numWeightedRows;
 
 protected:
 
diff -r afd5620c77a4 -r 6c32c8d4e0a1 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Mon Dec 08 18:53:28 2014 -0600
+++ b/source/encoder/slicetype.cpp	Wed Dec 10 01:20:51 2014 -0600
@@ -1261,7 +1261,7 @@
             m_weightedRef.lowresPlane[i] = m_wbuffer[i] + padoffset;
         }
 
-        m_weightedRef.fpelPlane = m_weightedRef.lowresPlane[0];
+        m_weightedRef.fpelPlane[0] = m_weightedRef.lowresPlane[0];
         m_weightedRef.lumaStride = curFrame->m_lowres.lumaStride;
         m_weightedRef.isLowres = true;
         m_weightedRef.isWeighted = false;
@@ -1362,7 +1362,7 @@
 {
     Lowres *fenc = frames[b];
     Lowres *ref  = frames[p0];
-    pixel *src = ref->fpelPlane;
+    pixel *src = ref->fpelPlane[0];
     intptr_t stride = fenc->lumaStride;
 
     if (wp)
@@ -1376,7 +1376,7 @@
 
         primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, widthHeight, m_paddedLines,
                              scale, round << correction, denom + correction, offset);
-        src = m_weightedRef.fpelPlane;
+        src = m_weightedRef.fpelPlane[0];
     }
 
     uint32_t cost = 0;
@@ -1387,7 +1387,7 @@
     {
         for (int x = 0; x < fenc->width; x += 8, mb++, pixoff += 8)
         {
-            int satd = primitives.satd[LUMA_8x8](src + pixoff, stride, fenc->fpelPlane + pixoff, stride);
+            int satd = primitives.satd[LUMA_8x8](src + pixoff, stride, fenc->fpelPlane[0] + pixoff, stride);
             cost += X265_MIN(satd, fenc->intraCost[mb]);
         }
     }


More information about the x265-devel mailing list