[x265] [PATCH] primitive function for luma and chroma for loops in addAvg()

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Fri Jan 17 08:10:06 CET 2014


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1389941305 -19800
#      Fri Jan 17 12:18:25 2014 +0530
# Node ID 06726f0e04fb2fa2f1ebe8302ee579e791dbd0cc
# Parent  1d7ea03e1a386301b82287c87f6c4d08fce638d6
Add luma and chroma addAvg primitives to replace the for-loops in addAvg().

diff -r 1d7ea03e1a38 -r 06726f0e04fb source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp	Wed Jan 15 19:18:53 2014 +0530
+++ b/source/Lib/TLibCommon/TComYuv.cpp	Fri Jan 17 12:18:25 2014 +0530
@@ -572,9 +572,7 @@
 
 void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
 {
-    int x, y;
     uint32_t src0Stride, src1Stride, dststride;
-    int shiftNum, offset;
 
     int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
     int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
@@ -588,61 +586,24 @@
     Pel* dstU = getCbAddr(partUnitIdx);
     Pel* dstV = getCrAddr(partUnitIdx);
 
+    int part = partitionFromSizes(width, height);
+
     if (bLuma)
     {
         src0Stride = srcYuv0->m_width;
         src1Stride = srcYuv1->m_width;
         dststride  = getStride();
-        shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
-        offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 
-        for (y = 0; y < height; y++)
-        {
-            for (x = 0; x < width; x += 4)
-            {
-                dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum);
-                dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum);
-                dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum);
-                dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum);
-            }
-
-            srcY0 += src0Stride;
-            srcY1 += src1Stride;
-            dstY  += dststride;
-        }
+        primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride);
     }
     if (bChroma)
     {
-        shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
-        offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
         src0Stride = srcYuv0->m_cwidth;
         src1Stride = srcYuv1->m_cwidth;
         dststride  = getCStride();
 
-        width  >>= m_hChromaShift;
-        height >>= m_vChromaShift;
-
-        for (y = height - 1; y >= 0; y--)
-        {
-            for (x = width - 1; x >= 0; )
-            {
-                // note: chroma min width is 2
-                dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
-                dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
-                x--;
-                dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
-                dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
-                x--;
-            }
-
-            srcU0 += src0Stride;
-            srcU1 += src1Stride;
-            srcV0 += src0Stride;
-            srcV1 += src1Stride;
-            dstU  += dststride;
-            dstV  += dststride;
-        }
+        primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride);
+        primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride);
     }
 }
 
diff -r 1d7ea03e1a38 -r 06726f0e04fb source/common/pixel.cpp
--- a/source/common/pixel.cpp	Wed Jan 15 19:18:53 2014 +0530
+++ b/source/common/pixel.cpp	Fri Jan 17 12:18:25 2014 +0530
@@ -800,6 +800,27 @@
         a += dstride;
     }
 }
+
+template<int bx, int by> // bx = block width, by = block height, both fixed at compile time
+void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride)
+{ // For each of by rows: dst[x] = ClipY((src0[x] + src1[x] + offset) >> shiftNum) across the bx columns.
+    int shiftNum, offset;
+    shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; // right shift applied to the sum of the two source samples
+    offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; // 1 << (shiftNum - 1) is half the divisor (round to nearest); 2 * IF_INTERNAL_OFFS presumably cancels an offset carried by each source -- TODO confirm
+
+    for (int y = 0; y < by; y++)
+    {
+        for (int x = 0; x < bx; x += 2) // two columns per iteration, so bx is assumed to be even
+        {
+            dst[x + 0] = (pixel)ClipY((src0[x + 0] + src1[x + 0] + offset) >> shiftNum);
+            dst[x + 1] = (pixel)ClipY((src0[x + 1] + src1[x + 1] + offset) >> shiftNum);
+        }
+
+        src0 += src0Stride; // advance all three pointers to the next row; strides count elements, not bytes
+        src1 += src1Stride;
+        dst  += dstStride;
+    }
+}
 }  // end anonymous namespace
 
 namespace x265 {
@@ -841,6 +862,7 @@
     p.satd[LUMA_16x64] = satd8<16, 64>;
 
 #define CHROMA(W, H) \
+    p.chroma_addAvg[CHROMA_ ## W ## x ## H]  = addAvg<W, H>; \
     p.chroma[X265_CSP_I420].copy_pp[CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
     p.chroma[X265_CSP_I420].copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
     p.chroma[X265_CSP_I420].copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
@@ -848,6 +870,7 @@
     p.chroma[X265_CSP_I420].add_ps[CHROMA_ ## W ## x ## H] = pixel_add_ps_c<W, H>;
 
 #define LUMA(W, H) \
+    p.luma_addAvg[LUMA_ ## W ## x ## H]  = addAvg<W, H>; \
     p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
     p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
     p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>; \
diff -r 1d7ea03e1a38 -r 06726f0e04fb source/common/primitives.h
--- a/source/common/primitives.h	Wed Jan 15 19:18:53 2014 +0530
+++ b/source/common/primitives.h	Fri Jan 17 12:18:25 2014 +0530
@@ -203,6 +203,8 @@
 typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
 typedef void (*pixel_add_ps_t)(pixel *a, intptr_t dstride, pixel *b0, int16_t *b1, intptr_t sstride0, intptr_t sstride1);
 
+typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride); // function-pointer type for the addAvg primitives: one pixel destination and two int16_t sources, each with its own stride
+
 /* Define a structure containing function pointers to optimized encoder
  * primitives.  Each pointer can reference either an assembly routine,
  * a vectorized primitive, or a C function. */
@@ -271,6 +273,9 @@
     plane_copy_deinterleave_t plane_copy_deinterleave_c;
     extendCURowBorder_t extendRowBorder;
 
+    addAvg_t        luma_addAvg[NUM_LUMA_PARTITIONS];
+    addAvg_t        chroma_addAvg[NUM_CHROMA_PARTITIONS];
+
     struct
     {
         filter_pp_t     filter_vpp[NUM_LUMA_PARTITIONS];


More information about the x265-devel mailing list