[x265] [PATCH] primitive function for luma and chroma for loops in addAvg()
chen
chenm003 at 163.com
Thu Nov 14 16:27:11 CET 2013
At 2013-11-14 23:12:15,dnyaneshwar at multicorewareinc.com wrote:
># HG changeset patch
># User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
># Date 1384441865 -19800
># Thu Nov 14 20:41:05 2013 +0530
># Node ID d16bcb0416b43912fc8d69d98af89c9a17475c60
># Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d
>primitive function for luma and chroma for loops in addAvg()
>
>diff -r c4ca80d19105 -r d16bcb0416b4 source/Lib/TLibCommon/TComYuv.cpp
>--- a/source/Lib/TLibCommon/TComYuv.cpp Tue Nov 12 19:10:23 2013 +0530
>+++ b/source/Lib/TLibCommon/TComYuv.cpp Thu Nov 14 20:41:05 2013 +0530
>@@ -590,7 +590,6 @@
>
> void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
> {
>- int x, y;
> uint32_t src0Stride, src1Stride, dststride;
> int shiftNum, offset;
>
>@@ -606,6 +605,8 @@
> Pel* dstU = getCbAddr(partUnitIdx);
> Pel* dstV = getCrAddr(partUnitIdx);
>
>+ int part = partitionFromSizes(width, height);
>+
> if (bLuma)
> {
> src0Stride = srcYuv0->m_width;
>@@ -614,20 +615,7 @@
> shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
> offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
>
>- for (y = 0; y < height; y++)
>- {
>- for (x = 0; x < width; x += 4)
>- {
>- dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum);
>- dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum);
>- dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum);
>- dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum);
>- }
>-
>- srcY0 += src0Stride;
>- srcY1 += src1Stride;
>- dstY += dststride;
>- }
>+ primitives.addAvg_c[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride);
> }
> if (bChroma)
> {
>@@ -641,26 +629,8 @@
> width >>= m_hChromaShift;
> height >>= m_vChromaShift;
>
>- for (y = height - 1; y >= 0; y--)
>- {
>- for (x = width - 1; x >= 0; )
>- {
>- // note: chroma min width is 2
>- dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
>- dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
>- x--;
>- dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
>- dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
>- x--;
>- }
>-
>- srcU0 += src0Stride;
>- srcU1 += src1Stride;
>- srcV0 += src0Stride;
>- srcV1 += src1Stride;
>- dstU += dststride;
>- dstV += dststride;
>- }
>+ primitives.addAvg_c[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride);
>+ primitives.addAvg_c[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride);
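I think using the luma partition index here is wrong for chroma: `part` is computed from the luma width and height before they are shifted down for chroma, so these calls will process the wrong block size.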
> }
> }
>
>diff -r c4ca80d19105 -r d16bcb0416b4 source/common/pixel.cpp
>--- a/source/common/pixel.cpp Tue Nov 12 19:10:23 2013 +0530
>+++ b/source/common/pixel.cpp Thu Nov 14 20:41:05 2013 +0530
>@@ -794,6 +794,27 @@
> a += dstride;
> }
> }
>+
>+template<int bx, int by>
>+void addAvg_c(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride)
>+{
>+ int shiftNum, offset;
>+ shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
>+ offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
>+
>+ for (int y = 0; y < by; y++)
>+ {
>+ for (int x = 0; x < bx; x += 2)
>+ {
>+ dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >> shiftNum);
>+ dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >> shiftNum);
>+ }
>+
>+ src0 += src0Stride;
>+ src1 += src1Stride;
>+ dst += dstStride;
>+ }
>+}
> } // end anonymous namespace
>
> namespace x265 {
>@@ -806,6 +827,7 @@
> SET_FUNC_PRIMITIVE_TABLE_C2(sad_x3)
> SET_FUNC_PRIMITIVE_TABLE_C2(sad_x4)
> SET_FUNC_PRIMITIVE_TABLE_C2(pixelavg_pp)
>+ SET_FUNC_PRIMITIVE_TABLE_C2(addAvg_c)
>
> // satd
> p.satd[LUMA_4x4] = satd_4x4;
>diff -r c4ca80d19105 -r d16bcb0416b4 source/common/primitives.h
>--- a/source/common/primitives.h Tue Nov 12 19:10:23 2013 +0530
>+++ b/source/common/primitives.h Thu Nov 14 20:41:05 2013 +0530
>@@ -208,6 +208,8 @@
>
> typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
>
>+typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride);
>+
> /* Define a structure containing function pointers to optimized encoder
> * primitives. Each pointer can reference either an assembly routine,
> * a vectorized primitive, or a C function. */
>@@ -288,6 +290,8 @@
> var_t var[NUM_LUMA_PARTITIONS];
> ssim_4x4x2_core_t ssim_4x4x2_core;
> plane_copy_deinterleave_t plane_copy_deinterleave_c;
>+
>+ addAvg_t addAvg_c[NUM_LUMA_PARTITIONS];
The name addAvg_c suggests the C reference implementation; naming this table entry addAvg would be better.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131114/3fd39ed8/attachment-0001.html>
More information about the x265-devel
mailing list