[x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Mon Nov 18 08:14:29 CET 2013
# HG changeset patch
# User Dnyaneshwar Gorade <dnyaneshwar at multicorewareinc.com>
# Date 1384758687 -19800
# Mon Nov 18 12:41:27 2013 +0530
# Node ID ee062baf96b18ab2ecd64a2e4219b2a5a3c09e5d
# Parent e2895ce7bbeb2c3d845fee2578758d0012fa2cb4
TComYuv::addAvg, primitive function for luma and chroma loops
diff -r e2895ce7bbeb -r ee062baf96b1 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Sun Nov 17 11:24:13 2013 -0600
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:41:27 2013 +0530
@@ -589,9 +589,7 @@
void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)
{
- int x, y;
uint32_t src0Stride, src1Stride, dststride;
- int shiftNum, offset;
int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);
int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);
@@ -610,29 +608,12 @@
src0Stride = srcYuv0->m_width;
src1Stride = srcYuv1->m_width;
dststride = getStride();
- shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
- offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
- for (y = 0; y < height; y++)
- {
- for (x = 0; x < width; x += 4)
- {
- dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum);
- dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum);
- dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum);
- dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum);
- }
-
- srcY0 += src0Stride;
- srcY1 += src1Stride;
- dstY += dststride;
- }
+ int part = partitionFromSizes(width, height);
+ primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride);
}
if (bChroma)
{
- shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;
- offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
-
src0Stride = srcYuv0->m_cwidth;
src1Stride = srcYuv1->m_cwidth;
dststride = getCStride();
@@ -640,26 +621,9 @@
width >>= m_hChromaShift;
height >>= m_vChromaShift;
- for (y = height - 1; y >= 0; y--)
- {
- for (x = width - 1; x >= 0; )
- {
- // note: chroma min width is 2
- dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
- dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
- x--;
- dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);
- dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);
- x--;
- }
-
- srcU0 += src0Stride;
- srcU1 += src1Stride;
- srcV0 += src0Stride;
- srcV1 += src1Stride;
- dstU += dststride;
- dstV += dststride;
- }
+ int part = partitionFromSizes(width, height);
+ primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride);
+ primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride);
}
}
diff -r e2895ce7bbeb -r ee062baf96b1 source/common/pixel.cpp
--- a/source/common/pixel.cpp Sun Nov 17 11:24:13 2013 -0600
+++ b/source/common/pixel.cpp Mon Nov 18 12:41:27 2013 +0530
@@ -794,6 +794,27 @@
a += dstride;
}
}
+
+template<int bx, int by> // bx: samples processed per row, by: number of rows; both are compile-time constants
+void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride) // writes clip((src0 + src1 + offset) >> shiftNum) into dst for a bx-by-by block; strides are in elements
+{
+ int shiftNum, offset;
+ shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; // right shift applied to every src0 + src1 sum
+ offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; // half of (1 << shiftNum) so the shift rounds, plus IF_INTERNAL_OFFS once per source (presumably undoing a bias carried by each input; confirm against the interpolation code)
+
+ for (int y = 0; y < by; y++)
+ {
+ for (int x = 0; x < bx; x += 2) // two samples per iteration: assumes bx is even, an odd bx would also touch index bx
+ {
+ dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >> shiftNum);
+ dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >> shiftNum); // NOTE(review): this template is also installed in chroma_addAvg, whereas the chroma loop it replaces used ClipC; confirm ClipY and ClipC clip identically
+ }
+
+ src0 += src0Stride; // step both sources and the destination to the next row
+ src1 += src1Stride;
+ dst += dstStride;
+ }
+}
} // end anonymous namespace
namespace x265 {
@@ -835,12 +856,14 @@
p.satd[LUMA_16x64] = satd8<16, 64>;
#define CHROMA(W, H) \
+ p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \
p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;
#define LUMA(W, H) \
+ p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \
p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \
p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \
p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\
diff -r e2895ce7bbeb -r ee062baf96b1 source/common/primitives.h
--- a/source/common/primitives.h Sun Nov 17 11:24:13 2013 -0600
+++ b/source/common/primitives.h Mon Nov 18 12:41:27 2013 +0530
@@ -219,6 +219,8 @@
typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
+typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride);
+
/* Define a structure containing function pointers to optimized encoder
* primitives. Each pointer can reference either an assembly routine,
* a vectorized primitive, or a C function. */
@@ -301,6 +303,9 @@
var_t var[NUM_LUMA_PARTITIONS];
ssim_4x4x2_core_t ssim_4x4x2_core;
plane_copy_deinterleave_t plane_copy_deinterleave_c;
+
+ addAvg_t luma_addAvg[NUM_LUMA_PARTITIONS];
+ addAvg_t chroma_addAvg[NUM_CHROMA_PARTITIONS];
};
/* This copy of the table is what gets used by the encoder.
More information about the x265-devel
mailing list