<div dir="ltr"><div><div><div><div><div>Pushed. But next time, please organize your patches more clearly, as a series of separate steps: <br><br></div><div>1. Add the C primitive, if it does not already exist. <br></div></div>2. Add the function pointer declarations and the new primitive declarations to the EncoderPrimitives struct. <br>
</div>3. Add testbench code for the primitives.<br></div>4. Add the asm code. <br><br></div>Once all of the above patches have been reviewed, pushed and tested on all platforms, you can integrate the new primitive with the actual encoder. <br><br>
<br><div><div><div><div><div><div><div id="__tbSetup"></div></div></div></div></div></div></div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Mon, Nov 18, 2013 at 3:23 PM, <span dir="ltr"><<a href="mailto:dnyaneshwar@multicorewareinc.com" target="_blank">dnyaneshwar@multicorewareinc.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Dnyaneshwar G <<a href="mailto:dnyaneshwar@multicorewareinc.com">dnyaneshwar@multicorewareinc.com</a>><br>
# Date 1384768323 -19800<br>
# Mon Nov 18 15:22:03 2013 +0530<br>
# Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1<br>
# Parent ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0<br>
TComYuv::addAvg, primitive function for luma and chroma loops<br>
<br>
diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp<br>
--- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530<br>
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530<br>
@@ -589,9 +589,7 @@<br>
<br>
void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma)<br>
{<br>
- int x, y;<br>
uint32_t src0Stride, src1Stride, dststride;<br>
- int shiftNum, offset;<br>
<br>
int16_t* srcY0 = srcYuv0->getLumaAddr(partUnitIdx);<br>
int16_t* srcU0 = srcYuv0->getCbAddr(partUnitIdx);<br>
@@ -605,61 +603,24 @@<br>
Pel* dstU = getCbAddr(partUnitIdx);<br>
Pel* dstV = getCrAddr(partUnitIdx);<br>
<br>
+ int part = partitionFromSizes(width, height);<br>
+<br>
if (bLuma)<br>
{<br>
src0Stride = srcYuv0->m_width;<br>
src1Stride = srcYuv1->m_width;<br>
dststride = getStride();<br>
- shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;<br>
- offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;<br>
<br>
- for (y = 0; y < height; y++)<br>
- {<br>
- for (x = 0; x < width; x += 4)<br>
- {<br>
- dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) >> shiftNum);<br>
- dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) >> shiftNum);<br>
- dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) >> shiftNum);<br>
- dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) >> shiftNum);<br>
- }<br>
-<br>
- srcY0 += src0Stride;<br>
- srcY1 += src1Stride;<br>
- dstY += dststride;<br>
- }<br>
+ primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride);<br>
}<br>
if (bChroma)<br>
{<br>
- shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;<br>
- offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;<br>
-<br>
src0Stride = srcYuv0->m_cwidth;<br>
src1Stride = srcYuv1->m_cwidth;<br>
dststride = getCStride();<br>
<br>
- width >>= m_hChromaShift;<br>
- height >>= m_vChromaShift;<br>
-<br>
- for (y = height - 1; y >= 0; y--)<br>
- {<br>
- for (x = width - 1; x >= 0; )<br>
- {<br>
- // note: chroma min width is 2<br>
- dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);<br>
- dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);<br>
- x--;<br>
- dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) >> shiftNum);<br>
- dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) >> shiftNum);<br>
- x--;<br>
- }<br>
-<br>
- srcU0 += src0Stride;<br>
- srcU1 += src1Stride;<br>
- srcV0 += src0Stride;<br>
- srcV1 += src1Stride;<br>
- dstU += dststride;<br>
- dstV += dststride;<br>
- }<br>
+ primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride);<br>
+ primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride);<br>
}<br>
}<br>
<br>
diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp<br>
--- a/source/common/pixel.cpp Mon Nov 18 12:26:44 2013 +0530<br>
+++ b/source/common/pixel.cpp Mon Nov 18 15:22:03 2013 +0530<br>
@@ -794,6 +794,27 @@<br>
a += dstride;<br>
}<br>
}<br>
+<br>
+template<int bx, int by><br>
+void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride)<br>
+{<br>
+ int shiftNum, offset;<br>
+ shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH;<br>
+ offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;<br>
+<br>
+ for (int y = 0; y < by; y++)<br>
+ {<br>
+ for (int x = 0; x < bx; x += 2)<br>
+ {<br>
+ dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) >> shiftNum);<br>
+ dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) >> shiftNum);<br>
+ }<br>
+<br>
+ src0 += src0Stride;<br>
+ src1 += src1Stride;<br>
+ dst += dstStride;<br>
+ }<br>
+}<br>
} // end anonymous namespace<br>
<br>
namespace x265 {<br>
@@ -835,12 +856,14 @@<br>
p.satd[LUMA_16x64] = satd8<16, 64>;<br>
<br>
#define CHROMA(W, H) \<br>
+ p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvg<W, H>; \<br>
p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\<br>
p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>;<br>
<br>
#define LUMA(W, H) \<br>
+ p.luma_addAvg[LUMA_ ## W ## x ## H] = addAvg<W, H>; \<br>
p.luma_copy_pp[LUMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \<br>
p.luma_copy_sp[LUMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \<br>
p.luma_copy_ps[LUMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\<br>
diff -r ac9e64d8a80b -r cdd54aa200bd source/common/primitives.h<br>
--- a/source/common/primitives.h Mon Nov 18 12:26:44 2013 +0530<br>
+++ b/source/common/primitives.h Mon Nov 18 15:22:03 2013 +0530<br>
@@ -219,6 +219,8 @@<br>
<br>
typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);<br>
<br>
+typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride);<br>
+<br>
/* Define a structure containing function pointers to optimized encoder<br>
* primitives. Each pointer can reference either an assembly routine,<br>
* a vectorized primitive, or a C function. */<br>
@@ -301,6 +303,9 @@<br>
var_t var[NUM_LUMA_PARTITIONS];<br>
ssim_4x4x2_core_t ssim_4x4x2_core;<br>
plane_copy_deinterleave_t plane_copy_deinterleave_c;<br>
+<br>
+ addAvg_t luma_addAvg[NUM_LUMA_PARTITIONS];<br>
+ addAvg_t chroma_addAvg[NUM_CHROMA_PARTITIONS];<br>
};<br>
<br>
/* This copy of the table is what gets used by the encoder.<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>