[x265] [PATCH] Main12: fix distortion data type
Divya Manivannan
divya at multicorewareinc.com
Mon Aug 10 08:49:27 CEST 2015
Thanks. I will make the change and send the patch again.
On Mon, Aug 10, 2015 at 10:15 AM, Deepthi Nandakumar <
deepthi at multicorewareinc.com> wrote:
> Also, calcRdSADCost does not need to move to 64-bit precision. The SAD cost
> will not exceed the dynamic range, as there is no squaring.
>
> On Fri, Aug 7, 2015 at 7:55 PM, chen <chenm003 at 163.com> wrote:
>
>> dynamic range:
>>
>> 10 bits: 10 + 10 + 12 = 32 bits
>> 12 bits: 12 + 12 + 12 = 36 bits.
>>
>>
>> So for 10-bit builds, we don't need uint64_t.
>>
>>
>> At 2015-08-07 20:02:13,"Divya Manivannan" <divya at multicorewareinc.com> wrote:
>> ># HG changeset patch
>> ># User Divya Manivannan <divya at multicorewareinc.com>
>> ># Date 1438948368 -19800
>> ># Fri Aug 07 17:22:48 2015 +0530
>> ># Branch stable
>> ># Node ID e2d9e7ea1add7cfacb40314155c3a23aa6e63426
>> ># Parent 4781e6cef251006db10e107b2916741572f7760a
>> >Main12: fix distortion data type
>> >
>> >diff -r 4781e6cef251 -r e2d9e7ea1add source/common/pixel.cpp
>> >--- a/source/common/pixel.cpp Fri Aug 07 12:29:40 2015 +0530
>> >+++ b/source/common/pixel.cpp Fri Aug 07 17:22:48 2015 +0530
>> >@@ -137,6 +137,27 @@
>> > return sum;
>> > }
>> >
>> >+template<int lx, int ly, class T1, class T2>
>> >+uint64_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
>> >+{
>> >+ uint64_t sum = 0;
>> >+ int tmp;
>> >+
>> >+ for (int y = 0; y < ly; y++)
>> >+ {
>> >+ for (int x = 0; x < lx; x++)
>> >+ {
>> >+ tmp = pix1[x] - pix2[x];
>> >+ sum += (tmp * tmp);
>> >+ }
>> >+
>> >+ pix1 += stride_pix1;
>> >+ pix2 += stride_pix2;
>> >+ }
>> >+
>> >+ return sum;
>> >+}
>> >+
>> > #define BITS_PER_SUM (8 * sizeof(sum_t))
>> >
>> > #define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) { \
>> >diff -r 4781e6cef251 -r e2d9e7ea1add source/common/primitives.cpp
>> >--- a/source/common/primitives.cpp Fri Aug 07 12:29:40 2015 +0530
>> >+++ b/source/common/primitives.cpp Fri Aug 07 17:22:48 2015 +0530
>> >@@ -74,7 +74,11 @@
>> > /* at HIGH_BIT_DEPTH, pixel == short so we can alias many primitives */
>> > for (int i = 0; i < NUM_CU_SIZES; i++)
>> > {
>> >+#if X265_DEPTH <= 10
>> > p.cu[i].sse_pp = (pixelcmp_t)p.cu[i].sse_ss;
>> >+#else
>> >+ p.cu[i].sse_pp = (pixel_sse_t)p.cu[i].sse_ss;
>> >+#endif
>> >
>> > p.cu[i].copy_ps = (copy_ps_t)p.pu[i].copy_pp;
>> > p.cu[i].copy_sp = (copy_sp_t)p.pu[i].copy_pp;
>> >diff -r 4781e6cef251 -r e2d9e7ea1add source/common/primitives.h
>> >--- a/source/common/primitives.h Fri Aug 07 12:29:40 2015 +0530
>> >+++ b/source/common/primitives.h Fri Aug 07 17:22:48 2015 +0530
>> >@@ -112,6 +112,10 @@
>> >
>> > typedef int (*pixelcmp_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
>> > typedef int (*pixelcmp_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
>> >+#if X265_DEPTH > 10
>> >+typedef uint64_t (*pixel_sse_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
>> >+typedef uint64_t (*pixel_sse_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
>> >+#endif
>> > typedef int (*pixel_ssd_s_t)(const int16_t* fenc, intptr_t fencstride);
>> > typedef void (*pixelcmp_x4_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
>> > typedef void (*pixelcmp_x3_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
>> >@@ -253,8 +257,14 @@
>> > copy_pp_t copy_pp; // alias to pu[].copy_pp
>> >
>> > var_t var; // block internal variance
>> >+
>> >+#if X265_DEPTH <= 10
>> > pixelcmp_t sse_pp; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
>> > pixelcmp_ss_t sse_ss; // Sum of Square Error (short, short) fenc alignment not assumed
>> >+#else
>> >+ pixel_sse_t sse_pp; // Sum of Square Error (pixel, pixel) fenc alignment not assumed
>> >+ pixel_sse_ss_t sse_ss; // Sum of Square Error (short, short) fenc alignment not assumed
>> >+#endif
>> > pixelcmp_t psy_cost_pp; // difference in AC energy between two pixel blocks
>> > pixelcmp_ss_t psy_cost_ss; // difference in AC energy between two signed residual blocks
>> > pixel_ssd_s_t ssd_s; // Sum of Square Error (residual coeff to self)
>> >@@ -358,7 +368,11 @@
>> > struct CUChroma
>> > {
>> > pixelcmp_t sa8d; // if chroma CU is not multiple of 8x8, will use satd
>> >+#if X265_DEPTH <= 10
>> > pixelcmp_t sse_pp;
>> >+#else
>> >+ pixel_sse_t sse_pp;
>> >+#endif
>> > pixel_sub_ps_t sub_ps;
>> > pixel_add_ps_t add_ps;
>> >
>> >diff -r 4781e6cef251 -r e2d9e7ea1add source/common/x86/asm-primitives.cpp
>> >--- a/source/common/x86/asm-primitives.cpp Fri Aug 07 12:29:40 2015 +0530
>> >+++ b/source/common/x86/asm-primitives.cpp Fri Aug 07 17:22:48 2015 +0530
>> >@@ -998,6 +998,7 @@
>> > p.cu[BLOCK_4x4].intra_pred[32] = PFX(intra_pred_ang4_32_sse2);
>> > p.cu[BLOCK_4x4].intra_pred[33] = PFX(intra_pred_ang4_33_sse2);
>> >
>> >+#if X265_DEPTH <= 10
>> > p.cu[BLOCK_4x4].sse_ss = PFX(pixel_ssd_ss_4x4_mmx2);
>> > ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2);
>> >
>> >@@ -1005,6 +1006,7 @@
>> > p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_8x16_sse2);
>> > p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_16x32_sse2);
>> > p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_32x64_sse2);
>> >+#endif
>> >
>> > p.cu[BLOCK_4x4].dct = PFX(dct4_sse2);
>> > p.cu[BLOCK_8x8].dct = PFX(dct8_sse2);
>> >@@ -1527,6 +1529,7 @@
>> > p.cu[BLOCK_16x16].ssd_s = PFX(pixel_ssd_s_16_avx2);
>> > p.cu[BLOCK_32x32].ssd_s = PFX(pixel_ssd_s_32_avx2);
>> >
>> >+#if X265_DEPTH <= 10
>> > p.cu[BLOCK_16x16].sse_ss = PFX(pixel_ssd_ss_16x16_avx2);
>> > p.cu[BLOCK_32x32].sse_ss = PFX(pixel_ssd_ss_32x32_avx2);
>> > p.cu[BLOCK_64x64].sse_ss = PFX(pixel_ssd_ss_64x64_avx2);
>> >@@ -1538,6 +1541,7 @@
>> > p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sse_pp = PFX(pixel_ssd_32x32_avx2);
>> > p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_16x32_avx2);
>> > p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_32x64_avx2);
>> >+#endif
>> >
>> > p.quant = PFX(quant_avx2);
>> > p.nquant = PFX(nquant_avx2);
>> >diff -r 4781e6cef251 -r e2d9e7ea1add source/encoder/rdcost.h
>> >--- a/source/encoder/rdcost.h Fri Aug 07 12:29:40 2015 +0530
>> >+++ b/source/encoder/rdcost.h Fri Aug 07 17:22:48 2015 +0530
>> >@@ -88,12 +88,21 @@
>> > m_lambda = (uint64_t)floor(256.0 * lambda);
>> > }
>> >
>> >+#if X265_DEPTH <= 10
>> > inline uint64_t calcRdCost(uint32_t distortion, uint32_t bits) const
>> > {
>> > X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
>> > "calcRdCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", distortion, bits, m_lambda2);
>> > return distortion + ((bits * m_lambda2 + 128) >> 8);
>> > }
>> >+#else
>> >+ inline uint64_t calcRdCost(uint64_t distortion, uint32_t bits) const
>> >+ {
>> >+ X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
>> >+ "calcRdCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", distortion, bits, m_lambda2);
>> >+ return distortion + ((bits * m_lambda2 + 128) >> 8);
>> >+ }
>> >+#endif
>> >
>> > /* return the difference in energy between the source block and the recon block */
>> > inline int psyCost(int size, const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride) const
>> >@@ -108,17 +117,33 @@
>> > }
>> >
>> > /* return the RD cost of this prediction, including the effect of psy-rd */
>> >+#if X265_DEPTH <= 10
>> > inline uint64_t calcPsyRdCost(uint32_t distortion, uint32_t bits, uint32_t psycost) const
>> > {
>> > return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);
>> > }
>> >+#else
>> >+ inline uint64_t calcPsyRdCost(uint64_t distortion, uint32_t bits, uint32_t psycost) const
>> >+ {
>> >+ return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);
>> >+ }
>> >+#endif
>> >
>> >+#if X265_DEPTH <= 10
>> > inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits) const
>> > {
>> > X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda,
>> > "calcRdSADCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", sadCost, bits, m_lambda);
>> > return sadCost + ((bits * m_lambda + 128) >> 8);
>> > }
>> >+#else
>> >+ inline uint64_t calcRdSADCost(uint64_t sadCost, uint32_t bits) const
>> >+ {
>> >+ X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda,
>> >+ "calcRdSADCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", sadCost, bits, m_lambda);
>> >+ return sadCost + ((bits * m_lambda + 128) >> 8);
>> >+ }
>> >+#endif
>> >
>> > inline uint32_t scaleChromaDist(uint32_t plane, uint32_t dist) const
>> > {
>> >diff -r 4781e6cef251 -r e2d9e7ea1add source/encoder/search.h
>> >--- a/source/encoder/search.h Fri Aug 07 12:29:40 2015 +0530
>> >+++ b/source/encoder/search.h Fri Aug 07 17:22:48 2015 +0530
>> >@@ -109,9 +109,15 @@
>> > uint64_t sa8dCost; // sum of partition sa8d distortion costs (sa8d(fenc, pred) + lambda * bits)
>> > uint32_t sa8dBits; // signal bits used in sa8dCost calculation
>> > uint32_t psyEnergy; // sum of partition psycho-visual energy difference
>> >+#if X265_DEPTH <= 10
>> > uint32_t lumaDistortion;
>> > uint32_t chromaDistortion;
>> > uint32_t distortion; // sum of partition SSE distortion
>> >+#else
>> >+ uint64_t lumaDistortion;
>> >+ uint64_t chromaDistortion;
>> >+ uint64_t distortion; // sum of partition SSE distortion
>> >+#endif
>> > uint32_t totalBits; // sum of partition bits (mv + coeff)
>> > uint32_t mvBits; // Mv bits + Ref + block type (or intra mode)
>> > uint32_t coeffBits; // Texture bits (DCT Coeffs)
>> >@@ -137,9 +143,15 @@
>> > sa8dCost = UINT64_MAX / 2;
>> > sa8dBits = MAX_UINT / 2;
>> > psyEnergy = MAX_UINT / 2;
>> >+#if X265_DEPTH <= 10
>> > lumaDistortion = MAX_UINT / 2;
>> > chromaDistortion = MAX_UINT / 2;
>> > distortion = MAX_UINT / 2;
>> >+#else
>> >+ lumaDistortion = UINT64_MAX / 2;
>> >+ chromaDistortion = UINT64_MAX / 2;
>> >+ distortion = UINT64_MAX / 2;
>> >+#endif
>> > totalBits = MAX_UINT / 2;
>> > mvBits = MAX_UINT / 2;
>> > coeffBits = MAX_UINT / 2;
>> >@@ -147,16 +159,29 @@
>> >
>> > bool ok() const
>> > {
>> >+#if X265_DEPTH <= 10
>> >+ return !(rdCost >= UINT64_MAX / 2 ||
>> >+ sa8dCost >= UINT64_MAX / 2 ||
>> >+ sa8dBits >= MAX_UINT / 2 ||
>> >+ psyEnergy >= MAX_UINT / 2 ||
>> >+ lumaDistortion >= MAX_UINT / 2 ||
>> >+ chromaDistortion >= MAX_UINT / 2 ||
>> >+ distortion >= MAX_UINT / 2 ||
>> >+ totalBits >= MAX_UINT / 2 ||
>> >+ mvBits >= MAX_UINT / 2 ||
>> >+ coeffBits >= MAX_UINT / 2);
>> >+#else
>> > return !(rdCost >= UINT64_MAX / 2 ||
>> > sa8dCost >= UINT64_MAX / 2 ||
>> > sa8dBits >= MAX_UINT / 2 ||
>> > psyEnergy >= MAX_UINT / 2 ||
>> >- lumaDistortion >= MAX_UINT / 2 ||
>> >- chromaDistortion >= MAX_UINT / 2 ||
>> >- distortion >= MAX_UINT / 2 ||
>> >+ lumaDistortion >= UINT64_MAX / 2 ||
>> >+ chromaDistortion >= UINT64_MAX / 2 ||
>> >+ distortion >= UINT64_MAX / 2 ||
>> > totalBits >= MAX_UINT / 2 ||
>> > mvBits >= MAX_UINT / 2 ||
>> > coeffBits >= MAX_UINT / 2);
>> >+#endif
>> > }
>> >
>> > void addSubCosts(const Mode& subMode)
>> >diff -r 4781e6cef251 -r e2d9e7ea1add source/test/pixelharness.cpp
>> >--- a/source/test/pixelharness.cpp Fri Aug 07 12:29:40 2015 +0530
>> >+++ b/source/test/pixelharness.cpp Fri Aug 07 17:22:48 2015 +0530
>> >@@ -1799,20 +1799,24 @@
>> > {
>> > if (opt.cu[part].sse_pp)
>> > {
>> >+#if X265_DEPTH <= 10
>> > if (!check_pixelcmp(ref.cu[part].sse_pp, opt.cu[part].sse_pp))
>> > {
>> > printf("sse_pp[%s]: failed!\n", lumaPartStr[part]);
>> > return false;
>> > }
>> >+#endif
>> > }
>> >
>> > if (opt.cu[part].sse_ss)
>> > {
>> >+#if X265_DEPTH <= 10
>> > if (!check_pixelcmp_ss(ref.cu[part].sse_ss, opt.cu[part].sse_ss))
>> > {
>> > printf("sse_ss[%s]: failed!\n", lumaPartStr[part]);
>> > return false;
>> > }
>> >+#endif
>> > }
>> >
>> > if (opt.cu[part].sub_ps)
>> >@@ -1891,11 +1895,13 @@
>> > {
>> > if (opt.chroma[i].cu[part].sse_pp)
>> > {
>> >+#if X265_DEPTH <= 10
>> > if (!check_pixelcmp(ref.chroma[i].cu[part].sse_pp, opt.chroma[i].cu[part].sse_pp))
>> > {
>> > printf("chroma_sse_pp[%s][%s]: failed!\n", x265_source_csp_names[i], chromaPartStr[i][part]);
>> > return false;
>> > }
>> >+#endif
>> > }
>> > if (opt.chroma[i].cu[part].sub_ps)
>> > {
>> >_______________________________________________
>> >x265-devel mailing list
>> >x265-devel at videolan.org
>> >https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>> _______________________________________________
>> x265-devel mailing list
>> x265-devel at videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20150810/6057c707/attachment-0001.html>
More information about the x265-devel mailing list