<div dir="ltr">Thanks. I will make the change and send the patch again.</div><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Aug 10, 2015 at 10:15 AM, Deepthi Nandakumar <span dir="ltr"><<a href="mailto:deepthi@multicorewareinc.com" target="_blank">deepthi@multicorewareinc.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Also, calcRdSADCost does not need to move to 64-bit precision. The SAD cost will not exceed the dynamic range, as there is no squaring. </div><div class="HOEnZb"><div class="h5"><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Aug 7, 2015 at 7:55 PM, chen <span dir="ltr"><<a href="mailto:chenm003@163.com" target="_blank">chenm003@163.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div style="line-height:1.7;color:#000000;font-size:14px;font-family:arial"><div>Dynamic range:</div><div><br></div><div>10 bits: 10 + 10 + 12 = 32 bits.</div><div>12 bits: 12 + 12 + 12 = 36 bits.<br></div><pre><br>So for 10 bits, we don't need uint64_t.</pre><div><div><pre><br></pre><pre>At 2015-08-07 20:02:13, "Divya Manivannan" <<a href="mailto:divya@multicorewareinc.com" target="_blank">divya@multicorewareinc.com</a>> wrote:


># HG changeset patch


># User Divya Manivannan <<a href="mailto:divya@multicorewareinc.com" target="_blank">divya@multicorewareinc.com</a>>


># Date 1438948368 -19800


>#      Fri Aug 07 17:22:48 2015 +0530


># Branch stable


># Node ID e2d9e7ea1add7cfacb40314155c3a23aa6e63426


># Parent  4781e6cef251006db10e107b2916741572f7760a


>Main12: fix distortion data type


>


>diff -r 4781e6cef251 -r e2d9e7ea1add source/common/pixel.cpp


>--- a/source/common/pixel.cpp      Fri Aug 07 12:29:40 2015 +0530


>+++ b/source/common/pixel.cpp      Fri Aug 07 17:22:48 2015 +0530


>@@ -137,6 +137,27 @@


>     return sum;


> }


> 


>+template<int lx, int ly, class T1, class T2>


>+uint64_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)


>+{


>+    uint64_t sum = 0;


>+    int tmp;


>+


>+    for (int y = 0; y < ly; y++)


>+    {


>+        for (int x = 0; x < lx; x++)


>+        {


>+            tmp = pix1[x] - pix2[x];


>+            sum += (tmp * tmp);


>+        }


>+


>+        pix1 += stride_pix1;


>+        pix2 += stride_pix2;


>+    }


>+


>+    return sum;


>+}


>+


> #define BITS_PER_SUM (8 * sizeof(sum_t))


> 


> #define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) { \


>diff -r 4781e6cef251 -r e2d9e7ea1add source/common/primitives.cpp


>--- a/source/common/primitives.cpp Fri Aug 07 12:29:40 2015 +0530


>+++ b/source/common/primitives.cpp Fri Aug 07 17:22:48 2015 +0530


>@@ -74,7 +74,11 @@


>     /* at HIGH_BIT_DEPTH, pixel == short so we can alias many primitives */


>     for (int i = 0; i < NUM_CU_SIZES; i++)


>     {


>+#if X265_DEPTH <= 10


>         <a href="http://p.cu" target="_blank">p.cu</a>[i].sse_pp = (pixelcmp_t)<a href="http://p.cu" target="_blank">p.cu</a>[i].sse_ss;


>+#else


>+        <a href="http://p.cu" target="_blank">p.cu</a>[i].sse_pp = (pixel_sse_t)<a href="http://p.cu" target="_blank">p.cu</a>[i].sse_ss;


>+#endif


> 


>         <a href="http://p.cu" target="_blank">p.cu</a>[i].copy_ps = (copy_ps_t)p.pu[i].copy_pp;


>         <a href="http://p.cu" target="_blank">p.cu</a>[i].copy_sp = (copy_sp_t)p.pu[i].copy_pp;


>diff -r 4781e6cef251 -r e2d9e7ea1add source/common/primitives.h


>--- a/source/common/primitives.h   Fri Aug 07 12:29:40 2015 +0530


>+++ b/source/common/primitives.h   Fri Aug 07 17:22:48 2015 +0530


>@@ -112,6 +112,10 @@


> 


> typedef int  (*pixelcmp_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned


> typedef int  (*pixelcmp_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);


>+#if X265_DEPTH > 10


>+typedef uint64_t (*pixel_sse_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned


>+typedef uint64_t (*pixel_sse_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);


>+#endif


> typedef int  (*pixel_ssd_s_t)(const int16_t* fenc, intptr_t fencstride);


> typedef void (*pixelcmp_x4_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);


> typedef void (*pixelcmp_x3_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);


>@@ -253,8 +257,14 @@


>         copy_pp_t       copy_pp;       // alias to pu[].copy_pp


> 


>         var_t           var;           // block internal variance


>+


>+#if X265_DEPTH <= 10


>         pixelcmp_t      sse_pp;        // Sum of Square Error (pixel, pixel) fenc alignment not assumed


>         pixelcmp_ss_t   sse_ss;        // Sum of Square Error (short, short) fenc alignment not assumed


>+#else


>+        pixel_sse_t     sse_pp;        // Sum of Square Error (pixel, pixel) fenc alignment not assumed


>+        pixel_sse_ss_t  sse_ss;        // Sum of Square Error (short, short) fenc alignment not assumed


>+#endif


>         pixelcmp_t      psy_cost_pp;   // difference in AC energy between two pixel blocks


>         pixelcmp_ss_t   psy_cost_ss;   // difference in AC energy between two signed residual blocks


>         pixel_ssd_s_t   ssd_s;         // Sum of Square Error (residual coeff to self)


>@@ -358,7 +368,11 @@


>         struct CUChroma


>         {


>             pixelcmp_t     sa8d;    // if chroma CU is not multiple of 8x8, will use satd


>+#if X265_DEPTH <= 10


>             pixelcmp_t     sse_pp;


>+#else


>+            pixel_sse_t    sse_pp;


>+#endif


>             pixel_sub_ps_t sub_ps;


>             pixel_add_ps_t add_ps;


> 


>diff -r 4781e6cef251 -r e2d9e7ea1add source/common/x86/asm-primitives.cpp


>--- a/source/common/x86/asm-primitives.cpp Fri Aug 07 12:29:40 2015 +0530


>+++ b/source/common/x86/asm-primitives.cpp Fri Aug 07 17:22:48 2015 +0530


>@@ -998,6 +998,7 @@


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_4x4].intra_pred[32] = PFX(intra_pred_ang4_32_sse2);


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_4x4].intra_pred[33] = PFX(intra_pred_ang4_33_sse2);


> 


>+#if X265_DEPTH <= 10


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_4x4].sse_ss = PFX(pixel_ssd_ss_4x4_mmx2);


>         ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2);


> 


>@@ -1005,6 +1006,7 @@


>         p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_8x16_sse2);


>         p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_16x32_sse2);


>         p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_32x64_sse2);


>+#endif


> 


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_4x4].dct = PFX(dct4_sse2);


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_8x8].dct = PFX(dct8_sse2);


>@@ -1527,6 +1529,7 @@


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_16x16].ssd_s = PFX(pixel_ssd_s_16_avx2);


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_32x32].ssd_s = PFX(pixel_ssd_s_32_avx2);


> 


>+#if X265_DEPTH <= 10


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_16x16].sse_ss = PFX(pixel_ssd_ss_16x16_avx2);


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_32x32].sse_ss = PFX(pixel_ssd_ss_32x32_avx2);


>         <a href="http://p.cu" target="_blank">p.cu</a>[BLOCK_64x64].sse_ss = PFX(pixel_ssd_ss_64x64_avx2);


>@@ -1538,6 +1541,7 @@


>         p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sse_pp = PFX(pixel_ssd_32x32_avx2);


>         p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_16x32_avx2);


>         p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_32x64_avx2);


>+#endif


> 


>         p.quant = PFX(quant_avx2);


>         p.nquant = PFX(nquant_avx2);


>diff -r 4781e6cef251 -r e2d9e7ea1add source/encoder/rdcost.h


>--- a/source/encoder/rdcost.h      Fri Aug 07 12:29:40 2015 +0530


>+++ b/source/encoder/rdcost.h      Fri Aug 07 17:22:48 2015 +0530


>@@ -88,12 +88,21 @@


>         m_lambda = (uint64_t)floor(256.0 * lambda);


>     }


> 


>+#if X265_DEPTH <= 10


>     inline uint64_t calcRdCost(uint32_t distortion, uint32_t bits) const


>     {


>         X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,


>                    "calcRdCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", distortion, bits, m_lambda2);


>         return distortion + ((bits * m_lambda2 + 128) >> 8);


>     }


>+#else


>+    inline uint64_t calcRdCost(uint64_t distortion, uint32_t bits) const


>+    {


>+        X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,


>+            "calcRdCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", distortion, bits, m_lambda2);


>+        return distortion + ((bits * m_lambda2 + 128) >> 8);


>+    }


>+#endif


> 


>     /* return the difference in energy between the source block and the recon block */


>     inline int psyCost(int size, const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride) const


>@@ -108,17 +117,33 @@


>     }


> 


>     /* return the RD cost of this prediction, including the effect of psy-rd */


>+#if X265_DEPTH <= 10


>     inline uint64_t calcPsyRdCost(uint32_t distortion, uint32_t bits, uint32_t psycost) const


>     {


>         return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);


>     }


>+#else


>+    inline uint64_t calcPsyRdCost(uint64_t distortion, uint32_t bits, uint32_t psycost) const


>+    {


>+        return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);


>+    }


>+#endif


> 


>+#if X265_DEPTH <= 10


>     inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits) const


>     {


>         X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda,


>                    "calcRdSADCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", sadCost, bits, m_lambda);


>         return sadCost + ((bits * m_lambda + 128) >> 8);


>     }


>+#else


>+    inline uint64_t calcRdSADCost(uint64_t sadCost, uint32_t bits) const


>+    {


>+        X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda,


>+            "calcRdSADCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", sadCost, bits, m_lambda);


>+        return sadCost + ((bits * m_lambda + 128) >> 8);


>+    }


>+#endif


> 


>     inline uint32_t scaleChromaDist(uint32_t plane, uint32_t dist) const


>     {


>diff -r 4781e6cef251 -r e2d9e7ea1add source/encoder/search.h


>--- a/source/encoder/search.h      Fri Aug 07 12:29:40 2015 +0530


>+++ b/source/encoder/search.h      Fri Aug 07 17:22:48 2015 +0530


>@@ -109,9 +109,15 @@


>     uint64_t   sa8dCost;   // sum of partition sa8d distortion costs   (sa8d(fenc, pred) + lambda * bits)


>     uint32_t   sa8dBits;   // signal bits used in sa8dCost calculation


>     uint32_t   psyEnergy;  // sum of partition psycho-visual energy difference


>+#if X265_DEPTH <= 10


>     uint32_t   lumaDistortion;


>     uint32_t   chromaDistortion;


>     uint32_t   distortion; // sum of partition SSE distortion


>+#else


>+    uint64_t   lumaDistortion;


>+    uint64_t   chromaDistortion;


>+    uint64_t   distortion; // sum of partition SSE distortion


>+#endif


>     uint32_t   totalBits;  // sum of partition bits (mv + coeff)


>     uint32_t   mvBits;     // Mv bits + Ref + block type (or intra mode)


>     uint32_t   coeffBits;  // Texture bits (DCT Coeffs)


>@@ -137,9 +143,15 @@


>         sa8dCost = UINT64_MAX / 2;


>         sa8dBits = MAX_UINT / 2;


>         psyEnergy = MAX_UINT / 2;


>+#if X265_DEPTH <= 10


>         lumaDistortion = MAX_UINT / 2;


>         chromaDistortion = MAX_UINT / 2;


>         distortion = MAX_UINT / 2;


>+#else


>+        lumaDistortion = UINT64_MAX / 2;


>+        chromaDistortion = UINT64_MAX / 2;


>+        distortion = UINT64_MAX / 2;


>+#endif


>         totalBits = MAX_UINT / 2;


>         mvBits = MAX_UINT / 2;


>         coeffBits = MAX_UINT / 2;


>@@ -147,16 +159,29 @@


> 


>     bool ok() const


>     {


>+#if X265_DEPTH <= 10


>+        return !(rdCost >= UINT64_MAX / 2 ||


>+            sa8dCost >= UINT64_MAX / 2 ||


>+            sa8dBits >= MAX_UINT / 2 ||


>+            psyEnergy >= MAX_UINT / 2 ||


>+            lumaDistortion >= MAX_UINT / 2 ||


>+            chromaDistortion >= MAX_UINT / 2 ||


>+            distortion >= MAX_UINT / 2 ||


>+            totalBits >= MAX_UINT / 2 ||


>+            mvBits >= MAX_UINT / 2 ||


>+            coeffBits >= MAX_UINT / 2);


>+#else


>         return !(rdCost >= UINT64_MAX / 2 ||


>                  sa8dCost >= UINT64_MAX / 2 ||


>                  sa8dBits >= MAX_UINT / 2 ||


>                  psyEnergy >= MAX_UINT / 2 ||


>-                 lumaDistortion >= MAX_UINT / 2 ||


>-                 chromaDistortion >= MAX_UINT / 2 ||


>-                 distortion >= MAX_UINT / 2 ||


>+                 lumaDistortion >= UINT64_MAX / 2 ||


>+                 chromaDistortion >= UINT64_MAX / 2 ||


>+                 distortion >= UINT64_MAX / 2 ||


>                  totalBits >= MAX_UINT / 2 ||


>                  mvBits >= MAX_UINT / 2 ||


>                  coeffBits >= MAX_UINT / 2);


>+#endif


>     }


> 


>     void addSubCosts(const Mode& subMode)


>diff -r 4781e6cef251 -r e2d9e7ea1add source/test/pixelharness.cpp


>--- a/source/test/pixelharness.cpp Fri Aug 07 12:29:40 2015 +0530


>+++ b/source/test/pixelharness.cpp Fri Aug 07 17:22:48 2015 +0530


>@@ -1799,20 +1799,24 @@


>     {


>         if (<a href="http://opt.cu" target="_blank">opt.cu</a>[part].sse_pp)


>         {


>+#if X265_DEPTH <= 10


>             if (!check_pixelcmp(<a href="http://ref.cu" target="_blank">ref.cu</a>[part].sse_pp, <a href="http://opt.cu" target="_blank">opt.cu</a>[part].sse_pp))


>             {


>                 printf("sse_pp[%s]: failed!\n", lumaPartStr[part]);


>                 return false;


>             }


>+#endif


>         }


> 


>         if (<a href="http://opt.cu" target="_blank">opt.cu</a>[part].sse_ss)


>         {


>+#if X265_DEPTH <= 10


>             if (!check_pixelcmp_ss(<a href="http://ref.cu" target="_blank">ref.cu</a>[part].sse_ss, <a href="http://opt.cu" target="_blank">opt.cu</a>[part].sse_ss))


>             {


>                 printf("sse_ss[%s]: failed!\n", lumaPartStr[part]);


>                 return false;


>             }


>+#endif


>         }


> 


>         if (<a href="http://opt.cu" target="_blank">opt.cu</a>[part].sub_ps)


>@@ -1891,11 +1895,13 @@


>         {


>             if (opt.chroma[i].cu[part].sse_pp)


>             {


>+#if X265_DEPTH <= 10


>                 if (!check_pixelcmp(ref.chroma[i].cu[part].sse_pp, opt.chroma[i].cu[part].sse_pp))


>                 {


>                     printf("chroma_sse_pp[%s][%s]: failed!\n", x265_source_csp_names[i], chromaPartStr[i][part]);


>                     return false;


>                 }


>+#endif


>             }


>             if (opt.chroma[i].cu[part].sub_ps)


>             {


>_______________________________________________


>x265-devel mailing list


><a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a>


><a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a>


</pre></div></div></div><br>_______________________________________________<br>


x265-devel mailing list<br>


<a href="mailto:x265-devel@videolan.org" target="_blank">x265-devel@videolan.org</a><br>


<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>


<br></blockquote></div><br></div>


</div></div><br>_______________________________________________<br>


x265-devel mailing list<br>


<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>


<a href="https://mailman.videolan.org/listinfo/x265-devel" rel="noreferrer" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>


<br></blockquote></div><br></div>