[x265] [PATCH] Main12: fix distortion data type

Tue Aug 11 15:47:59 CEST 2015

# HG changeset patch
# User Divya Manivannan <divya at multicorewareinc.com>
# Date 1438948368 -19800
#      Fri Aug 07 17:22:48 2015 +0530
# Branch stable
# Node ID 4468274744e5c66a5988b618e18199a55b8ee56e
# Parent  4781e6cef251006db10e107b2916741572f7760a
Main12: fix distortion data type

diff -r 4781e6cef251 -r 4468274744e5 source/common/common.h

--- a/source/common/common.h	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/common/common.h	Fri Aug 07 17:22:48 2015 +0530
@@ -134,6 +134,12 @@
 typedef int32_t  ssum2_t; // Signed sum
 #endif // if HIGH_BIT_DEPTH
 
+#if X265_DEPTH <= 10
+typedef uint32_t sse_ret_t;
+#else
+typedef uint64_t sse_ret_t;
+#endif
+
 #ifndef NULL
 #define NULL 0
 #endif
diff -r 4781e6cef251 -r 4468274744e5 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/common/pixel.cpp	Fri Aug 07 17:22:48 2015 +0530
@@ -117,9 +117,9 @@
 }
 
 template<int lx, int ly, class T1, class T2>
-int sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
+sse_ret_t sse(const T1* pix1, intptr_t stride_pix1, const T2* pix2, intptr_t stride_pix2)
 {
-    int sum = 0;
+    sse_ret_t sum = 0;
     int tmp;
 
     for (int y = 0; y < ly; y++)
diff -r 4781e6cef251 -r 4468274744e5 source/common/primitives.cpp
--- a/source/common/primitives.cpp	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/common/primitives.cpp	Fri Aug 07 17:22:48 2015 +0530
@@ -74,7 +74,7 @@
     /* at HIGH_BIT_DEPTH, pixel == short so we can alias many primitives */
     for (int i = 0; i < NUM_CU_SIZES; i++)
     {
-        p.cu[i].sse_pp = (pixelcmp_t)p.cu[i].sse_ss;
+        p.cu[i].sse_pp = (pixel_sse_t)p.cu[i].sse_ss;
 
         p.cu[i].copy_ps = (copy_ps_t)p.pu[i].copy_pp;
         p.cu[i].copy_sp = (copy_sp_t)p.pu[i].copy_pp;
diff -r 4781e6cef251 -r 4468274744e5 source/common/primitives.h
--- a/source/common/primitives.h	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/common/primitives.h	Fri Aug 07 17:22:48 2015 +0530
@@ -112,6 +112,8 @@
 
 typedef int  (*pixelcmp_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
 typedef int  (*pixelcmp_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
+typedef sse_ret_t (*pixel_sse_t)(const pixel* fenc, intptr_t fencstride, const pixel* fref, intptr_t frefstride); // fenc is aligned
+typedef sse_ret_t (*pixel_sse_ss_t)(const int16_t* fenc, intptr_t fencstride, const int16_t* fref, intptr_t frefstride);
 typedef int  (*pixel_ssd_s_t)(const int16_t* fenc, intptr_t fencstride);
 typedef void (*pixelcmp_x4_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
 typedef void (*pixelcmp_x3_t)(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
@@ -253,8 +255,9 @@
         copy_pp_t       copy_pp;       // alias to pu[].copy_pp
 
         var_t           var;           // block internal variance
-        pixelcmp_t      sse_pp;        // Sum of Square Error (pixel, pixel) fenc alignment not assumed
-        pixelcmp_ss_t   sse_ss;        // Sum of Square Error (short, short) fenc alignment not assumed
+
+        pixel_sse_t     sse_pp;        // Sum of Square Error (pixel, pixel) fenc alignment not assumed
+        pixel_sse_ss_t  sse_ss;        // Sum of Square Error (short, short) fenc alignment not assumed
         pixelcmp_t      psy_cost_pp;   // difference in AC energy between two pixel blocks
         pixelcmp_ss_t   psy_cost_ss;   // difference in AC energy between two signed residual blocks
         pixel_ssd_s_t   ssd_s;         // Sum of Square Error (residual coeff to self)
@@ -358,7 +361,7 @@
         struct CUChroma
         {
             pixelcmp_t     sa8d;    // if chroma CU is not multiple of 8x8, will use satd
-            pixelcmp_t     sse_pp;
+            pixel_sse_t    sse_pp;
             pixel_sub_ps_t sub_ps;
             pixel_add_ps_t add_ps;
 
diff -r 4781e6cef251 -r 4468274744e5 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp	Fri Aug 07 17:22:48 2015 +0530
@@ -998,13 +998,15 @@
         p.cu[BLOCK_4x4].intra_pred[32] = PFX(intra_pred_ang4_32_sse2);
         p.cu[BLOCK_4x4].intra_pred[33] = PFX(intra_pred_ang4_33_sse2);
 
+#if X265_DEPTH <= 10
         p.cu[BLOCK_4x4].sse_ss = PFX(pixel_ssd_ss_4x4_mmx2);
         ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2);
 
-        p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_4x8_mmx2);
-        p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_8x16_sse2);
-        p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_16x32_sse2);
-        p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_32x64_sse2);
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_4x8_mmx2);
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_8x16_sse2);
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_16x32_sse2);
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_32x64_sse2);
+#endif
 
         p.cu[BLOCK_4x4].dct = PFX(dct4_sse2);
         p.cu[BLOCK_8x8].dct = PFX(dct8_sse2);
@@ -1527,6 +1529,7 @@
         p.cu[BLOCK_16x16].ssd_s = PFX(pixel_ssd_s_16_avx2);
         p.cu[BLOCK_32x32].ssd_s = PFX(pixel_ssd_s_32_avx2);
 
+#if X265_DEPTH <= 10
         p.cu[BLOCK_16x16].sse_ss = PFX(pixel_ssd_ss_16x16_avx2);
         p.cu[BLOCK_32x32].sse_ss = PFX(pixel_ssd_ss_32x32_avx2);
         p.cu[BLOCK_64x64].sse_ss = PFX(pixel_ssd_ss_64x64_avx2);
@@ -1536,8 +1539,9 @@
         p.cu[BLOCK_64x64].sse_pp = PFX(pixel_ssd_64x64_avx2);
         p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].sse_pp = PFX(pixel_ssd_16x16_avx2);
         p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].sse_pp = PFX(pixel_ssd_32x32_avx2);
-        p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_16x32_avx2);
-        p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixelcmp_t)PFX(pixel_ssd_ss_32x64_avx2);
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_16x32_avx2);
+        p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sse_pp = (pixel_sse_t)PFX(pixel_ssd_ss_32x64_avx2);
+#endif
 
         p.quant = PFX(quant_avx2);
         p.nquant = PFX(nquant_avx2);
diff -r 4781e6cef251 -r 4468274744e5 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/common/x86/pixel.h	Fri Aug 07 17:22:48 2015 +0530
@@ -36,7 +36,7 @@
 void PFX(upShift_8_avx2)(const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
 
 #define DECL_PIXELS(cpu) \
-    FUNCDEF_PU(int, pixel_ssd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
+    FUNCDEF_PU(uint32_t, pixel_ssd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
     FUNCDEF_PU(int, pixel_sa8d, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
     FUNCDEF_PU(void, pixel_sad_x3, cpu, const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*); \
     FUNCDEF_PU(void, pixel_sad_x4, cpu, const pixel*, const pixel*, const pixel*, const pixel*, const pixel*, intptr_t, int32_t*); \
@@ -45,7 +45,7 @@
     FUNCDEF_PU(void, pixel_sub_ps, cpu, int16_t* a, intptr_t dstride, const pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1); \
     FUNCDEF_CHROMA_PU(int, pixel_satd, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
     FUNCDEF_CHROMA_PU(int, pixel_sad, cpu, const pixel*, intptr_t, const pixel*, intptr_t); \
-    FUNCDEF_CHROMA_PU(int, pixel_ssd_ss, cpu, const int16_t*, intptr_t, const int16_t*, intptr_t); \
+    FUNCDEF_CHROMA_PU(uint32_t, pixel_ssd_ss, cpu, const int16_t*, intptr_t, const int16_t*, intptr_t); \
     FUNCDEF_CHROMA_PU(void, addAvg, cpu, const int16_t*, const int16_t*, pixel*, intptr_t, intptr_t, intptr_t); \
     FUNCDEF_CHROMA_PU(int, pixel_ssd_s, cpu, const int16_t*, intptr_t); \
     FUNCDEF_TU_S(int, pixel_ssd_s, cpu, const int16_t*, intptr_t); \
diff -r 4781e6cef251 -r 4468274744e5 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/encoder/analysis.cpp	Fri Aug 07 17:22:48 2015 +0530
@@ -832,7 +832,7 @@
         else if (m_param->rdLevel > 1)
             updateModeCost(*splitPred);
         else
-            splitPred->sa8dCost = m_rdCost.calcRdSADCost(splitPred->distortion, splitPred->sa8dBits);
+            splitPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)splitPred->distortion, splitPred->sa8dBits);
     }
 
     /* Split CUs
@@ -1418,7 +1418,7 @@
             tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[1], fencYuv->m_csize, tempPred->predYuv.m_buf[1], tempPred->predYuv.m_csize);
             tempPred->distortion += primitives.chroma[m_csp].cu[sizeIdx].sa8d(fencYuv->m_buf[2], fencYuv->m_csize, tempPred->predYuv.m_buf[2], tempPred->predYuv.m_csize);
         }
-        tempPred->sa8dCost = m_rdCost.calcRdSADCost(tempPred->distortion, tempPred->sa8dBits);
+        tempPred->sa8dCost = m_rdCost.calcRdSADCost((uint32_t)tempPred->distortion, tempPred->sa8dBits);
 
         if (tempPred->sa8dCost < bestPred->sa8dCost)
         {
@@ -1634,7 +1634,7 @@
         interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[1], fencYuv.m_csize, predYuv.m_buf[1], predYuv.m_csize);
         interMode.distortion += primitives.chroma[m_csp].cu[part].sa8d(fencYuv.m_buf[2], fencYuv.m_csize, predYuv.m_buf[2], predYuv.m_csize);
     }
-    interMode.sa8dCost = m_rdCost.calcRdSADCost(interMode.distortion, interMode.sa8dBits);
+    interMode.sa8dCost = m_rdCost.calcRdSADCost((uint32_t)interMode.distortion, interMode.sa8dBits);
 
     if (m_param->analysisMode == X265_ANALYSIS_SAVE && m_reuseInterDataCTU)
     {
@@ -1947,7 +1947,7 @@
     else if (m_param->rdLevel <= 1)
     {
         mode.sa8dBits++;
-        mode.sa8dCost = m_rdCost.calcRdSADCost(mode.distortion, mode.sa8dBits);
+        mode.sa8dCost = m_rdCost.calcRdSADCost((uint32_t)mode.distortion, mode.sa8dBits);
     }
     else
     {
diff -r 4781e6cef251 -r 4468274744e5 source/encoder/rdcost.h
--- a/source/encoder/rdcost.h	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/encoder/rdcost.h	Fri Aug 07 17:22:48 2015 +0530
@@ -88,7 +88,7 @@
         m_lambda = (uint64_t)floor(256.0 * lambda);
     }
 
-    inline uint64_t calcRdCost(uint32_t distortion, uint32_t bits) const
+    inline uint64_t calcRdCost(sse_ret_t distortion, uint32_t bits) const
     {
         X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2,
                    "calcRdCost wrap detected dist: %u, bits %u, lambda: "X265_LL"\n", distortion, bits, m_lambda2);
@@ -108,7 +108,7 @@
     }
 
     /* return the RD cost of this prediction, including the effect of psy-rd */
-    inline uint64_t calcPsyRdCost(uint32_t distortion, uint32_t bits, uint32_t psycost) const
+    inline uint64_t calcPsyRdCost(sse_ret_t distortion, uint32_t bits, uint32_t psycost) const
     {
         return distortion + ((m_lambda * m_psyRd * psycost) >> 24) + ((bits * m_lambda2) >> 8);
     }
diff -r 4781e6cef251 -r 4468274744e5 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/encoder/search.cpp	Fri Aug 07 17:22:48 2015 +0530
@@ -3436,7 +3436,7 @@
             else if (m_param->rdLevel <= 1)
             {
                 mode.sa8dBits++;
-                mode.sa8dCost = m_rdCost.calcRdSADCost(mode.distortion, mode.sa8dBits);
+                mode.sa8dCost = m_rdCost.calcRdSADCost((uint32_t)mode.distortion, mode.sa8dBits);
             }
             else
             {
@@ -3481,7 +3481,7 @@
             else if (m_param->rdLevel <= 1)
             {
                 mode.sa8dBits++;
-                mode.sa8dCost = m_rdCost.calcRdSADCost(mode.distortion, mode.sa8dBits);
+                mode.sa8dCost = m_rdCost.calcRdSADCost((uint32_t)mode.distortion, mode.sa8dBits);
             }
             else
             {
diff -r 4781e6cef251 -r 4468274744e5 source/encoder/search.h
--- a/source/encoder/search.h	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/encoder/search.h	Fri Aug 07 17:22:48 2015 +0530
@@ -109,9 +109,9 @@
     uint64_t   sa8dCost;   // sum of partition sa8d distortion costs   (sa8d(fenc, pred) + lambda * bits)
     uint32_t   sa8dBits;   // signal bits used in sa8dCost calculation
     uint32_t   psyEnergy;  // sum of partition psycho-visual energy difference
-    uint32_t   lumaDistortion;
-    uint32_t   chromaDistortion;
-    uint32_t   distortion; // sum of partition SSE distortion
+    sse_ret_t  lumaDistortion;
+    sse_ret_t  chromaDistortion;
+    sse_ret_t  distortion; // sum of partition SSE distortion
     uint32_t   totalBits;  // sum of partition bits (mv + coeff)
     uint32_t   mvBits;     // Mv bits + Ref + block type (or intra mode)
     uint32_t   coeffBits;  // Texture bits (DCT Coeffs)
@@ -137,9 +137,15 @@
         sa8dCost = UINT64_MAX / 2;
         sa8dBits = MAX_UINT / 2;
         psyEnergy = MAX_UINT / 2;
+#if X265_DEPTH <= 10
         lumaDistortion = MAX_UINT / 2;
         chromaDistortion = MAX_UINT / 2;
         distortion = MAX_UINT / 2;
+#else
+        lumaDistortion = UINT64_MAX / 2;
+        chromaDistortion = UINT64_MAX / 2;
+        distortion = UINT64_MAX / 2;
+#endif
         totalBits = MAX_UINT / 2;
         mvBits = MAX_UINT / 2;
         coeffBits = MAX_UINT / 2;
@@ -147,16 +153,29 @@
 
     bool ok() const
     {
+#if X265_DEPTH <= 10
+        return !(rdCost >= UINT64_MAX / 2 ||
+            sa8dCost >= UINT64_MAX / 2 ||
+            sa8dBits >= MAX_UINT / 2 ||
+            psyEnergy >= MAX_UINT / 2 ||
+            lumaDistortion >= MAX_UINT / 2 ||
+            chromaDistortion >= MAX_UINT / 2 ||
+            distortion >= MAX_UINT / 2 ||
+            totalBits >= MAX_UINT / 2 ||
+            mvBits >= MAX_UINT / 2 ||
+            coeffBits >= MAX_UINT / 2);
+#else
         return !(rdCost >= UINT64_MAX / 2 ||
                  sa8dCost >= UINT64_MAX / 2 ||
                  sa8dBits >= MAX_UINT / 2 ||
                  psyEnergy >= MAX_UINT / 2 ||
-                 lumaDistortion >= MAX_UINT / 2 ||
-                 chromaDistortion >= MAX_UINT / 2 ||
-                 distortion >= MAX_UINT / 2 ||
+                 lumaDistortion >= UINT64_MAX / 2 ||
+                 chromaDistortion >= UINT64_MAX / 2 ||
+                 distortion >= UINT64_MAX / 2 ||
                  totalBits >= MAX_UINT / 2 ||
                  mvBits >= MAX_UINT / 2 ||
                  coeffBits >= MAX_UINT / 2);
+#endif
     }
 
     void addSubCosts(const Mode& subMode)
diff -r 4781e6cef251 -r 4468274744e5 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/test/pixelharness.cpp	Fri Aug 07 17:22:48 2015 +0530
@@ -94,7 +94,7 @@
     return true;
 }
 
-bool PixelHarness::check_pixelcmp_ss(pixelcmp_ss_t ref, pixelcmp_ss_t opt)
+bool PixelHarness::check_pixel_sse(pixel_sse_t ref, pixel_sse_t opt)
 {
     int j = 0;
     intptr_t stride = STRIDE;
@@ -103,8 +103,29 @@
     {
         int index1 = rand() % TEST_CASES;
         int index2 = rand() % TEST_CASES;
-        int vres = (int)checked(opt, short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
-        int cres = ref(short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
+        sse_ret_t vres = (sse_ret_t)checked(opt, pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
+        sse_ret_t cres = ref(pixel_test_buff[index1], stride, pixel_test_buff[index2] + j, stride);
+        if (vres != cres)
+            return false;
+
+        reportfail();
+        j += INCR;
+    }
+
+    return true;
+}
+
+bool PixelHarness::check_pixel_sse_ss(pixel_sse_ss_t ref, pixel_sse_ss_t opt)
+{
+    int j = 0;
+    intptr_t stride = STRIDE;
+
+    for (int i = 0; i < ITERS; i++)
+    {
+        int index1 = rand() % TEST_CASES;
+        int index2 = rand() % TEST_CASES;
+        sse_ret_t vres = (sse_ret_t)checked(opt, short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
+        sse_ret_t cres = ref(short_test_buff[index1], stride, short_test_buff[index2] + j, stride);
         if (vres != cres)
             return false;
 
@@ -1799,7 +1820,7 @@
     {
         if (opt.cu[part].sse_pp)
         {
-            if (!check_pixelcmp(ref.cu[part].sse_pp, opt.cu[part].sse_pp))
+            if (!check_pixel_sse(ref.cu[part].sse_pp, opt.cu[part].sse_pp))
             {
                 printf("sse_pp[%s]: failed!\n", lumaPartStr[part]);
                 return false;
@@ -1808,7 +1829,7 @@
 
         if (opt.cu[part].sse_ss)
         {
-            if (!check_pixelcmp_ss(ref.cu[part].sse_ss, opt.cu[part].sse_ss))
+            if (!check_pixel_sse_ss(ref.cu[part].sse_ss, opt.cu[part].sse_ss))
             {
                 printf("sse_ss[%s]: failed!\n", lumaPartStr[part]);
                 return false;
@@ -1891,7 +1912,7 @@
         {
             if (opt.chroma[i].cu[part].sse_pp)
             {
-                if (!check_pixelcmp(ref.chroma[i].cu[part].sse_pp, opt.chroma[i].cu[part].sse_pp))
+                if (!check_pixel_sse(ref.chroma[i].cu[part].sse_pp, opt.chroma[i].cu[part].sse_pp))
                 {
                     printf("chroma_sse_pp[%s][%s]: failed!\n", x265_source_csp_names[i], chromaPartStr[i][part]);
                     return false;
diff -r 4781e6cef251 -r 4468274744e5 source/test/pixelharness.h
--- a/source/test/pixelharness.h	Fri Aug 07 12:29:40 2015 +0530
+++ b/source/test/pixelharness.h	Fri Aug 07 17:22:48 2015 +0530
@@ -66,7 +66,8 @@
     double   double_test_buff[TEST_CASES][BUFFSIZE];
 
     bool check_pixelcmp(pixelcmp_t ref, pixelcmp_t opt);
-    bool check_pixelcmp_ss(pixelcmp_ss_t ref, pixelcmp_ss_t opt);
+    bool check_pixel_sse(pixel_sse_t ref, pixel_sse_t opt);
+    bool check_pixel_sse_ss(pixel_sse_ss_t ref, pixel_sse_ss_t opt);
     bool check_pixelcmp_x3(pixelcmp_x3_t ref, pixelcmp_x3_t opt);
     bool check_pixelcmp_x4(pixelcmp_x4_t ref, pixelcmp_x4_t opt);
     bool check_copy_pp(copy_pp_t ref, copy_pp_t opt);