[x265] [PATCH] conv16to32_count renamed to copy_count as per new interface

praveen at multicorewareinc.com praveen at multicorewareinc.com
Tue Sep 2 16:12:17 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1409648260 -19800
# Node ID d0d545f2982ff9600b05e70ee4f066ce28ba51a7
# Parent  a70b4e57aac2c535add2de15145c2a86638116f4
conv16to32_count renamed to copy_count as per new interface

diff -r a70b4e57aac2 -r d0d545f2982f source/common/dct.cpp
--- a/source/common/dct.cpp	Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/dct.cpp	Tue Sep 02 14:27:40 2014 +0530
@@ -827,7 +827,7 @@
 }
 
 template<int trSize>
-uint32_t conv16to32_count(int16_t* coeff, int16_t* residual, intptr_t stride)
+uint32_t copy_count(int16_t* coeff, int16_t* residual, intptr_t stride)
 {
     uint32_t numSig = 0;
     for (int k = 0; k < trSize; k++)
@@ -879,9 +879,9 @@
     p.count_nonzero = count_nonzero_c;
     p.denoiseDct = denoiseDct_c;
 
-    p.cvt16to32_cnt[BLOCK_4x4] = conv16to32_count<4>;
-    p.cvt16to32_cnt[BLOCK_8x8] = conv16to32_count<8>;
-    p.cvt16to32_cnt[BLOCK_16x16] = conv16to32_count<16>;
-    p.cvt16to32_cnt[BLOCK_32x32] = conv16to32_count<32>;
+    p.copy_cnt[BLOCK_4x4] = copy_count<4>;
+    p.copy_cnt[BLOCK_8x8] = copy_count<8>;
+    p.copy_cnt[BLOCK_16x16] = copy_count<16>;
+    p.copy_cnt[BLOCK_32x32] = copy_count<32>;
 }
 }
diff -r a70b4e57aac2 -r d0d545f2982f source/common/primitives.h
--- a/source/common/primitives.h	Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/primitives.h	Tue Sep 02 14:27:40 2014 +0530
@@ -153,7 +153,7 @@
 typedef void (*cvt16to32_shr_t)(int32_t *dst, int16_t *src, intptr_t, int, int);
 typedef void (*cvt32to16_shr_t)(int16_t *dst, int32_t *src, intptr_t, int, int);
 typedef void (*cvt32to16_shl_t)(int16_t *dst, int32_t *src, intptr_t, int);
-typedef uint32_t (*cvt16to32_cnt_t)(int16_t* coeff, int16_t* residual, intptr_t stride);
+typedef uint32_t (*copy_cnt_t)(int16_t* coeff, int16_t* residual, intptr_t stride);
 
 typedef void (*dct_t)(int16_t *src, int32_t *dst, intptr_t stride);
 typedef void (*idct_t)(int32_t *src, int16_t *dst, intptr_t stride);
@@ -226,7 +226,7 @@
     cvt16to32_shr_t cvt16to32_shr[NUM_SQUARE_BLOCKS - 1];
     cvt32to16_shr_t cvt32to16_shr;
     cvt32to16_shl_t cvt32to16_shl[NUM_SQUARE_BLOCKS - 1];
-    cvt16to32_cnt_t cvt16to32_cnt[NUM_SQUARE_BLOCKS - 1];
+    copy_cnt_t      copy_cnt[NUM_SQUARE_BLOCKS - 1];
 
     copy_pp_t       luma_copy_pp[NUM_LUMA_PARTITIONS];
     copy_sp_t       luma_copy_sp[NUM_LUMA_PARTITIONS];
diff -r a70b4e57aac2 -r d0d545f2982f source/common/quant.cpp
--- a/source/common/quant.cpp	Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/quant.cpp	Tue Sep 02 14:27:40 2014 +0530
@@ -323,7 +323,7 @@
         {
              qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
         }
-        int numSign = primitives.cvt16to32_cnt[log2TrSize - 2](qCoeff, residual, stride);
+        int numSign = primitives.copy_cnt[log2TrSize - 2](qCoeff, residual, stride);
 
         /* This section of code is to safely convert int16_t coefficients to int32_t, once the caller function is
          * optimize to take coefficients as int16_t*, it will be cleanse.*/
diff -r a70b4e57aac2 -r d0d545f2982f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp	Tue Sep 02 14:27:40 2014 +0530
@@ -1601,10 +1601,10 @@
         p.cvt16to32_shr[BLOCK_32x32] = x265_cvt16to32_shr_32_sse4;
 
         // TODO: check POPCNT flag!
-        p.cvt16to32_cnt[BLOCK_4x4] = x265_cvt16to32_cnt_4_sse4;
-        p.cvt16to32_cnt[BLOCK_8x8] = x265_cvt16to32_cnt_8_sse4;
-        p.cvt16to32_cnt[BLOCK_16x16] = x265_cvt16to32_cnt_16_sse4;
-        p.cvt16to32_cnt[BLOCK_32x32] = x265_cvt16to32_cnt_32_sse4;
+        p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_sse4;
+        p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_sse4;
+        p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_sse4;
+        p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_sse4;
 
         HEVC_SATD(sse4);
         SA8D_INTER_FROM_BLOCK(sse4);
@@ -1706,10 +1706,10 @@
         p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
         p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
         p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2;
-        p.cvt16to32_cnt[BLOCK_4x4] = x265_cvt16to32_cnt_4_avx2;
-        p.cvt16to32_cnt[BLOCK_8x8] = x265_cvt16to32_cnt_8_avx2;
-        p.cvt16to32_cnt[BLOCK_16x16] = x265_cvt16to32_cnt_16_avx2;
-        p.cvt16to32_cnt[BLOCK_32x32] = x265_cvt16to32_cnt_32_avx2;
+        p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
+        p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
+        p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
+        p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
         p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_avx2;
         p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_avx2;
         p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_avx2;
diff -r a70b4e57aac2 -r d0d545f2982f source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm	Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/x86/blockcopy8.asm	Tue Sep 02 14:27:40 2014 +0530
@@ -3950,10 +3950,10 @@
 
 
 ;--------------------------------------------------------------------------------------
-; uint32_t cvt16to32_cnt(int16_t *dst, int16_t *src, intptr_t stride);
+; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
 ;--------------------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal cvt16to32_cnt_4, 3,3,5
+cglobal copy_cnt_4, 3,3,5
     add         r2d, r2d
     pxor        m4, m4
 
@@ -3994,7 +3994,7 @@
 
 
 INIT_YMM avx2
-cglobal cvt16to32_cnt_4, 3,3,5
+cglobal copy_cnt_4, 3,3,5
     add         r2d, r2d
     pxor        m4, m4
 
@@ -4022,10 +4022,10 @@
 
 
 ;--------------------------------------------------------------------------------------
-; uint32_t cvt16to32_cnt(int16_t *dst, int16_t *src, intptr_t stride);
+; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
 ;--------------------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal cvt16to32_cnt_8, 3,3,6
+cglobal copy_cnt_8, 3,3,6
     add         r2d, r2d
     pxor        m4, m4
     pxor        m5, m5
@@ -4085,10 +4085,10 @@
 
 INIT_YMM avx2
 %if ARCH_X86_64 == 1
-cglobal cvt16to32_cnt_8, 3,4,6
+cglobal copy_cnt_8, 3,4,6
   %define tmpd eax
 %else
-cglobal cvt16to32_cnt_8, 3,5,6
+cglobal copy_cnt_8, 3,5,6
   %define tmpd r4d
 %endif
     add         r2d, r2d
@@ -4167,10 +4167,10 @@
 
 
 ;--------------------------------------------------------------------------------------
-; uint32_t cvt16to32_cnt(int16_t *dst, int16_t *src, intptr_t stride);
+; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
 ;--------------------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal cvt16to32_cnt_16, 3,4,6
+cglobal copy_cnt_16, 3,4,6
      add         r2d, r2d
      mov         r3d, 4
      pxor        m4, m4
@@ -4233,7 +4233,7 @@
 
 
 INIT_YMM avx2
-cglobal cvt16to32_cnt_16, 3,5,5
+cglobal copy_cnt_16, 3,5,5
     add         r2d, r2d
     lea         r4, [r2 * 3]
     mov         r3d, 16/4
@@ -4299,10 +4299,10 @@
     RET
 
 ;--------------------------------------------------------------------------------------
-; uint32_t cvt16to32_cnt(int32_t *dst, int16_t *src, intptr_t stride);
+; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
 ;--------------------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal cvt16to32_cnt_32, 3,4,6
+cglobal copy_cnt_32, 3,4,6
     add         r2d, r2d
     mov         r3d, 16
     pxor        m4, m4
@@ -4363,7 +4363,7 @@
 
 
 INIT_YMM avx2
-cglobal cvt16to32_cnt_32, 3,4,5
+cglobal copy_cnt_32, 3,4,5
     add         r2d, r2d
     mov         r3d, 32/1
     xorpd       m3, m3
diff -r a70b4e57aac2 -r d0d545f2982f source/common/x86/blockcopy8.h
--- a/source/common/x86/blockcopy8.h	Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/x86/blockcopy8.h	Tue Sep 02 14:27:40 2014 +0530
@@ -38,14 +38,14 @@
 void x265_cvt16to32_shr_8_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
 void x265_cvt16to32_shr_16_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
 void x265_cvt16to32_shr_32_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
-uint32_t x265_cvt16to32_cnt_4_sse4(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_8_sse4(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_16_sse4(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_32_sse4(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_4_avx2(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_8_avx2(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_16_avx2(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_32_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_4_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_8_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_16_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_32_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_4_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_8_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_16_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_32_avx2(int16_t * dst, int16_t * src, intptr_t);
 
 #define SETUP_BLOCKCOPY_FUNC(W, H, cpu) \
     void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); \
diff -r a70b4e57aac2 -r d0d545f2982f source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Tue Sep 02 14:04:02 2014 +0530
+++ b/source/test/pixelharness.cpp	Tue Sep 02 14:27:40 2014 +0530
@@ -580,7 +580,7 @@
     return true;
 }
 
-bool PixelHarness::check_cvt16to32_cnt_t(cvt16to32_cnt_t ref, cvt16to32_cnt_t opt)
+bool PixelHarness::check_copy_cnt_t(copy_cnt_t ref, copy_cnt_t opt)
 {
     ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
     ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
@@ -1378,11 +1378,11 @@
             }
         }
 
-        if ((i < BLOCK_64x64) && opt.cvt16to32_cnt[i])
+        if ((i < BLOCK_64x64) && opt.copy_cnt[i])
         {
-            if (!check_cvt16to32_cnt_t(ref.cvt16to32_cnt[i], opt.cvt16to32_cnt[i]))
+            if (!check_copy_cnt_t(ref.copy_cnt[i], opt.copy_cnt[i]))
             {
-                printf("cvt16to32_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
+                printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
                 return false;
             }
         }
@@ -1779,10 +1779,10 @@
             REPORT_SPEEDUP(opt.cvt32to16_shl[i], ref.cvt32to16_shl[i], sbuf2, ibuf1, STRIDE, 3);
         }
 
-        if ((i < BLOCK_64x64) && opt.cvt16to32_cnt[i])
+        if ((i < BLOCK_64x64) && opt.copy_cnt[i])
         {
-            HEADER("cvt16to32_cnt[%dx%d]", 4 << i, 4 << i);
-            REPORT_SPEEDUP(opt.cvt16to32_cnt[i], ref.cvt16to32_cnt[i], sbuf1, sbuf2, STRIDE);
+            HEADER("copy_cnt[%dx%d]", 4 << i, 4 << i);
+            REPORT_SPEEDUP(opt.copy_cnt[i], ref.copy_cnt[i], sbuf1, sbuf2, STRIDE);
         }
     }
 
diff -r a70b4e57aac2 -r d0d545f2982f source/test/pixelharness.h
--- a/source/test/pixelharness.h	Tue Sep 02 14:04:02 2014 +0530
+++ b/source/test/pixelharness.h	Tue Sep 02 14:27:40 2014 +0530
@@ -88,7 +88,7 @@
     bool check_cvt16to32_shl_t(cvt16to32_shl_t ref, cvt16to32_shl_t opt);
     bool check_cvt16to32_shr_t(cvt16to32_shr_t ref, cvt16to32_shr_t opt);
     bool check_cvt32to16_shl_t(cvt32to16_shl_t ref, cvt32to16_shl_t opt);
-    bool check_cvt16to32_cnt_t(cvt16to32_cnt_t ref, cvt16to32_cnt_t opt);
+    bool check_copy_cnt_t(copy_cnt_t ref, copy_cnt_t opt);
     bool check_pixel_var(var_t ref, var_t opt);
     bool check_ssim_4x4x2_core(ssim_4x4x2_core_t ref, ssim_4x4x2_core_t opt);
     bool check_ssim_end(ssim_end4_t ref, ssim_end4_t opt);


More information about the x265-devel mailing list