[x265] [PATCH] conv16to32_count C interface modification, downscaling coeff from int32_t* to int16_t*

praveen at multicorewareinc.com praveen at multicorewareinc.com
Tue Sep 2 16:11:54 CEST 2014


# HG changeset patch
# User Praveen Tiwari
# Date 1409646842 -19800
# Node ID a70b4e57aac2c535add2de15145c2a86638116f4
# Parent  51b5a6d820da97a4178dc42d2ef98ffe1970511b
conv16to32_count C interface modification, downscaling coeff from int32_t* to int16_t*

diff -r 51b5a6d820da -r a70b4e57aac2 source/common/dct.cpp
--- a/source/common/dct.cpp	Mon Sep 01 17:07:05 2014 +0530
+++ b/source/common/dct.cpp	Tue Sep 02 14:04:02 2014 +0530
@@ -827,7 +827,7 @@
 }
 
 template<int trSize>
-uint32_t conv16to32_count(coeff_t* coeff, int16_t* residual, intptr_t stride)
+uint32_t conv16to32_count(int16_t* coeff, int16_t* residual, intptr_t stride)
 {
     uint32_t numSig = 0;
     for (int k = 0; k < trSize; k++)
diff -r 51b5a6d820da -r a70b4e57aac2 source/common/primitives.h
--- a/source/common/primitives.h	Mon Sep 01 17:07:05 2014 +0530
+++ b/source/common/primitives.h	Tue Sep 02 14:04:02 2014 +0530
@@ -153,7 +153,7 @@
 typedef void (*cvt16to32_shr_t)(int32_t *dst, int16_t *src, intptr_t, int, int);
 typedef void (*cvt32to16_shr_t)(int16_t *dst, int32_t *src, intptr_t, int, int);
 typedef void (*cvt32to16_shl_t)(int16_t *dst, int32_t *src, intptr_t, int);
-typedef uint32_t (*cvt16to32_cnt_t)(coeff_t* coeff, int16_t* residual, intptr_t stride);
+typedef uint32_t (*cvt16to32_cnt_t)(int16_t* coeff, int16_t* residual, intptr_t stride);
 
 typedef void (*dct_t)(int16_t *src, int32_t *dst, intptr_t stride);
 typedef void (*idct_t)(int32_t *src, int16_t *dst, intptr_t stride);
diff -r 51b5a6d820da -r a70b4e57aac2 source/common/quant.cpp
--- a/source/common/quant.cpp	Mon Sep 01 17:07:05 2014 +0530
+++ b/source/common/quant.cpp	Tue Sep 02 14:04:02 2014 +0530
@@ -315,7 +315,23 @@
     if (cu->getCUTransquantBypass(absPartIdx))
     {
         X265_CHECK(log2TrSize >= 2 && log2TrSize <= 5, "Block size mistake!\n");
-        return primitives.cvt16to32_cnt[log2TrSize - 2](coeff, residual, stride);
+        /* This section of code safely converts the int32_t coefficients to int16_t; once the caller function is
+         * optimized to take coefficients as int16_t*, it will be removed. */
+        int numCoeff = 1 << log2TrSize * 2;
+        ALIGN_VAR_16(int16_t, qCoeff[32 * 32]);
+        for (int i = 0; i < numCoeff; i++)
+        {
+             qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
+        }
+        int numSign = primitives.cvt16to32_cnt[log2TrSize - 2](qCoeff, residual, stride);
+
+        /* This section of code safely converts the int16_t coefficients back to int32_t; once the caller function
+         * is optimized to take coefficients as int16_t*, it will be removed. */
+        for (int i = 0; i < numCoeff; i++)
+        {
+            coeff[i] = qCoeff[i];
+        }
+        return numSign;
     }
 
     bool isLuma  = ttype == TEXT_LUMA;
diff -r 51b5a6d820da -r a70b4e57aac2 source/common/x86/blockcopy8.h
--- a/source/common/x86/blockcopy8.h	Mon Sep 01 17:07:05 2014 +0530
+++ b/source/common/x86/blockcopy8.h	Tue Sep 02 14:04:02 2014 +0530
@@ -38,14 +38,14 @@
 void x265_cvt16to32_shr_8_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
 void x265_cvt16to32_shr_16_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
 void x265_cvt16to32_shr_32_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
-uint32_t x265_cvt16to32_cnt_4_sse4(int32_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_8_sse4(int32_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_16_sse4(int32_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_32_sse4(int32_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_4_avx2(int32_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_8_avx2(int32_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_16_avx2(int32_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_32_avx2(int32_t * dst, int16_t * src, intptr_t);
+uint32_t x265_cvt16to32_cnt_4_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_cvt16to32_cnt_8_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_cvt16to32_cnt_16_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_cvt16to32_cnt_32_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_cvt16to32_cnt_4_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_cvt16to32_cnt_8_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_cvt16to32_cnt_16_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_cvt16to32_cnt_32_avx2(int16_t * dst, int16_t * src, intptr_t);
 
 #define SETUP_BLOCKCOPY_FUNC(W, H, cpu) \
     void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); \
diff -r 51b5a6d820da -r a70b4e57aac2 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Mon Sep 01 17:07:05 2014 +0530
+++ b/source/test/pixelharness.cpp	Tue Sep 02 14:04:02 2014 +0530
@@ -582,8 +582,8 @@
 
 bool PixelHarness::check_cvt16to32_cnt_t(cvt16to32_cnt_t ref, cvt16to32_cnt_t opt)
 {
-    ALIGN_VAR_16(int32_t, ref_dest[64 * 64]);
-    ALIGN_VAR_16(int32_t, opt_dest[64 * 64]);
+    ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
+    ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
 
     memset(ref_dest, 0xCD, sizeof(ref_dest));
     memset(opt_dest, 0xCD, sizeof(opt_dest));
@@ -1782,7 +1782,7 @@
         if ((i < BLOCK_64x64) && opt.cvt16to32_cnt[i])
         {
             HEADER("cvt16to32_cnt[%dx%d]", 4 << i, 4 << i);
-            REPORT_SPEEDUP(opt.cvt16to32_cnt[i], ref.cvt16to32_cnt[i], ibuf1, sbuf2, STRIDE);
+            REPORT_SPEEDUP(opt.cvt16to32_cnt[i], ref.cvt16to32_cnt[i], sbuf1, sbuf2, STRIDE);
         }
     }
 


More information about the x265-devel mailing list