[x265] [PATCH] optimize cvt32to16_shl by replacing it with copy_shl

Tue Sep 2 16:13:53 CEST 2014

# HG changeset patch
# User Praveen Tiwari
# Date 1409660553 -19800
# Node ID e1b2ab942177bc9f67547a61c365c6167b5cee38
# Parent  61f7c056cd6e01e5a24a51b40c20c53bf4593ec7
optimize cvt32to16_shl by replacing it with copy_shl

diff -r 61f7c056cd6e -r e1b2ab942177 source/common/quant.cpp

--- a/source/common/quant.cpp	Tue Sep 02 17:47:11 2014 +0530
+++ b/source/common/quant.cpp	Tue Sep 02 17:52:33 2014 +0530
@@ -442,7 +442,15 @@
 {
     if (transQuantBypass)
     {
-        primitives.cvt32to16_shl[log2TrSize - 2](residual, coeff, stride, 0);
+        int numCoeff = (1 << (log2TrSize << 1));
+        assert(numCoeff <= 1024);
+        ALIGN_VAR_16(int16_t, qCoeff[1024]);
+        for (int i = 0; i < numCoeff; i++)
+        {
+            qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
+        }
+
+        primitives.copy_shl[log2TrSize - 2](residual, qCoeff, stride, 0);
         return;
     }