[x265] [PATCH] optimize cvt32to16_shl by replacing it with copy_shl
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Sep 2 16:13:53 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1409660553 -19800
# Node ID e1b2ab942177bc9f67547a61c365c6167b5cee38
# Parent 61f7c056cd6e01e5a24a51b40c20c53bf4593ec7
optimize cvt32to16_shl by replacing it with copy_shl
diff -r 61f7c056cd6e -r e1b2ab942177 source/common/quant.cpp
--- a/source/common/quant.cpp Tue Sep 02 17:47:11 2014 +0530
+++ b/source/common/quant.cpp Tue Sep 02 17:52:33 2014 +0530
@@ -442,7 +442,15 @@
{
if (transQuantBypass)
{
- primitives.cvt32to16_shl[log2TrSize - 2](residual, coeff, stride, 0);
+ int numCoeff = (1 << (log2TrSize << 1));
+ assert(numCoeff <= 1024);
+ ALIGN_VAR_16(int16_t, qCoeff[1024]);
+ for (int i = 0; i < numCoeff; i++)
+ {
+ qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
+ }
+
+ primitives.copy_shl[log2TrSize - 2](residual, qCoeff, stride, 0);
return;
}
More information about the x265-devel mailing list