[x265-commits] [x265] build: remove icl32 and icl64 scripts

Thu Nov 20 17:48:41 CET 2014

details:   http://hg.videolan.org/x265/rev/3649fabf90d3
branches:  
changeset: 8868:3649fabf90d3
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Thu Nov 20 14:27:53 2014 +0530
description:
build: remove icl32 and icl64 scripts

Typical Windows ICL users link with Visual Studio
Subject: [x265] fix for old gcc

details:   http://hg.videolan.org/x265/rev/ed587d360b97
branches:  
changeset: 8869:ed587d360b97
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Thu Nov 20 18:25:09 2014 +0900
description:
fix for old gcc
Subject: [x265] Fix for C code mismatch

details:   http://hg.videolan.org/x265/rev/1d17ec0cb954
branches:  
changeset: 8870:1d17ec0cb954
user:      Praveen Tiwari
date:      Thu Nov 20 20:04:02 2014 +0530
description:
Fix for C code mismatch

This patch fixes the binary mismatch in encoded output introduced during
refactoring of the transform/quant path. Basically, it is the original version
of the code, which makes sure all valid inputs are copied into the input buffer.
On the other hand, it is not fully optimized code, but this patch is a quick fix
for the problem and allows us to optimize one function at a time.

diffstat:

 build/icl32/build-all.bat     |   14 ---
 build/icl32/make-makefile.bat |   15 ---
 build/icl64/build-all.bat     |   14 ---
 build/icl64/make-makefile.bat |   17 ----
 source/common/dct.cpp         |  169 +++++++++++++++++++++++++++++++++++++----
 source/common/pixel.cpp       |    2 +-
 6 files changed, 152 insertions(+), 79 deletions(-)

diffs (truncated from 334 to 300 lines):

diff -r 4b637cb9b792 -r 1d17ec0cb954 build/icl32/build-all.bat

--- a/build/icl32/build-all.bat	Thu Nov 20 11:49:38 2014 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
- at echo off
-if not "%ICPP_COMPILER13%" == "" ( set ICL="%ICPP_COMPILER13" )
-if not "%ICPP_COMPILER14%" == "" ( set ICL="%ICPP_COMPILER14" )
-if "%ICL%" == "" (
-  msg "%username%" "Intel C++ 2013 not detected"
-  exit 1
-)
-if not exist Makefile (
-  call make-makefile.bat
-)
-if exist Makefile (
-  call "%ICL%\bin\compilervars.bat" ia32
-  nmake
-)
diff -r 4b637cb9b792 -r 1d17ec0cb954 build/icl32/make-makefile.bat
--- a/build/icl32/make-makefile.bat	Thu Nov 20 11:49:38 2014 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
- at echo off
-::
-:: run this batch file to create an Intel C++ 2013 NMake makefile for this project.
-:: See the cmake documentation for other generator targets
-::
-if not "%ICPP_COMPILER13%" == "" ( set ICL="%ICPP_COMPILER13" )
-if not "%ICPP_COMPILER14%" == "" ( set ICL="%ICPP_COMPILER14" )
-if "%ICL%" == "" (
-  msg "%username%" "Intel C++ 2013 not detected"
-  exit 1
-)
-call "%ICL%\bin\compilervars.bat" ia32
-set CC=icl
-set CXX=icl
-cmake -G "NMake Makefiles" ..\..\source && cmake-gui ..\..\source
diff -r 4b637cb9b792 -r 1d17ec0cb954 build/icl64/build-all.bat
--- a/build/icl64/build-all.bat	Thu Nov 20 11:49:38 2014 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
- at echo off
-if not "%ICPP_COMPILER13%" == "" ( set ICL="%ICPP_COMPILER13" )
-if not "%ICPP_COMPILER14%" == "" ( set ICL="%ICPP_COMPILER14" )
-if "%ICL%" == "" (
-  msg "%username%" "Intel C++ 2013 not detected"
-  exit 1
-)
-if not exist Makefile (
-  call make-makefile.bat
-)
-if exist Makefile (
-  call "%ICL%\bin\compilervars.bat" intel64
-  nmake
-)
diff -r 4b637cb9b792 -r 1d17ec0cb954 build/icl64/make-makefile.bat
--- a/build/icl64/make-makefile.bat	Thu Nov 20 11:49:38 2014 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
- at echo off
-::
-:: run this batch file to create an Intel C++ 2013 NMake makefile for this project.
-:: See the cmake documentation for other generator targets
-::
-if not "%ICPP_COMPILER13%" == "" ( set ICL="%ICPP_COMPILER13" )
-if not "%ICPP_COMPILER14%" == "" ( set ICL="%ICPP_COMPILER14" )
-if "%ICL%" == "" (
-  msg "%username%" "Intel C++ 2013 not detected"
-  pause
-  exit 1
-)
-call "%ICL%\bin\compilervars.bat" intel64
-set CC=icl
-set CXX=icl
-cmake -G "NMake Makefiles" ..\..\source && cmake-gui ..\..\source
-pause
diff -r 4b637cb9b792 -r 1d17ec0cb954 source/common/dct.cpp
--- a/source/common/dct.cpp	Thu Nov 20 11:49:38 2014 +0530
+++ b/source/common/dct.cpp	Thu Nov 20 20:04:02 2014 +0530
@@ -454,51 +454,129 @@ void dst4_c(const int16_t *src, int16_t 
     }
 
     fastForwardDst(block, coef, shift_1st);
-    fastForwardDst(coef, dst, shift_2nd);
+    fastForwardDst(coef, block, shift_2nd);
+
+#define N (4)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            dst[i * N + j] = block[i * N + j];
+        }
+    }
+
+#undef N
 }
 
-void dct4_c(const int16_t *src, int16_t *dst, intptr_t /* stride */)
+void dct4_c(const int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 1 + X265_DEPTH - 8;
     const int shift_2nd = 8;
 
     ALIGN_VAR_32(int16_t, coef[4 * 4]);
+    ALIGN_VAR_32(int16_t, block[4 * 4]);
 
-    partialButterfly4(src, coef, shift_1st, 4);
-    partialButterfly4(coef, dst, shift_2nd, 4);
+    for (int i = 0; i < 4; i++)
+    {
+        memcpy(&block[i * 4], &src[i * stride], 4 * sizeof(int16_t));
+    }
+
+    partialButterfly4(block, coef, shift_1st, 4);
+    partialButterfly4(coef, block, shift_2nd, 4);
+#define N (4)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            dst[i * N + j] = block[i * N + j];
+        }
+    }
+
+#undef N
 }
 
-void dct8_c(const int16_t *src, int16_t *dst, intptr_t /* stride */)
+void dct8_c(const int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 2 + X265_DEPTH - 8;
     const int shift_2nd = 9;
 
     ALIGN_VAR_32(int16_t, coef[8 * 8]);
+    ALIGN_VAR_32(int16_t, block[8 * 8]);
 
-    partialButterfly8(src, coef, shift_1st, 8);
-    partialButterfly8(coef, dst, shift_2nd, 8);
+    for (int i = 0; i < 8; i++)
+    {
+        memcpy(&block[i * 8], &src[i * stride], 8 * sizeof(int16_t));
+    }
+
+    partialButterfly8(block, coef, shift_1st, 8);
+    partialButterfly8(coef, block, shift_2nd, 8);
+
+#define N (8)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            dst[i * N + j] = block[i * N + j];
+        }
+    }
+
+#undef N
 }
 
-void dct16_c(const int16_t *src, int16_t *dst, intptr_t /* stride */)
+void dct16_c(const int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 3 + X265_DEPTH - 8;
     const int shift_2nd = 10;
 
     ALIGN_VAR_32(int16_t, coef[16 * 16]);
+    ALIGN_VAR_32(int16_t, block[16 * 16]);
 
-    partialButterfly16(src, coef, shift_1st, 16);
-    partialButterfly16(coef, dst, shift_2nd, 16);
+    for (int i = 0; i < 16; i++)
+    {
+        memcpy(&block[i * 16], &src[i * stride], 16 * sizeof(int16_t));
+    }
+
+    partialButterfly16(block, coef, shift_1st, 16);
+    partialButterfly16(coef, block, shift_2nd, 16);
+
+#define N (16)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            dst[i * N + j] = block[i * N + j];
+        }
+    }
+
+#undef N
 }
 
-void dct32_c(const int16_t *src, int16_t *dst, intptr_t /* stride */)
+void dct32_c(const int16_t *src, int16_t *dst, intptr_t stride)
 {
     const int shift_1st = 4 + X265_DEPTH - 8;
     const int shift_2nd = 11;
 
     ALIGN_VAR_32(int16_t, coef[32 * 32]);
+    ALIGN_VAR_32(int16_t, block[32 * 32]);
 
-    partialButterfly32(src, coef, shift_1st, 32);
-    partialButterfly32(coef, dst, shift_2nd, 32);
+    for (int i = 0; i < 32; i++)
+    {
+        memcpy(&block[i * 32], &src[i * stride], 32 * sizeof(int16_t));
+    }
+
+    partialButterfly32(block, coef, shift_1st, 32);
+    partialButterfly32(coef, block, shift_2nd, 32);
+
+#define N (32)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            dst[i * N + j] = block[i * N + j];
+        }
+    }
+
+#undef N
 }
 
 void idst4_c(const int16_t *src, int16_t *dst, intptr_t stride)
@@ -509,7 +587,18 @@ void idst4_c(const int16_t *src, int16_t
     ALIGN_VAR_32(int16_t, coef[4 * 4]);
     ALIGN_VAR_32(int16_t, block[4 * 4]);
 
-    inversedst(src, coef, shift_1st); // Forward DST BY FAST ALGORITHM, block input, coef output
+#define N (4)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            block[i * N + j] = (int16_t)src[i * N + j];
+        }
+    }
+
+#undef N
+
+    inversedst(block, coef, shift_1st); // Forward DST BY FAST ALGORITHM, block input, coef output
     inversedst(coef, block, shift_2nd); // Forward DST BY FAST ALGORITHM, coef input, coeff output
 
     for (int i = 0; i < 4; i++)
@@ -526,7 +615,18 @@ void idct4_c(const int16_t *src, int16_t
     ALIGN_VAR_32(int16_t, coef[4 * 4]);
     ALIGN_VAR_32(int16_t, block[4 * 4]);
 
-    partialButterflyInverse4(src, coef, shift_1st, 4); // Forward DST BY FAST ALGORITHM, block input, coef output
+#define N (4)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            block[i * N + j] = (int16_t)src[i * N + j];
+        }
+    }
+
+#undef N
+
+    partialButterflyInverse4(block, coef, shift_1st, 4); // Forward DST BY FAST ALGORITHM, block input, coef output
     partialButterflyInverse4(coef, block, shift_2nd, 4); // Forward DST BY FAST ALGORITHM, coef input, coeff output
 
     for (int i = 0; i < 4; i++)
@@ -543,7 +643,18 @@ void idct8_c(const int16_t *src, int16_t
     ALIGN_VAR_32(int16_t, coef[8 * 8]);
     ALIGN_VAR_32(int16_t, block[8 * 8]);
 
-    partialButterflyInverse8(src, coef, shift_1st, 8);
+#define N (8)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            block[i * N + j] = (int16_t)src[i * N + j];
+        }
+    }
+
+#undef N
+
+    partialButterflyInverse8(block, coef, shift_1st, 8);
     partialButterflyInverse8(coef, block, shift_2nd, 8);
     for (int i = 0; i < 8; i++)
     {
@@ -559,7 +670,18 @@ void idct16_c(const int16_t *src, int16_
     ALIGN_VAR_32(int16_t, coef[16 * 16]);
     ALIGN_VAR_32(int16_t, block[16 * 16]);
 
-    partialButterflyInverse16(src, coef, shift_1st, 16);
+#define N (16)
+    for (int i = 0; i < N; i++)
+    {
+        for (int j = 0; j < N; j++)
+        {
+            block[i * N + j] = (int16_t)src[i * N + j];
+        }
+    }
+
+#undef N
+
+    partialButterflyInverse16(block, coef, shift_1st, 16);
     partialButterflyInverse16(coef, block, shift_2nd, 16);