[x265-commits] [x265] intra: fix GCC warning about potentially uninitialized sum variable

Fri Oct 18 06:45:39 CEST 2013

details:   http://hg.videolan.org/x265/rev/61abe115acfc
branches:  
changeset: 4533:61abe115acfc
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 17 21:50:02 2013 -0500
description:
intra: fix GCC warning about potentially uninitialized sum variable
Subject: [x265] common: force float and double arguments to logf() and log(), respectively

details:   http://hg.videolan.org/x265/rev/d61e2ff59c29
branches:  
changeset: 4534:d61e2ff59c29
user:      Steve Borho <steve at borho.org>
date:      Thu Oct 17 23:32:25 2013 -0500
description:
common: force float and double arguments to logf() and log(), respectively

Fixes warnings on some MSVC versions
Subject: [x265] asm: add cvt32to16_shr_sse2, remove intrinsic primitive

details:   http://hg.videolan.org/x265/rev/84857e7ba3e1
branches:  
changeset: 4535:84857e7ba3e1
user:      Min Chen <chenm003 at 163.com>
date:      Thu Oct 17 22:15:00 2013 +0800
description:
asm: add cvt32to16_shr_sse2, remove intrinsic primitive

diffstat:

 source/common/CMakeLists.txt         |    2 +-
 source/common/common.h               |    4 +-
 source/common/vec/intra-sse3.cpp     |    3 +-
 source/common/vec/pixel-sse3.cpp     |   23 -------
 source/common/x86/asm-primitives.cpp |    2 +
 source/common/x86/pixel-util.asm     |  103 +++++++++++++++++++++++++++++++++++
 source/common/x86/pixel.h            |    2 +
 7 files changed, 112 insertions(+), 27 deletions(-)

diffs (217 lines):

diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/CMakeLists.txt

--- a/source/common/CMakeLists.txt	Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/CMakeLists.txt	Thu Oct 17 22:15:00 2013 +0800
@@ -153,7 +153,7 @@ if(ENABLE_PRIMITIVES_ASM)
         add_definitions(-DHAVE_ALIGNED_STACK=0)
     endif()
 
-    set(ASMS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm)
+    set(ASMS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm)
     if (X64)
         add_definitions(-DARCH_X86_64=1)
     else()
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/common.h
--- a/source/common/common.h	Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/common.h	Thu Oct 17 22:15:00 2013 +0800
@@ -102,8 +102,8 @@
 #endif // if ENABLE_CYCLE_COUNTERS
 
 #if defined(_MSC_VER)
-#define X265_LOG2F(x) (logf(x) * 1.44269504088896405f)
-#define X265_LOG2(x) (log(x) * 1.4426950408889640513713538072172)
+#define X265_LOG2F(x) (logf((float)(x)) * 1.44269504088896405f)
+#define X265_LOG2(x) (log((double)(x)) * 1.4426950408889640513713538072172)
 #else
 #define X265_LOG2F(x) log2f(x)
 #define X265_LOG2(x)  log2(x)
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/vec/intra-sse3.cpp
--- a/source/common/vec/intra-sse3.cpp	Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/vec/intra-sse3.cpp	Thu Oct 17 22:15:00 2013 +0800
@@ -199,6 +199,7 @@ void intra_pred_dc(pixel* above, pixel* 
         sum += horizontal_add_x(pixT);
         break;
 
+    default:
     case 32:
         pixL.load(left);
         im1  = (Vec4ui)(pixL.sad(_mm_setzero_si128()));
@@ -210,7 +211,7 @@ void intra_pred_dc(pixel* above, pixel* 
         pixT.load(above + 16);
         im1 += (Vec4ui)(pixT.sad(_mm_setzero_si128()));
         im1 += (Vec4ui)((Vec128b)im1 >> const_int(64));
-        sum  = toInt32(im1);
+        sum = toInt32(im1);
         break;
     }
 
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/vec/pixel-sse3.cpp
--- a/source/common/vec/pixel-sse3.cpp	Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/vec/pixel-sse3.cpp	Thu Oct 17 22:15:00 2013 +0800
@@ -31,28 +31,6 @@
 using namespace x265;
 
 namespace {
-void convert32to16_shr(short *dst, int *org, intptr_t stride, int shift, int size)
-{
-    int i, j;
-    __m128i round = _mm_set1_epi32(1 << (shift - 1));
-
-    for (i = 0; i < size; i++)
-    {
-        for (j = 0; j < size; j += 4)
-        {
-            __m128i im32;
-            __m128i im16;
-
-            im32 = _mm_loadu_si128((__m128i const*)(org + j));
-            im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), _mm_cvtsi32_si128(shift));
-            im16 = _mm_packs_epi32(im32, im32);
-            _mm_storel_epi64((__m128i*)(dst + j), im16);
-        }
-        org += size;
-        dst += stride;
-    }
-}
-
 void convert16to32_shl(int *dst, short *org, intptr_t stride, int shift, int size)
 {
     int i, j;
@@ -638,7 +616,6 @@ void calcRecons(pixel* pred, short* resi
 namespace x265 {
 void Setup_Vec_PixelPrimitives_sse3(EncoderPrimitives &p)
 {
-    p.cvt32to16_shr = convert32to16_shr;
     p.cvt16to32_shl = convert16to32_shl;
     p.cvt16to16_shl = convert16to16_shl;
 
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/x86/asm-primitives.cpp	Thu Oct 17 22:15:00 2013 +0800
@@ -253,6 +253,8 @@ void Setup_Assembly_Primitives(EncoderPr
         p.sa8d[BLOCK_8x8]   = x265_pixel_sa8d_8x8_sse2;
         p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_sse2;
         SA8D_INTER_FROM_BLOCK(sse2);
+
+        p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
     }
     if (cpuMask & X265_CPU_SSSE3)
     {
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/x86/pixel-util.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/x86/pixel-util.asm	Thu Oct 17 22:15:00 2013 +0800
@@ -0,0 +1,103 @@
+;*****************************************************************************
+;* Copyright (C) 2013 x265 project
+;*
+;* Authors: Min Chen <chenm003 at 163.com> <min.chen at multicorewareinc.com>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+;*
+;* This program is also available under a commercial proprietary license.
+;* For more information, contact us at licensing at multicorewareinc.com.
+;*****************************************************************************/
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA 32
+
+SECTION .text
+
+
+;-----------------------------------------------------------------------------
+; void cvt32to16_shr(short *dst, int *src, intptr_t stride, int shift, int size)
+; dst[x] = saturate16((src[x] + (1 << (shift - 1))) >> shift) over a size x size
+; block; src rows are packed (size ints apart), dst rows are stride shorts apart.
+; Two rows per pass, four columns per step: assumes size is a nonzero multiple of 4.
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal cvt32to16_shr, 5, 7, 3, dst, src, stride
+; Only m0-m2 are used, matching the declared count; xmm6/xmm7 are callee-saved on Win64.
+%define rnd     m2
+%define shift   m1
+
+    ; shift count, kept in an xmm register as the count operand of psrad
+    mov         r5d, r3m
+    movd        shift, r5d
+
+    ; rounding term 1 << (shift - 1), broadcast to all four dwords
+    dec         r5
+    xor         r6, r6
+    bts         r6, r5
+    movd        rnd, r6d
+    pshufd      rnd, rnd, 0
+
+    ; register alloc
+    ; r0 - dst
+    ; r1 - src
+    ; r2 - dst row pitch in bytes (stride * sizeof(short))
+    ; r3 - lx, column-step counter (size / 4)
+    ; r4 - size
+    ; r5 - ly, row-pair counter (size / 2)
+    ; r6 - diff, bytes from the end of a dst row to the start of the row two below
+    lea         r2, [r2 * 2]
+
+    mov         r4d, r4m                ; on x86-64 the 32-bit write zero-extends into r4
+    mov         r5, r4
+    mov         r6, r2
+    sub         r6, r4
+    lea         r6, [r6 * 2]            ; 2 * pitch - 2 * size bytes
+
+    shr         r5, 1
+.loop_row:
+    mov         r3, r4
+    shr         r3, 2
+.loop_col:
+    ; row 0
+    movu        m0, [r1]
+    paddd       m0, rnd
+    psrad       m0, shift
+    packssdw    m0, m0
+    movh        [r0], m0
+
+    ; row 1 (src + size ints, dst + one row pitch)
+    movu        m0, [r1 + r4 * 4]
+    paddd       m0, rnd
+    psrad       m0, shift
+    packssdw    m0, m0
+    movh        [r0 + r2], m0
+
+    ; next four columns: 4 ints of src, 4 shorts of dst
+    add         r1, 16
+    add         r0, 8
+
+    dec         r3
+    jg          .loop_col
+
+    ; r1 has walked row 0, so skip row 1; r0 moves down two dst rows
+    lea         r1, [r1 + r4 * 4]
+    add         r0, r6
+
+    dec         r5
+    jg         .loop_row
+    RET
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h	Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/x86/pixel.h	Thu Oct 17 22:15:00 2013 +0800
@@ -194,6 +194,8 @@ uint64_t x265_pixel_sa8d_satd_16x16_avx(
 uint64_t x265_pixel_sa8d_satd_16x16_xop(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2);
 uint64_t x265_pixel_sa8d_satd_16x16_avx2(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2);
 
+void x265_cvt32to16_shr_sse2(short *dst, int *src, intptr_t, int, int);
+
 #define DECL_HEVC_SSD(suffix) \
     int x265_pixel_ssd_32x64_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
     int x265_pixel_ssd_16x64_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \