[x265-commits] [x265] intra: fix GCC warning about potentially uninitialized sum variable
Steve Borho
steve at borho.org
Fri Oct 18 06:45:39 CEST 2013
details: http://hg.videolan.org/x265/rev/61abe115acfc
branches:
changeset: 4533:61abe115acfc
user: Steve Borho <steve at borho.org>
date: Thu Oct 17 21:50:02 2013 -0500
description:
intra: fix GCC warning about potentially uninitialized sum variable
Subject: [x265] common: force float and double arguments to logf() and log(), respectively
details: http://hg.videolan.org/x265/rev/d61e2ff59c29
branches:
changeset: 4534:d61e2ff59c29
user: Steve Borho <steve at borho.org>
date: Thu Oct 17 23:32:25 2013 -0500
description:
common: force float and double arguments to logf() and log(), respectively
Fixes warnings on some MSVC versions
Subject: [x265] asm: add cvt32to16_shr_sse2, remove intrinsic primitive
details: http://hg.videolan.org/x265/rev/84857e7ba3e1
branches:
changeset: 4535:84857e7ba3e1
user: Min Chen <chenm003 at 163.com>
date: Thu Oct 17 22:15:00 2013 +0800
description:
asm: add cvt32to16_shr_sse2, remove intrinsic primitive
diffstat:
source/common/CMakeLists.txt | 2 +-
source/common/common.h | 4 +-
source/common/vec/intra-sse3.cpp | 3 +-
source/common/vec/pixel-sse3.cpp | 23 -------
source/common/x86/asm-primitives.cpp | 2 +
source/common/x86/pixel-util.asm | 103 +++++++++++++++++++++++++++++++++++
source/common/x86/pixel.h | 2 +
7 files changed, 112 insertions(+), 27 deletions(-)
diffs (217 lines):
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/CMakeLists.txt Thu Oct 17 22:15:00 2013 +0800
@@ -153,7 +153,7 @@ if(ENABLE_PRIMITIVES_ASM)
add_definitions(-DHAVE_ALIGNED_STACK=0)
endif()
- set(ASMS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm)
+ set(ASMS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm ipfilter8.asm pixel-util.asm)
if (X64)
add_definitions(-DARCH_X86_64=1)
else()
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/common.h
--- a/source/common/common.h Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/common.h Thu Oct 17 22:15:00 2013 +0800
@@ -102,8 +102,8 @@
#endif // if ENABLE_CYCLE_COUNTERS
#if defined(_MSC_VER)
-#define X265_LOG2F(x) (logf(x) * 1.44269504088896405f)
-#define X265_LOG2(x) (log(x) * 1.4426950408889640513713538072172)
+#define X265_LOG2F(x) (logf((float)(x)) * 1.44269504088896405f)
+#define X265_LOG2(x) (log((double)(x)) * 1.4426950408889640513713538072172)
#else
#define X265_LOG2F(x) log2f(x)
#define X265_LOG2(x) log2(x)
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/vec/intra-sse3.cpp
--- a/source/common/vec/intra-sse3.cpp Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/vec/intra-sse3.cpp Thu Oct 17 22:15:00 2013 +0800
@@ -199,6 +199,7 @@ void intra_pred_dc(pixel* above, pixel*
sum += horizontal_add_x(pixT);
break;
+ default:
case 32:
pixL.load(left);
im1 = (Vec4ui)(pixL.sad(_mm_setzero_si128()));
@@ -210,7 +211,7 @@ void intra_pred_dc(pixel* above, pixel*
pixT.load(above + 16);
im1 += (Vec4ui)(pixT.sad(_mm_setzero_si128()));
im1 += (Vec4ui)((Vec128b)im1 >> const_int(64));
- sum = toInt32(im1);
+ sum = toInt32(im1);
break;
}
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/vec/pixel-sse3.cpp
--- a/source/common/vec/pixel-sse3.cpp Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/vec/pixel-sse3.cpp Thu Oct 17 22:15:00 2013 +0800
@@ -31,28 +31,6 @@
using namespace x265;
namespace {
-void convert32to16_shr(short *dst, int *org, intptr_t stride, int shift, int size)
-{
- int i, j;
- __m128i round = _mm_set1_epi32(1 << (shift - 1));
-
- for (i = 0; i < size; i++)
- {
- for (j = 0; j < size; j += 4)
- {
- __m128i im32;
- __m128i im16;
-
- im32 = _mm_loadu_si128((__m128i const*)(org + j));
- im32 = _mm_sra_epi32(_mm_add_epi32(im32, round), _mm_cvtsi32_si128(shift));
- im16 = _mm_packs_epi32(im32, im32);
- _mm_storel_epi64((__m128i*)(dst + j), im16);
- }
- org += size;
- dst += stride;
- }
-}
-
void convert16to32_shl(int *dst, short *org, intptr_t stride, int shift, int size)
{
int i, j;
@@ -638,7 +616,6 @@ void calcRecons(pixel* pred, short* resi
namespace x265 {
void Setup_Vec_PixelPrimitives_sse3(EncoderPrimitives &p)
{
- p.cvt32to16_shr = convert32to16_shr;
p.cvt16to32_shl = convert16to32_shl;
p.cvt16to16_shl = convert16to16_shl;
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/x86/asm-primitives.cpp Thu Oct 17 22:15:00 2013 +0800
@@ -253,6 +253,8 @@ void Setup_Assembly_Primitives(EncoderPr
p.sa8d[BLOCK_8x8] = x265_pixel_sa8d_8x8_sse2;
p.sa8d[BLOCK_16x16] = x265_pixel_sa8d_16x16_sse2;
SA8D_INTER_FROM_BLOCK(sse2);
+
+ p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
}
if (cpuMask & X265_CPU_SSSE3)
{
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/x86/pixel-util.asm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/source/common/x86/pixel-util.asm Thu Oct 17 22:15:00 2013 +0800
@@ -0,0 +1,103 @@
+;*****************************************************************************
+;* Copyright (C) 2013 x265 project
+;*
+;* Authors: Min Chen <chenm003 at 163.com> <min.chen at multicorewareinc.com>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+;*
+;* This program is also available under a commercial proprietary license.
+;* For more information, contact us at licensing at multicorewareinc.com.
+;*****************************************************************************/
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA 32
+
+SECTION .text
+
+
+;-----------------------------------------------------------------------------
+; void cvt32to16_shr(short *dst, int *src, intptr_t stride, int shift, int size)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal cvt32to16_shr, 5, 7, 3, dst, src, stride
+%define rnd m2
+%define shift m1
+; rnd/shift live in m2/m1 (3 XMM regs declared) so xmm6/xmm7, callee-saved on Win64, stay untouched
+ ; make shift
+ mov r5d, r3m ; r5d = shift (argument index 3)
+ movd shift, r5d
+
+ ; make round
+ dec r5
+ xor r6, r6
+ bts r6, r5 ; r6 = 1 << (shift - 1)
+
+ movd rnd, r6d
+ pshufd rnd, rnd, 0 ; broadcast the rounding term to all four dwords
+
+ ; register alloc
+ ; r0 - dst
+ ; r1 - src
+ ; r2 - stride * 2 (short*)
+ ; r3 - lx
+ ; r4 - size
+ ; r5 - ly
+ ; r6 - diff
+ lea r2, [r2 * 2] ; dst stride in bytes (2-byte elements)
+
+ mov r4d, r4m
+ mov r5, r4
+ mov r6, r2
+ sub r6, r4
+ lea r6, [r6 * 2] ; r6 = (2 * stride - size) * 2 bytes: moves dst on to the next row pair
+
+ shr r5, 1 ; ly = size / 2, two rows are converted per outer iteration
+.loop_row:
+
+ mov r3, r4
+ shr r3, 2 ; lx = size / 4, four values are converted per inner iteration
+.loop_col:
+ ; row 0
+ movu m0, [r1]
+ paddd m0, rnd
+ psrad m0, shift
+ packssdw m0, m0 ; narrow to 16 bits with signed saturation
+ movh [r0], m0
+
+ ; row 1
+ movu m0, [r1 + r4 * 4] ; src rows are contiguous: next row is size * 4 bytes ahead
+ paddd m0, rnd
+ psrad m0, shift
+ packssdw m0, m0
+ movh [r0 + r2], m0
+
+ ; move col pointer
+ add r1, 16
+ add r0, 8
+
+ dec r3
+ jg .loop_col
+
+ ; update pointer
+ lea r1, [r1 + r4 * 4] ; skip the src row already handled as "row 1"
+ add r0, r6
+
+ ; end of loop_row
+ dec r5
+ jg .loop_row
+
+ RET
diff -r 5ab2da8320f5 -r 84857e7ba3e1 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Thu Oct 17 21:28:02 2013 -0500
+++ b/source/common/x86/pixel.h Thu Oct 17 22:15:00 2013 +0800
@@ -194,6 +194,8 @@ uint64_t x265_pixel_sa8d_satd_16x16_avx(
uint64_t x265_pixel_sa8d_satd_16x16_xop(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2);
uint64_t x265_pixel_sa8d_satd_16x16_avx2(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2);
+void x265_cvt32to16_shr_sse2(short *dst, int *src, intptr_t, int, int);
+
#define DECL_HEVC_SSD(suffix) \
int x265_pixel_ssd_32x64_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
int x265_pixel_ssd_16x64_ ## suffix(pixel *, intptr_t, pixel *, intptr_t); \
More information about the x265-commits
mailing list