[x265] [PATCH] asm: correction of function declaration to sse4
dnyaneshwar at multicorewareinc.com
Fri Feb 7 11:23:27 CET 2014
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1391768540 -19800
# Fri Feb 07 15:52:20 2014 +0530
# Node ID 8e4805fbd89594b24f36be135f384f1e611a0dfa
# Parent c1cea0534e6b6ca359a7fc2a665c3ef6909dd041
asm: correction of function declaration to sse4
diff -r c1cea0534e6b -r 8e4805fbd895 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Fri Feb 07 13:14:45 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Fri Feb 07 15:52:20 2014 +0530
@@ -705,7 +705,6 @@
p.sad_x4[LUMA_12x16] = x265_pixel_sad_x4_12x16_mmx2;
p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
- p.cvt16to32_shl = x265_cvt16to32_shl_sse2;
CHROMA_PIXELSUB_PS(_sse2);
LUMA_PIXELSUB(_sse2);
@@ -738,6 +737,8 @@
}
if (cpuMask & X265_CPU_SSE4)
{
+ p.cvt16to32_shl = x265_cvt16to32_shl_sse4;
+
p.intra_pred[BLOCK_4x4][0] = x265_intra_pred_planar4_sse4;
p.intra_pred[BLOCK_8x8][0] = x265_intra_pred_planar8_sse4;
p.intra_pred[BLOCK_16x16][0] = x265_intra_pred_planar16_sse4;
@@ -897,7 +898,6 @@
SA8D_INTER_FROM_BLOCK(sse2);
p.cvt32to16_shr = x265_cvt32to16_shr_sse2;
- p.cvt16to32_shl = x265_cvt16to32_shl_sse2;
p.calcrecon[BLOCK_4x4] = x265_calcRecons4_sse2;
p.calcrecon[BLOCK_8x8] = x265_calcRecons8_sse2;
p.calcresidual[BLOCK_4x4] = x265_getResidual4_sse2;
@@ -912,9 +912,6 @@
p.dct[DCT_4x4] = x265_dct4_sse2;
p.idct[IDCT_4x4] = x265_idct4_sse2;
p.idct[IDST_4x4] = x265_idst4_sse2;
-
- LUMA_ADDAVG(_sse2);
- CHROMA_ADDAVG(_sse2);
}
if (cpuMask & X265_CPU_SSSE3)
{
@@ -959,6 +956,10 @@
}
if (cpuMask & X265_CPU_SSE4)
{
+ LUMA_ADDAVG(_sse4);
+ CHROMA_ADDAVG(_sse4);
+ p.cvt16to32_shl = x265_cvt16to32_shl_sse4;
+
HEVC_SATD(sse4);
SA8D_INTER_FROM_BLOCK(sse4);
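
[Editor's note] For readers outside the codebase: asm-primitives.cpp fills a table of function pointers once at startup, gated by CPU-feature bits, and these hunks move the cvt16to32_shl and addAvg registrations from the SSE2 block into the SSE4 block. A kernel whose body uses SSE4.1 instructions must only be registered when X265_CPU_SSE4 is set; registered under the SSE2 block, it can raise an illegal-instruction fault on pre-SSE4 machines. A minimal sketch of the pattern, with simplified stand-ins for the real x265 types and flags:

    #include <stdint.h>

    /* simplified stand-ins for x265's EncoderPrimitives table and
     * X265_CPU_* bits; not the real definitions */
    typedef void (*kernel_t)(void);
    typedef struct { kernel_t cvt16to32_shl; } Primitives;

    #define CPU_SSE2 (1u << 0)
    #define CPU_SSE4 (1u << 1)

    static void kernel_c(void)    { /* portable C fallback */ }
    static void kernel_sse4(void) { /* stands in for a body using SSE4.1 */ }

    void setupPrimitives(Primitives *p, uint32_t cpuMask)
    {
        p->cvt16to32_shl = kernel_c;        /* always-safe default */
        if (cpuMask & CPU_SSE2) {
            /* before this patch, the SSE4.1 kernels were registered here
             * and could fault on machines that only report SSE2 */
        }
        if (cpuMask & CPU_SSE4)
            p->cvt16to32_shl = kernel_sse4; /* only when SSE4 is reported */
    }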
diff -r c1cea0534e6b -r 8e4805fbd895 source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Fri Feb 07 13:14:45 2014 +0530
+++ b/source/common/x86/blockcopy8.asm Fri Feb 07 15:52:20 2014 +0530
@@ -2881,7 +2881,7 @@
;--------------------------------------------------------------------------------------
; void cvt16to32_shl(int32_t *dst, int16_t *src, intptr_t stride, int shift, int size);
;--------------------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
cglobal cvt16to32_shl, 5, 7, 2, dst, src, stride, shift, size
%define shift m1
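
[Editor's note] The INIT_XMM line both selects the instruction set x86inc may emit and tags the generated symbol (here x265_cvt16to32_shl_sse4), which is why the .cpp registration and the .h prototype must change in step with the .asm file. For reference, a hypothetical scalar version of what the kernel computes, inferred from the prototype above; the assumption that dst is a contiguous size-by-size block is mine, not taken from the x265 sources:

    #include <stdint.h>

    /* hypothetical scalar reference for cvt16to32_shl; assumes dst is a
     * contiguous size x size block while src uses the given stride */
    void cvt16to32_shl_ref(int32_t *dst, const int16_t *src, intptr_t stride,
                           int shift, int size)
    {
        for (int i = 0; i < size; i++)
            for (int j = 0; j < size; j++)
                dst[i * size + j] = (int32_t)src[i * stride + j] << shift;
    }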
diff -r c1cea0534e6b -r 8e4805fbd895 source/common/x86/blockcopy8.h
--- a/source/common/x86/blockcopy8.h Fri Feb 07 13:14:45 2014 +0530
+++ b/source/common/x86/blockcopy8.h Fri Feb 07 15:52:20 2014 +0530
@@ -25,7 +25,7 @@
#define X265_BLOCKCOPY8_H
void x265_cvt32to16_shr_sse2(int16_t * dst, int *src, intptr_t, int, int);
-void x265_cvt16to32_shl_sse2(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
+void x265_cvt16to32_shl_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
#define SETUP_CHROMA_BLOCKCOPY_FUNC(W, H, cpu) \
void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); \
diff -r c1cea0534e6b -r 8e4805fbd895 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm Fri Feb 07 13:14:45 2014 +0530
+++ b/source/common/x86/mc-a.asm Fri Feb 07 15:52:20 2014 +0530
@@ -190,7 +190,7 @@
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_4x2, 6,6,4, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m1, [pw_256]
@@ -217,7 +217,7 @@
;-----------------------------------------------------------------------------
%macro ADDAVG_W4_H4 1
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_4x%1, 6,7,4, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m1, [pw_256]
mova m3, [pw_128]
@@ -370,7 +370,7 @@
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_8x2, 6,6,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
mova m5, [pw_128]
@@ -397,7 +397,7 @@
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_8x6, 6,6,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
@@ -466,7 +466,7 @@
;-----------------------------------------------------------------------------
%macro ADDAVG_W8_H4 1
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_8x%1, 6,7,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
@@ -536,7 +536,7 @@
;-----------------------------------------------------------------------------
%macro ADDAVG_W12_H4 1
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_12x%1, 6,7,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
mova m5, [pw_128]
@@ -629,7 +629,7 @@
;-----------------------------------------------------------------------------
%macro ADDAVG_W16_H4 1
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_16x%1, 6,7,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
mova m5, [pw_128]
@@ -724,7 +724,7 @@
;-----------------------------------------------------------------------------
%macro ADDAVG_W24_H2 2
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_%1x%2, 6,7,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
mova m5, [pw_128]
@@ -797,7 +797,7 @@
;-----------------------------------------------------------------------------
%macro ADDAVG_W32_H2 1
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_32x%1, 6,7,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
mova m5, [pw_128]
@@ -887,7 +887,7 @@
;-----------------------------------------------------------------------------
%macro ADDAVG_W48_H2 1
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_48x%1, 6,7,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
@@ -1003,7 +1003,7 @@
;-----------------------------------------------------------------------------
%macro ADDAVG_W64_H1 1
-INIT_XMM sse2
+INIT_XMM sse4
cglobal addAvg_64x%1, 6,7,6, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
mova m4, [pw_256]
mova m5, [pw_128]
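
[Editor's note] All of these mc-a.asm hunks are the same one-line change: the addAvg kernels are stamped out per block size by yasm %macro blocks, and each macro site (plus the standalone 4x2, 8x2, and 8x6 bodies) flips from INIT_XMM sse2 to INIT_XMM sse4. For reference, a hypothetical scalar equivalent of what these kernels compute in the 8-bit path; the shift and offset are my reconstruction from the pw_256/pw_128 constants above (intermediates biased by x265's IF_INTERNAL_OFFS = 8192), not code copied from x265:

    #include <stdint.h>

    static uint8_t clip_u8(int v) { return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v)); }

    /* hypothetical scalar reference; width/height are explicit parameters
     * here, while the real kernels bake them into the name (addAvg_16x8...) */
    void addAvg_ref(const int16_t *src0, const int16_t *src1, uint8_t *dst,
                    intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride,
                    int width, int height)
    {
        /* assumed 8-bit path: shift = 7; offset removes the 2 * 8192 bias
         * and adds half the divisor for rounding */
        const int shift = 7;
        const int offset = 2 * 8192 + (1 << (shift - 1));
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++)
                dst[x] = clip_u8((src0[x] + src1[x] + offset) >> shift);
            src0 += src0Stride;
            src1 += src1Stride;
            dst  += dstStride;
        }
    }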
diff -r c1cea0534e6b -r 8e4805fbd895 source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Fri Feb 07 13:14:45 2014 +0530
+++ b/source/common/x86/pixel.h Fri Feb 07 15:52:20 2014 +0530
@@ -167,7 +167,6 @@
int x265_pixel_ssd_64x64_sse4(pixel *, intptr_t, pixel *, intptr_t);
#define ADDAVG(func) \
- void x265_ ## func ## _sse2 (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t); \
void x265_ ## func ## _sse4 (int16_t*, int16_t*, pixel*, intptr_t, intptr_t, intptr_t);
ADDAVG(addAvg_2x4)
ADDAVG(addAvg_2x8)
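
[Editor's note] pixel.h uses the same stamping idea for the declarations, and after the patch the ADDAVG macro declares only the _sse4 variants. A C analogue of the per-size stamping pattern (all names here are mine, and it reuses the addAvg_ref sketch above):

    /* one wrapper per block size, sharing a single parameterized body;
     * assumes the hypothetical addAvg_ref helper sketched earlier */
    #define DEFINE_ADDAVG(W, H) \
        void addAvg_##W##x##H(const int16_t *src0, const int16_t *src1, \
                              uint8_t *dst, intptr_t src0Stride, \
                              intptr_t src1Stride, intptr_t dstStride) \
        { addAvg_ref(src0, src1, dst, src0Stride, src1Stride, dstStride, W, H); }

    DEFINE_ADDAVG(2, 4)
    DEFINE_ADDAVG(2, 8)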