[x265] [PATCH] conv16to32_count renamed to copy_count as per new interface
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Tue Sep 2 16:12:17 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1409648260 -19800
# Node ID d0d545f2982ff9600b05e70ee4f066ce28ba51a7
# Parent a70b4e57aac2c535add2de15145c2a86638116f4
conv16to32_count renamed to copy_count as per new interface
diff -r a70b4e57aac2 -r d0d545f2982f source/common/dct.cpp
--- a/source/common/dct.cpp Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/dct.cpp Tue Sep 02 14:27:40 2014 +0530
@@ -827,7 +827,7 @@
}
template<int trSize>
-uint32_t conv16to32_count(int16_t* coeff, int16_t* residual, intptr_t stride)
+uint32_t copy_count(int16_t* coeff, int16_t* residual, intptr_t stride)
{
uint32_t numSig = 0;
for (int k = 0; k < trSize; k++)
@@ -879,9 +879,9 @@
p.count_nonzero = count_nonzero_c;
p.denoiseDct = denoiseDct_c;
- p.cvt16to32_cnt[BLOCK_4x4] = conv16to32_count<4>;
- p.cvt16to32_cnt[BLOCK_8x8] = conv16to32_count<8>;
- p.cvt16to32_cnt[BLOCK_16x16] = conv16to32_count<16>;
- p.cvt16to32_cnt[BLOCK_32x32] = conv16to32_count<32>;
+ p.copy_cnt[BLOCK_4x4] = copy_count<4>;
+ p.copy_cnt[BLOCK_8x8] = copy_count<8>;
+ p.copy_cnt[BLOCK_16x16] = copy_count<16>;
+ p.copy_cnt[BLOCK_32x32] = copy_count<32>;
}
}
diff -r a70b4e57aac2 -r d0d545f2982f source/common/primitives.h
--- a/source/common/primitives.h Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/primitives.h Tue Sep 02 14:27:40 2014 +0530
@@ -153,7 +153,7 @@
typedef void (*cvt16to32_shr_t)(int32_t *dst, int16_t *src, intptr_t, int, int);
typedef void (*cvt32to16_shr_t)(int16_t *dst, int32_t *src, intptr_t, int, int);
typedef void (*cvt32to16_shl_t)(int16_t *dst, int32_t *src, intptr_t, int);
-typedef uint32_t (*cvt16to32_cnt_t)(int16_t* coeff, int16_t* residual, intptr_t stride);
+typedef uint32_t (*copy_cnt_t)(int16_t* coeff, int16_t* residual, intptr_t stride);
typedef void (*dct_t)(int16_t *src, int32_t *dst, intptr_t stride);
typedef void (*idct_t)(int32_t *src, int16_t *dst, intptr_t stride);
@@ -226,7 +226,7 @@
cvt16to32_shr_t cvt16to32_shr[NUM_SQUARE_BLOCKS - 1];
cvt32to16_shr_t cvt32to16_shr;
cvt32to16_shl_t cvt32to16_shl[NUM_SQUARE_BLOCKS - 1];
- cvt16to32_cnt_t cvt16to32_cnt[NUM_SQUARE_BLOCKS - 1];
+ copy_cnt_t copy_cnt[NUM_SQUARE_BLOCKS - 1];
copy_pp_t luma_copy_pp[NUM_LUMA_PARTITIONS];
copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS];
diff -r a70b4e57aac2 -r d0d545f2982f source/common/quant.cpp
--- a/source/common/quant.cpp Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/quant.cpp Tue Sep 02 14:27:40 2014 +0530
@@ -323,7 +323,7 @@
{
qCoeff[i] = (int16_t)Clip3(-32768, 32767, coeff[i]);
}
- int numSign = primitives.cvt16to32_cnt[log2TrSize - 2](qCoeff, residual, stride);
+ int numSign = primitives.copy_cnt[log2TrSize - 2](qCoeff, residual, stride);
/* This section of code is to safely convert int16_t coefficients to int32_t, once the caller function is
* optimize to take coefficients as int16_t*, it will be cleanse.*/
diff -r a70b4e57aac2 -r d0d545f2982f source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/x86/asm-primitives.cpp Tue Sep 02 14:27:40 2014 +0530
@@ -1601,10 +1601,10 @@
p.cvt16to32_shr[BLOCK_32x32] = x265_cvt16to32_shr_32_sse4;
// TODO: check POPCNT flag!
- p.cvt16to32_cnt[BLOCK_4x4] = x265_cvt16to32_cnt_4_sse4;
- p.cvt16to32_cnt[BLOCK_8x8] = x265_cvt16to32_cnt_8_sse4;
- p.cvt16to32_cnt[BLOCK_16x16] = x265_cvt16to32_cnt_16_sse4;
- p.cvt16to32_cnt[BLOCK_32x32] = x265_cvt16to32_cnt_32_sse4;
+ p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_sse4;
+ p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_sse4;
+ p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_sse4;
+ p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_sse4;
HEVC_SATD(sse4);
SA8D_INTER_FROM_BLOCK(sse4);
@@ -1706,10 +1706,10 @@
p.sad_x4[LUMA_16x12] = x265_pixel_sad_x4_16x12_avx2;
p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2;
p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2;
- p.cvt16to32_cnt[BLOCK_4x4] = x265_cvt16to32_cnt_4_avx2;
- p.cvt16to32_cnt[BLOCK_8x8] = x265_cvt16to32_cnt_8_avx2;
- p.cvt16to32_cnt[BLOCK_16x16] = x265_cvt16to32_cnt_16_avx2;
- p.cvt16to32_cnt[BLOCK_32x32] = x265_cvt16to32_cnt_32_avx2;
+ p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2;
+ p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2;
+ p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2;
+ p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2;
p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_avx2;
p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_avx2;
p.cvt32to16_shl[BLOCK_16x16] = x265_cvt32to16_shl_16_avx2;
diff -r a70b4e57aac2 -r d0d545f2982f source/common/x86/blockcopy8.asm
--- a/source/common/x86/blockcopy8.asm Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/x86/blockcopy8.asm Tue Sep 02 14:27:40 2014 +0530
@@ -3950,10 +3950,10 @@
;--------------------------------------------------------------------------------------
-; uint32_t cvt16to32_cnt(int16_t *dst, int16_t *src, intptr_t stride);
+; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
;--------------------------------------------------------------------------------------
INIT_XMM sse4
-cglobal cvt16to32_cnt_4, 3,3,5
+cglobal copy_cnt_4, 3,3,5
add r2d, r2d
pxor m4, m4
@@ -3994,7 +3994,7 @@
INIT_YMM avx2
-cglobal cvt16to32_cnt_4, 3,3,5
+cglobal copy_cnt_4, 3,3,5
add r2d, r2d
pxor m4, m4
@@ -4022,10 +4022,10 @@
;--------------------------------------------------------------------------------------
-; uint32_t cvt16to32_cnt(int16_t *dst, int16_t *src, intptr_t stride);
+; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
;--------------------------------------------------------------------------------------
INIT_XMM sse4
-cglobal cvt16to32_cnt_8, 3,3,6
+cglobal copy_cnt_8, 3,3,6
add r2d, r2d
pxor m4, m4
pxor m5, m5
@@ -4085,10 +4085,10 @@
INIT_YMM avx2
%if ARCH_X86_64 == 1
-cglobal cvt16to32_cnt_8, 3,4,6
+cglobal copy_cnt_8, 3,4,6
%define tmpd eax
%else
-cglobal cvt16to32_cnt_8, 3,5,6
+cglobal copy_cnt_8, 3,5,6
%define tmpd r4d
%endif
add r2d, r2d
@@ -4167,10 +4167,10 @@
;--------------------------------------------------------------------------------------
-; uint32_t cvt16to32_cnt(int16_t *dst, int16_t *src, intptr_t stride);
+; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
;--------------------------------------------------------------------------------------
INIT_XMM sse4
-cglobal cvt16to32_cnt_16, 3,4,6
+cglobal copy_cnt_16, 3,4,6
add r2d, r2d
mov r3d, 4
pxor m4, m4
@@ -4233,7 +4233,7 @@
INIT_YMM avx2
-cglobal cvt16to32_cnt_16, 3,5,5
+cglobal copy_cnt_16, 3,5,5
add r2d, r2d
lea r4, [r2 * 3]
mov r3d, 16/4
@@ -4299,10 +4299,10 @@
RET
;--------------------------------------------------------------------------------------
-; uint32_t cvt16to32_cnt(int32_t *dst, int16_t *src, intptr_t stride);
+; uint32_t copy_cnt(int16_t *dst, int16_t *src, intptr_t stride);
;--------------------------------------------------------------------------------------
INIT_XMM sse4
-cglobal cvt16to32_cnt_32, 3,4,6
+cglobal copy_cnt_32, 3,4,6
add r2d, r2d
mov r3d, 16
pxor m4, m4
@@ -4363,7 +4363,7 @@
INIT_YMM avx2
-cglobal cvt16to32_cnt_32, 3,4,5
+cglobal copy_cnt_32, 3,4,5
add r2d, r2d
mov r3d, 32/1
xorpd m3, m3
diff -r a70b4e57aac2 -r d0d545f2982f source/common/x86/blockcopy8.h
--- a/source/common/x86/blockcopy8.h Tue Sep 02 14:04:02 2014 +0530
+++ b/source/common/x86/blockcopy8.h Tue Sep 02 14:27:40 2014 +0530
@@ -38,14 +38,14 @@
void x265_cvt16to32_shr_8_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
void x265_cvt16to32_shr_16_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
void x265_cvt16to32_shr_32_sse4(int32_t * dst, int16_t * src, intptr_t, int32_t, int32_t);
-uint32_t x265_cvt16to32_cnt_4_sse4(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_8_sse4(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_16_sse4(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_32_sse4(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_4_avx2(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_8_avx2(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_16_avx2(int16_t * dst, int16_t * src, intptr_t);
-uint32_t x265_cvt16to32_cnt_32_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_4_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_8_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_16_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_32_sse4(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_4_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_8_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_16_avx2(int16_t * dst, int16_t * src, intptr_t);
+uint32_t x265_copy_cnt_32_avx2(int16_t * dst, int16_t * src, intptr_t);
#define SETUP_BLOCKCOPY_FUNC(W, H, cpu) \
void x265_blockcopy_pp_ ## W ## x ## H ## cpu(pixel * a, intptr_t stridea, pixel * b, intptr_t strideb); \
diff -r a70b4e57aac2 -r d0d545f2982f source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Tue Sep 02 14:04:02 2014 +0530
+++ b/source/test/pixelharness.cpp Tue Sep 02 14:27:40 2014 +0530
@@ -580,7 +580,7 @@
return true;
}
-bool PixelHarness::check_cvt16to32_cnt_t(cvt16to32_cnt_t ref, cvt16to32_cnt_t opt)
+bool PixelHarness::check_copy_cnt_t(copy_cnt_t ref, copy_cnt_t opt)
{
ALIGN_VAR_16(int16_t, ref_dest[64 * 64]);
ALIGN_VAR_16(int16_t, opt_dest[64 * 64]);
@@ -1378,11 +1378,11 @@
}
}
- if ((i < BLOCK_64x64) && opt.cvt16to32_cnt[i])
+ if ((i < BLOCK_64x64) && opt.copy_cnt[i])
{
- if (!check_cvt16to32_cnt_t(ref.cvt16to32_cnt[i], opt.cvt16to32_cnt[i]))
+ if (!check_copy_cnt_t(ref.copy_cnt[i], opt.copy_cnt[i]))
{
- printf("cvt16to32_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
+ printf("copy_cnt[%dx%d] failed!\n", 4 << i, 4 << i);
return false;
}
}
@@ -1779,10 +1779,10 @@
REPORT_SPEEDUP(opt.cvt32to16_shl[i], ref.cvt32to16_shl[i], sbuf2, ibuf1, STRIDE, 3);
}
- if ((i < BLOCK_64x64) && opt.cvt16to32_cnt[i])
+ if ((i < BLOCK_64x64) && opt.copy_cnt[i])
{
- HEADER("cvt16to32_cnt[%dx%d]", 4 << i, 4 << i);
- REPORT_SPEEDUP(opt.cvt16to32_cnt[i], ref.cvt16to32_cnt[i], sbuf1, sbuf2, STRIDE);
+ HEADER("copy_cnt[%dx%d]", 4 << i, 4 << i);
+ REPORT_SPEEDUP(opt.copy_cnt[i], ref.copy_cnt[i], sbuf1, sbuf2, STRIDE);
}
}
diff -r a70b4e57aac2 -r d0d545f2982f source/test/pixelharness.h
--- a/source/test/pixelharness.h Tue Sep 02 14:04:02 2014 +0530
+++ b/source/test/pixelharness.h Tue Sep 02 14:27:40 2014 +0530
@@ -88,7 +88,7 @@
bool check_cvt16to32_shl_t(cvt16to32_shl_t ref, cvt16to32_shl_t opt);
bool check_cvt16to32_shr_t(cvt16to32_shr_t ref, cvt16to32_shr_t opt);
bool check_cvt32to16_shl_t(cvt32to16_shl_t ref, cvt32to16_shl_t opt);
- bool check_cvt16to32_cnt_t(cvt16to32_cnt_t ref, cvt16to32_cnt_t opt);
+ bool check_copy_cnt_t(copy_cnt_t ref, copy_cnt_t opt);
bool check_pixel_var(var_t ref, var_t opt);
bool check_ssim_4x4x2_core(ssim_4x4x2_core_t ref, ssim_4x4x2_core_t opt);
bool check_ssim_end(ssim_end4_t ref, ssim_end4_t opt);
More information about the x265-devel
mailing list