[x265] [PATCH 05 of 29] scale1D_128to64_new: cleanup
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Tue Jan 13 08:11:13 CET 2015
# HG changeset patch
# User Praveen Tiwari
# Date 1421046277 -19800
# Mon Jan 12 12:34:37 2015 +0530
# Node ID f4daa8744d08b569ae652737c4506b397dfb55cb
# Parent 3caab705cfdf1d1d5549f0986bd9496d5fc606e1
scale1D_128to64_new: cleanup
This patch removes the "_new" suffix so the primitive matches the existing naming pattern, and removes the duplicated code.
diff -r 3caab705cfdf -r f4daa8744d08 source/common/pixel.cpp
--- a/source/common/pixel.cpp Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/pixel.cpp Mon Jan 12 12:34:37 2015 +0530
@@ -629,7 +629,7 @@
}
}
-void scale1D_128to64_new(pixel *dst, const pixel *src, intptr_t /*stride*/)
+void scale1D_128to64(pixel *dst, const pixel *src, intptr_t /*stride*/)
{
int x;
const pixel* src1 = src;
@@ -1377,7 +1377,7 @@
p.weight_pp = weight_pp_c;
p.weight_sp = weight_sp_c;
- p.scale1D_128to64_new = scale1D_128to64_new;
+ p.scale1D_128to64 = scale1D_128to64;
p.scale2D_64to32 = scale2D_64to32;
p.frameInitLowres = frame_init_lowres_core;
p.ssim_4x4x2_core = ssim_4x4x2_core;
diff -r 3caab705cfdf -r f4daa8744d08 source/common/primitives.h
--- a/source/common/primitives.h Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/primitives.h Mon Jan 12 12:34:37 2015 +0530
@@ -255,7 +255,7 @@
intra_pred_new_t intra_pred_new[NUM_INTRA_MODE][NUM_TR_SIZE];
intra_allangs_t intra_pred_allangs[NUM_TR_SIZE]; /* todo: move to CU */
intra_allangs_new_t intra_pred_allangs_new[NUM_TR_SIZE];
- scale_t scale1D_128to64_new;
+ scale_t scale1D_128to64;
scale_t scale2D_64to32;
ssim_4x4x2_core_t ssim_4x4x2_core;
diff -r 3caab705cfdf -r f4daa8744d08 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/x86/asm-primitives.cpp Mon Jan 12 12:34:37 2015 +0530
@@ -1384,7 +1384,7 @@
}
if (cpuMask & X265_CPU_SSSE3)
{
- p.scale1D_128to64_new = x265_scale1D_128to64_new_ssse3;
+ p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
INTRA_ANG_SSSE3(ssse3);
@@ -1445,7 +1445,7 @@
p.quant = x265_quant_avx2;
p.nquant = x265_nquant_avx2;
p.dequant_normal = x265_dequant_normal_avx2;
- p.scale1D_128to64_new = x265_scale1D_128to64_new_avx2;
+ p.scale1D_128to64= x265_scale1D_128to64_avx2;
p.cu[BLOCK_4x4].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_4_avx2;
p.cu[BLOCK_8x8].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_8_avx2;
p.cu[BLOCK_16x16].cpy1Dto2D_shl = x265_cpy1Dto2D_shl_16_avx2;
@@ -1602,7 +1602,7 @@
INTRA_ANG_SSSE3(ssse3);
- p.scale1D_128to64_new = x265_scale1D_128to64_new_ssse3;
+ p.scale1D_128to64 = x265_scale1D_128to64_ssse3;
p.scale2D_64to32 = x265_scale2D_64to32_ssse3;
SAD_X3(ssse3);
SAD_X4(ssse3);
@@ -1812,7 +1812,7 @@
p.chroma[X265_CSP_I422].pu[CHROMA422_16x24].copy_ss = x265_blockcopy_ss_16x24_avx;
p.chroma[X265_CSP_I422].pu[CHROMA422_16x32].copy_ss = x265_blockcopy_ss_16x32_avx;
p.chroma[X265_CSP_I422].pu[CHROMA422_16x64].copy_ss = x265_blockcopy_ss_16x64_avx;
- p.scale1D_128to64_new = x265_scale1D_128to64_new_avx2;
+ p.scale1D_128to64 = x265_scale1D_128to64_avx2;
p.weight_pp = x265_weight_pp_avx2;
diff -r 3caab705cfdf -r f4daa8744d08 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/x86/pixel-util.h Mon Jan 12 12:34:37 2015 +0530
@@ -65,8 +65,6 @@
void x265_scale1D_128to64_ssse3(pixel*, const pixel*, intptr_t);
void x265_scale1D_128to64_avx2(pixel*, const pixel*, intptr_t);
-void x265_scale1D_128to64_new_ssse3(pixel*, const pixel*, intptr_t);
-void x265_scale1D_128to64_new_avx2(pixel*, const pixel*, intptr_t);
void x265_scale2D_64to32_ssse3(pixel*, const pixel*, intptr_t);
#define SETUP_CHROMA_PIXELSUB_PS_FUNC(W, H, cpu) \
diff -r 3caab705cfdf -r f4daa8744d08 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Mon Jan 12 12:20:17 2015 +0530
+++ b/source/common/x86/pixel-util8.asm Mon Jan 12 12:34:37 2015 +0530
@@ -2984,7 +2984,7 @@
; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
;-----------------------------------------------------------------
INIT_XMM ssse3
-cglobal scale1D_128to64_new, 2, 2, 8, dest, src1, stride
+cglobal scale1D_128to64, 2, 2, 8, dest, src1, stride
%if HIGH_BIT_DEPTH
mova m7, [deinterleave_word_shuf]
@@ -3109,7 +3109,7 @@
%if HIGH_BIT_DEPTH == 1
INIT_YMM avx2
-cglobal scale1D_128to64_new, 2, 2, 3
+cglobal scale1D_128to64, 2, 2, 3
pxor m2, m2
;Top pixel
@@ -3173,7 +3173,7 @@
RET
%else ; HIGH_BIT_DEPTH == 0
INIT_YMM avx2
-cglobal scale1D_128to64_new, 2, 2, 4
+cglobal scale1D_128to64, 2, 2, 4
pxor m2, m2
mova m3, [pb_1]
@@ -3222,220 +3222,6 @@
%endif
;-----------------------------------------------------------------
-; void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/)
-;-----------------------------------------------------------------
-INIT_XMM ssse3
-cglobal scale1D_128to64, 2, 2, 8, dest, src1, stride
-%if HIGH_BIT_DEPTH
- mova m7, [deinterleave_word_shuf]
-
- movu m0, [r1]
- palignr m1, m0, 2
- movu m2, [r1 + 16]
- palignr m3, m2, 2
- movu m4, [r1 + 32]
- palignr m5, m4, 2
- movu m6, [r1 + 48]
- pavgw m0, m1
- palignr m1, m6, 2
- pavgw m2, m3
- pavgw m4, m5
- pavgw m6, m1
- pshufb m0, m0, m7
- pshufb m2, m2, m7
- pshufb m4, m4, m7
- pshufb m6, m6, m7
- punpcklqdq m0, m2
- movu [r0], m0
- punpcklqdq m4, m6
- movu [r0 + 16], m4
-
-
-
- movu m0, [r1 + 64]
- palignr m1, m0, 2
- movu m2, [r1 + 80]
- palignr m3, m2, 2
- movu m4, [r1 + 96]
- palignr m5, m4, 2
- movu m6, [r1 + 112]
- pavgw m0, m1
- palignr m1, m6, 2
- pavgw m2, m3
- pavgw m4, m5
- pavgw m6, m1
- pshufb m0, m0, m7
- pshufb m2, m2, m7
- pshufb m4, m4, m7
- pshufb m6, m6, m7
- punpcklqdq m0, m2
- movu [r0 + 32], m0
- punpcklqdq m4, m6
- movu [r0 + 48], m4
-
- movu m0, [r1 + 128]
- palignr m1, m0, 2
- movu m2, [r1 + 144]
- palignr m3, m2, 2
- movu m4, [r1 + 160]
- palignr m5, m4, 2
- movu m6, [r1 + 176]
- pavgw m0, m1
- palignr m1, m6, 2
- pavgw m2, m3
- pavgw m4, m5
- pavgw m6, m1
- pshufb m0, m0, m7
- pshufb m2, m2, m7
- pshufb m4, m4, m7
- pshufb m6, m6, m7
-
- punpcklqdq m0, m2
- movu [r0 + 64], m0
- punpcklqdq m4, m6
- movu [r0 + 80], m4
-
- movu m0, [r1 + 192]
- palignr m1, m0, 2
- movu m2, [r1 + 208]
- palignr m3, m2, 2
- movu m4, [r1 + 224]
- palignr m5, m4, 2
- movu m6, [r1 + 240]
- pavgw m0, m1
- palignr m1, m6, 2
- pavgw m2, m3
- pavgw m4, m5
- pavgw m6, m1
- pshufb m0, m0, m7
- pshufb m2, m2, m7
- pshufb m4, m4, m7
- pshufb m6, m6, m7
-
- punpcklqdq m0, m2
- movu [r0 + 96], m0
- punpcklqdq m4, m6
- movu [r0 + 112], m4
-
-%else
- mova m7, [deinterleave_shuf]
-
- movu m0, [r1]
- palignr m1, m0, 1
- movu m2, [r1 + 16]
- palignr m3, m2, 1
- movu m4, [r1 + 32]
- palignr m5, m4, 1
- movu m6, [r1 + 48]
-
- pavgb m0, m1
-
- palignr m1, m6, 1
-
- pavgb m2, m3
- pavgb m4, m5
- pavgb m6, m1
-
- pshufb m0, m0, m7
- pshufb m2, m2, m7
- pshufb m4, m4, m7
- pshufb m6, m6, m7
-
- punpcklqdq m0, m2
- movu [r0], m0
- punpcklqdq m4, m6
- movu [r0 + 16], m4
-
- movu m0, [r1 + 64]
- palignr m1, m0, 1
- movu m2, [r1 + 80]
- palignr m3, m2, 1
- movu m4, [r1 + 96]
- palignr m5, m4, 1
- movu m6, [r1 + 112]
-
- pavgb m0, m1
-
- palignr m1, m6, 1
-
- pavgb m2, m3
- pavgb m4, m5
- pavgb m6, m1
-
- pshufb m0, m0, m7
- pshufb m2, m2, m7
- pshufb m4, m4, m7
- pshufb m6, m6, m7
-
- punpcklqdq m0, m2
- movu [r0 + 32], m0
- punpcklqdq m4, m6
- movu [r0 + 48], m4
-%endif
-RET
-
-%if HIGH_BIT_DEPTH == 1
-INIT_YMM avx2
-cglobal scale1D_128to64, 2, 2, 3
- pxor m2, m2
-
- movu m0, [r1]
- movu m1, [r1 + 32]
- phaddw m0, m1
- pavgw m0, m2
- vpermq m0, m0, 0xD8
- movu [r0], m0
-
- movu m0, [r1 + 64]
- movu m1, [r1 + 96]
- phaddw m0, m1
- pavgw m0, m2
- vpermq m0, m0, 0xD8
- movu [r0 + 32], m0
-
- movu m0, [r1 + 128]
- movu m1, [r1 + 160]
- phaddw m0, m1
- pavgw m0, m2
- vpermq m0, m0, 0xD8
- movu [r0 + 64], m0
-
- movu m0, [r1 + 192]
- movu m1, [r1 + 224]
- phaddw m0, m1
- pavgw m0, m2
- vpermq m0, m0, 0xD8
- movu [r0 + 96], m0
- RET
-%else ; HIGH_BIT_DEPTH == 0
-INIT_YMM avx2
-cglobal scale1D_128to64, 2, 2, 4
- pxor m2, m2
- mova m3, [pb_1]
-
- movu m0, [r1]
- pmaddubsw m0, m0, m3
- pavgw m0, m2
- movu m1, [r1 + 32]
- pmaddubsw m1, m1, m3
- pavgw m1, m2
- packuswb m0, m1
- vpermq m0, m0, 0xD8
- movu [r0], m0
-
- movu m0, [r1 + 64]
- pmaddubsw m0, m0, m3
- pavgw m0, m2
- movu m1, [r1 + 96]
- pmaddubsw m1, m1, m3
- pavgw m1, m2
- packuswb m0, m1
- vpermq m0, m0, 0xD8
- movu [r0 + 32], m0
- RET
-%endif
-
-;-----------------------------------------------------------------
; void scale2D_64to32(pixel *dst, pixel *src, intptr_t stride)
;-----------------------------------------------------------------
%if HIGH_BIT_DEPTH
diff -r 3caab705cfdf -r f4daa8744d08 source/encoder/search.cpp
--- a/source/encoder/search.cpp Mon Jan 12 12:20:17 2015 +0530
+++ b/source/encoder/search.cpp Mon Jan 12 12:34:37 2015 +0530
@@ -1222,7 +1222,7 @@
pixel nScale[129];
intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
- primitives.scale1D_128to64_new(nScale + 1, intraNeighbourBuf[0] + 1, 0);
+ primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1, 0);
//TO DO: primitive
for (int x = 1; x < 65; x++)
@@ -1454,7 +1454,7 @@
pixel nScale[129];
intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
- primitives.scale1D_128to64_new(nScale + 1, intraNeighbourBuf[0] + 1, 0);
+ primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1, 0);
// TO DO: primitive
for (int x = 1; x < 65; x++)
diff -r 3caab705cfdf -r f4daa8744d08 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Mon Jan 12 12:20:17 2015 +0530
+++ b/source/test/pixelharness.cpp Mon Jan 12 12:34:37 2015 +0530
@@ -708,33 +708,6 @@
return true;
}
-bool PixelHarness::check_scale_pp_new(scale_t ref, scale_t opt)
-{
- ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
- ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
-
- memset(ref_dest, 0, sizeof(ref_dest));
- memset(opt_dest, 0, sizeof(opt_dest));
-
- int j = 0;
- intptr_t stride = STRIDE;
- for (int i = 0; i < ITERS; i++)
- {
- int index = i % TEST_CASES;
- checked(opt, opt_dest, pixel_test_buff[index] + j, stride);
- ref(ref_dest, pixel_test_buff[index] + j, stride);
-
- if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
- return false;
-
- reportfail();
- j += INCR;
- }
-
- return true;
-}
-
-
bool PixelHarness::check_transpose(transpose_t ref, transpose_t opt)
{
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
@@ -1557,11 +1530,11 @@
}
}
- if (opt.scale1D_128to64_new)
+ if (opt.scale1D_128to64)
{
- if (!check_scale_pp_new(ref.scale1D_128to64_new, opt.scale1D_128to64_new))
+ if (!check_scale_pp(ref.scale1D_128to64, opt.scale1D_128to64))
{
- printf("scale1D_128to64_new failed!\n");
+ printf("scale1D_128to64 failed!\n");
return false;
}
}
@@ -1946,10 +1919,10 @@
REPORT_SPEEDUP(opt.frameInitLowres, ref.frameInitLowres, pbuf2, pbuf1, pbuf2, pbuf3, pbuf4, 64, 64, 64, 64);
}
- if (opt.scale1D_128to64_new)
+ if (opt.scale1D_128to64)
{
- HEADER0("scale1D_128to64_new");
- REPORT_SPEEDUP(opt.scale1D_128to64_new, ref.scale1D_128to64_new, pbuf2, pbuf1, 64);
+ HEADER0("scale1D_128to64");
+ REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1, 64);
}
if (opt.scale2D_64to32)
diff -r 3caab705cfdf -r f4daa8744d08 source/test/pixelharness.h
--- a/source/test/pixelharness.h Mon Jan 12 12:20:17 2015 +0530
+++ b/source/test/pixelharness.h Mon Jan 12 12:34:37 2015 +0530
@@ -76,7 +76,6 @@
bool check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt);
bool check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt);
bool check_pixel_add_ps(pixel_add_ps_t ref, pixel_add_ps_t opt);
- bool check_scale_pp_new(scale_t ref, scale_t opt);
bool check_scale_pp(scale_t ref, scale_t opt);
bool check_ssd_s(pixel_ssd_s_t ref, pixel_ssd_s_t opt);
bool check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt);
More information about the x265-devel
mailing list