[x265] [PATCH] weighted prediction (pixel), interface simplification
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Fri Oct 17 12:29:49 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1413541750 -19800
# Node ID 61051f5a16b387120b17be2024543d14aea61f16
# Parent b7eeae24aae63495bcad1570ecd76cae988f0f6e
weighted prediction (pixel), interface simplification
diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/pixel.cpp
--- a/source/common/pixel.cpp Thu Oct 16 21:57:30 2014 +0530
+++ b/source/common/pixel.cpp Fri Oct 17 15:59:10 2014 +0530
@@ -640,7 +640,7 @@
}
}
-void weight_pp_c(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
+void weight_pp_c(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
{
int x, y;
@@ -656,8 +656,8 @@
x++;
}
- src += srcStride;
- dst += dstStride;
+ src += stride;
+ dst += stride;
}
}
diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/primitives.h
--- a/source/common/primitives.h Thu Oct 16 21:57:30 2014 +0530
+++ b/source/common/primitives.h Fri Oct 17 15:59:10 2014 +0530
@@ -168,7 +168,7 @@
typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
typedef int (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
-typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
+typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,
diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Thu Oct 16 21:57:30 2014 +0530
+++ b/source/common/x86/pixel-util.h Fri Oct 17 15:59:10 2014 +0530
@@ -57,7 +57,7 @@
void x265_dequant_normal_avx2(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
-void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
+void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t * pix1, intptr_t stride1,
diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Thu Oct 16 21:57:30 2014 +0530
+++ b/source/common/x86/pixel-util8.asm Fri Oct 17 15:59:10 2014 +0530
@@ -1298,35 +1298,32 @@
;-----------------------------------------------------------------------------------------------------------------------------------------------
-;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
+;void weight_pp(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
;-----------------------------------------------------------------------------------------------------------------------------------------------
INIT_XMM sse4
cglobal weight_pp, 6, 7, 6
+ shl r5d, 6 ; r5d = w0 << 6 (packed into m0 below)
mov r6d, r6m
- shl r6d, 6
- movd m0, r6d ; m0 = [w0<<6]
-
- movd m1, r7m ; m1 = [round]
- punpcklwd m0, m1 ; assuming both (w0<<6) and round are using maximum of 16 bits each.
- pshufd m0, m0, 0 ; m0 = [w0<<6 round]
-
- movd m1, r8m
-
- movd m2, r9m
+ shl r6d, 16
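+ or r6d, r5d ; r6d = (round << 16) | (w0 << 6); assumes (w0<<6) and round are each non-negative and fit in 16 bits, because this OR merges all 32 bits of r5d (any bit above 15 would corrupt the round half)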
+ movd m0, r6d
+ pshufd m0, m0, 0 ; m0 = [w0<<6, round]
+
+ movd m1, r7m
+
+ movd m2, r8m
pshufd m2, m2, 0
mova m5, [pw_1]
- sub r2d, r4d
- sub r3d, r4d
-
+ sub r2d, r3d
+ shr r3d, 4
.loopH:
- mov r6d, r4d
- shr r6d, 4
+ mov r5d, r3d
+
.loopW:
- movh m4, [r0]
- pmovzxbw m4, m4
+ pmovzxbw m4, [r0]
punpcklwd m3, m4, m5
pmaddwd m3, m0
@@ -1364,13 +1361,13 @@
add r0, 16
add r1, 16
- dec r6d
+ dec r5d
jnz .loopW
lea r0, [r0 + r2]
- lea r1, [r1 + r3]
-
- dec r5d
+ lea r1, [r1 + r2]
+
+ dec r4d
jnz .loopH
RET
diff -r b7eeae24aae6 -r 61051f5a16b3 source/encoder/reference.cpp
--- a/source/encoder/reference.cpp Thu Oct 16 21:57:30 2014 +0530
+++ b/source/encoder/reference.cpp Fri Oct 17 15:59:10 2014 +0530
@@ -92,7 +92,7 @@
// Computing weighted CU rows
int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
int padwidth = (width + 15) & ~15; // weightp assembly needs even 16 byte widths
- primitives.weight_pp(src, dst, lumaStride, lumaStride, padwidth, height,
+ primitives.weight_pp(src, dst, lumaStride, padwidth, height,
weight, round << correction, shift + correction, offset);
// Extending Left & Right
diff -r b7eeae24aae6 -r 61051f5a16b3 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Thu Oct 16 21:57:30 2014 +0530
+++ b/source/encoder/slicetype.cpp Fri Oct 17 15:59:10 2014 +0530
@@ -1384,7 +1384,7 @@
int round = denom ? 1 << (denom - 1) : 0;
int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
- primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, stride, stride, m_paddedLines,
+ primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, stride, m_paddedLines,
scale, round << correction, denom + correction, offset);
src = m_weightedRef.fpelPlane;
}
@@ -1482,7 +1482,7 @@
for (int i = 0; i < 4; i++)
{
- primitives.weight_pp(ref->buffer[i], m_wbuffer[i], stride, stride, stride, m_paddedLines,
+ primitives.weight_pp(ref->buffer[i], m_wbuffer[i], stride, stride, m_paddedLines,
scale, round << correction, denom + correction, offset);
}
diff -r b7eeae24aae6 -r 61051f5a16b3 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Thu Oct 16 21:57:30 2014 +0530
+++ b/source/encoder/weightPrediction.cpp Fri Oct 17 15:59:10 2014 +0530
@@ -185,7 +185,7 @@
int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
int pwidth = ((width + 15) >> 4) << 4;
- primitives.weight_pp(ref, weightTemp, stride, stride, pwidth, height,
+ primitives.weight_pp(ref, weightTemp, stride, pwidth, height,
weight, round << correction, denom + correction, offset);
ref = weightTemp;
}
diff -r b7eeae24aae6 -r 61051f5a16b3 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Thu Oct 16 21:57:30 2014 +0530
+++ b/source/test/pixelharness.cpp Fri Oct 17 15:59:10 2014 +0530
@@ -334,8 +334,8 @@
for (int i = 0; i < ITERS; i++)
{
int index = i % TEST_CASES;
- checked(opt, pixel_test_buff[index] + j, opt_dest, stride, stride, width, height, w0, round, shift, offset);
- ref(pixel_test_buff[index] + j, ref_dest, stride, stride, width, height, w0, round, shift, offset);
+ checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round, shift, offset);
+ ref(pixel_test_buff[index] + j, ref_dest, stride, width, height, w0, round, shift, offset);
if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
return false;
@@ -1775,7 +1775,7 @@
if (opt.weight_pp)
{
HEADER0("weight_pp");
- REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
+ REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 32, 32, 128, 1 << 9, 10, 100);
}
if (opt.weight_sp)
More information about the x265-devel
mailing list