[x265] [PATCH] weighted prediction pixel, interface simplification
praveen at multicorewareinc.com
praveen at multicorewareinc.com
Mon Oct 20 10:23:22 CEST 2014
# HG changeset patch
# User Praveen Tiwari
# Date 1413793389 -19800
# Node ID 3366be6ef59eec3d3ca69ed52942708b5d1b3bc6
# Parent 1e09d0395826bdd01a4b4e46569853a2f04b9e95
weighted prediction pixel, interface simplification
diff -r 1e09d0395826 -r 3366be6ef59e source/common/pixel.cpp
--- a/source/common/pixel.cpp Sun Oct 19 20:53:36 2014 -0500
+++ b/source/common/pixel.cpp Mon Oct 20 13:53:09 2014 +0530
@@ -640,11 +640,13 @@
}
}
-void weight_pp_c(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
+void weight_pp_c(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
{
int x, y;
X265_CHECK(!(width & 15), "weightp alignment error\n");
+ X265_CHECK(!((w0 << 6) > 32767), "w0 using more than 16 bits, asm output will mismatch\n");
+ X265_CHECK(!(round > 32767), "round using more than 16 bits, asm output will mismatch\n");
for (y = 0; y <= height - 1; y++)
{
@@ -656,8 +658,8 @@
x++;
}
- src += srcStride;
- dst += dstStride;
+ src += stride;
+ dst += stride;
}
}
diff -r 1e09d0395826 -r 3366be6ef59e source/common/primitives.h
--- a/source/common/primitives.h Sun Oct 19 20:53:36 2014 -0500
+++ b/source/common/primitives.h Mon Oct 20 13:53:09 2014 +0530
@@ -168,7 +168,7 @@
typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
typedef int (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
-typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
+typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,
diff -r 1e09d0395826 -r 3366be6ef59e source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Sun Oct 19 20:53:36 2014 -0500
+++ b/source/common/x86/pixel-util.h Mon Oct 20 13:53:09 2014 +0530
@@ -57,7 +57,7 @@
void x265_dequant_normal_avx2(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
-void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
+void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t * pix1, intptr_t stride1,
diff -r 1e09d0395826 -r 3366be6ef59e source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm Sun Oct 19 20:53:36 2014 -0500
+++ b/source/common/x86/pixel-util8.asm Mon Oct 20 13:53:09 2014 +0530
@@ -1298,36 +1298,29 @@
;-----------------------------------------------------------------------------------------------------------------------------------------------
-;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
+;void weight_pp(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
;-----------------------------------------------------------------------------------------------------------------------------------------------
INIT_XMM sse4
cglobal weight_pp, 6, 7, 6
+ shl r5d, 6 ; r5d = [w0<<6]
mov r6d, r6m
- shl r6d, 6
- movd m0, r6d ; m0 = [w0<<6]
-
- movd m1, r7m ; m1 = [round]
- punpcklwd m0, m1 ; assuming both (w0<<6) and round are using maximum of 16 bits each.
- pshufd m0, m0, 0 ; m0 = [w0<<6 round]
-
- movd m1, r8m
-
- movd m2, r9m
+ shl r6d, 16
+ or r6d, r5d ; r6d = (round << 16) | (w0 << 6); assumes (w0<<6) and round each fit in 16 bits.
+ movd m0, r6d
+ pshufd m0, m0, 0 ; m0 = [w0<<6, round]
+ movd m1, r7m
+ movd m2, r8m
pshufd m2, m2, 0
-
mova m5, [pw_1]
-
- sub r2d, r4d
- sub r3d, r4d
+ sub r2d, r3d
+ shr r3d, 4
.loopH:
- mov r6d, r4d
- shr r6d, 4
+ mov r5d, r3d
+
.loopW:
- movh m4, [r0]
- pmovzxbw m4, m4
-
+ pmovzxbw m4, [r0]
punpcklwd m3, m4, m5
pmaddwd m3, m0
psrad m3, m1
@@ -1340,12 +1333,9 @@
packssdw m3, m4
packuswb m3, m3
-
movh [r1], m3
- movh m4, [r0 + 8]
- pmovzxbw m4, m4
-
+ pmovzxbw m4, [r0 + 8]
punpcklwd m3, m4, m5
pmaddwd m3, m0
psrad m3, m1
@@ -1358,21 +1348,19 @@
packssdw m3, m4
packuswb m3, m3
-
movh [r1 + 8], m3
add r0, 16
add r1, 16
- dec r6d
+ dec r5d
jnz .loopW
lea r0, [r0 + r2]
- lea r1, [r1 + r3]
-
- dec r5d
+ lea r1, [r1 + r2]
+
+ dec r4d
jnz .loopH
-
RET
;-------------------------------------------------------------------------------------------------------------------------------------------------
diff -r 1e09d0395826 -r 3366be6ef59e source/encoder/reference.cpp
--- a/source/encoder/reference.cpp Sun Oct 19 20:53:36 2014 -0500
+++ b/source/encoder/reference.cpp Mon Oct 20 13:53:09 2014 +0530
@@ -92,7 +92,7 @@
// Computing weighted CU rows
int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
int padwidth = (width + 15) & ~15; // weightp assembly needs even 16 byte widths
- primitives.weight_pp(src, dst, lumaStride, lumaStride, padwidth, height,
+ primitives.weight_pp(src, dst, lumaStride, padwidth, height,
weight, round << correction, shift + correction, offset);
// Extending Left & Right
diff -r 1e09d0395826 -r 3366be6ef59e source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp Sun Oct 19 20:53:36 2014 -0500
+++ b/source/encoder/slicetype.cpp Mon Oct 20 13:53:09 2014 +0530
@@ -1383,7 +1383,7 @@
int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
int widthHeight = (int)stride;
- primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, widthHeight, widthHeight, m_paddedLines,
+ primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, widthHeight, m_paddedLines,
scale, round << correction, denom + correction, offset);
src = m_weightedRef.fpelPlane;
}
@@ -1481,7 +1481,7 @@
int widthHeight = (int)stride;
for (int i = 0; i < 4; i++)
- primitives.weight_pp(ref->buffer[i], m_wbuffer[i], stride, widthHeight, widthHeight, m_paddedLines,
+ primitives.weight_pp(ref->buffer[i], m_wbuffer[i], stride, widthHeight, m_paddedLines,
scale, round << correction, denom + correction, offset);
m_weightedRef.isWeighted = true;
diff -r 1e09d0395826 -r 3366be6ef59e source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp Sun Oct 19 20:53:36 2014 -0500
+++ b/source/encoder/weightPrediction.cpp Mon Oct 20 13:53:09 2014 +0530
@@ -186,7 +186,7 @@
int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
int pwidth = ((width + 15) >> 4) << 4;
- primitives.weight_pp(ref, weightTemp, stride, stride, pwidth, height,
+ primitives.weight_pp(ref, weightTemp, stride, pwidth, height,
weight, round << correction, denom + correction, offset);
ref = weightTemp;
}
diff -r 1e09d0395826 -r 3366be6ef59e source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Sun Oct 19 20:53:36 2014 -0500
+++ b/source/test/pixelharness.cpp Mon Oct 20 13:53:09 2014 +0530
@@ -334,8 +334,8 @@
for (int i = 0; i < ITERS; i++)
{
int index = i % TEST_CASES;
- checked(opt, pixel_test_buff[index] + j, opt_dest, stride, stride, width, height, w0, round, shift, offset);
- ref(pixel_test_buff[index] + j, ref_dest, stride, stride, width, height, w0, round, shift, offset);
+ checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round, shift, offset);
+ ref(pixel_test_buff[index] + j, ref_dest, stride, width, height, w0, round, shift, offset);
if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
return false;
@@ -1775,7 +1775,7 @@
if (opt.weight_pp)
{
HEADER0("weight_pp");
- REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
+ REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 32, 32, 128, 1 << 9, 10, 100);
}
if (opt.weight_sp)
More information about the x265-devel mailing list