[x265] [PATCH] weighted prediction (pixel), interface simplification

Fri Oct 17 12:29:49 CEST 2014

# HG changeset patch
# User Praveen Tiwari
# Date 1413541750 -19800
# Node ID 61051f5a16b387120b17be2024543d14aea61f16
# Parent  b7eeae24aae63495bcad1570ecd76cae988f0f6e
weighted prediction (pixel), interface simplification

diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/pixel.cpp

--- a/source/common/pixel.cpp	Thu Oct 16 21:57:30 2014 +0530
+++ b/source/common/pixel.cpp	Fri Oct 17 15:59:10 2014 +0530
@@ -640,7 +640,7 @@
     }
 }
 
-void weight_pp_c(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
+void weight_pp_c(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
 {
     int x, y;
 
@@ -656,8 +656,8 @@
             x++;
         }
 
-        src += srcStride;
-        dst += dstStride;
+        src += stride;
+        dst += stride;
     }
 }
 
diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/primitives.h
--- a/source/common/primitives.h	Thu Oct 16 21:57:30 2014 +0530
+++ b/source/common/primitives.h	Fri Oct 17 15:59:10 2014 +0530
@@ -168,7 +168,7 @@
 typedef void (*dequant_normal_t)(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 typedef int  (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff);
 
-typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
+typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
 typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride);
 typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel *dstv, pixel *dstc,
diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Thu Oct 16 21:57:30 2014 +0530
+++ b/source/common/x86/pixel-util.h	Fri Oct 17 15:59:10 2014 +0530
@@ -57,7 +57,7 @@
 void x265_dequant_normal_avx2(const int16_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 int x265_count_nonzero_ssse3(const int16_t *quantCoeff, int numCoeff);
 
-void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
+void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
 void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 
 void x265_pixel_ssim_4x4x2_core_mmx2(const uint8_t * pix1, intptr_t stride1,
diff -r b7eeae24aae6 -r 61051f5a16b3 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Thu Oct 16 21:57:30 2014 +0530
+++ b/source/common/x86/pixel-util8.asm	Fri Oct 17 15:59:10 2014 +0530
@@ -1298,35 +1298,32 @@
 
 
 ;-----------------------------------------------------------------------------------------------------------------------------------------------
-;void weight_pp(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset)
+;void weight_pp(pixel *src, pixel *dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset)
 ;-----------------------------------------------------------------------------------------------------------------------------------------------
 INIT_XMM sse4
 cglobal weight_pp, 6, 7, 6
 
+    shl         r5d, 6      ; r5d = w0<<6
     mov         r6d, r6m
-    shl         r6d, 6
-    movd        m0, r6d         ; m0 = [w0<<6]
-
-    movd        m1, r7m         ; m1 = [round]
-    punpcklwd   m0, m1          ; assuming both (w0<<6) and round are using maximum of 16 bits each.
-    pshufd      m0, m0, 0       ; m0 = [w0<<6 round]
-
-    movd        m1, r8m
-
-    movd        m2, r9m
+    shl         r6d, 16
+    or          r6d, r5d    ; r6d = (round << 16) | (w0<<6); assumes (w0<<6) and round each fit in 16 bits (higher bits of r5d, e.g. from a negative w0, would corrupt the round word)
+    movd        m0, r6d
+    pshufd      m0, m0, 0   ; m0 = [w0<<6, round]
+
+    movd        m1, r7m
+
+    movd        m2, r8m
     pshufd      m2, m2, 0
 
     mova        m5, [pw_1]
 
-    sub         r2d, r4d
-    sub         r3d, r4d
-
+    sub         r2d, r3d
+    shr         r3d, 4
 .loopH:
-    mov         r6d, r4d
-    shr         r6d, 4
+    mov         r5d, r3d
+
 .loopW:
-    movh        m4, [r0]
-    pmovzxbw    m4, m4
+    pmovzxbw    m4, [r0]
 
     punpcklwd   m3, m4, m5
     pmaddwd     m3, m0
@@ -1364,13 +1361,13 @@
     add         r0, 16
     add         r1, 16
 
-    dec         r6d
+    dec         r5d
     jnz         .loopW
 
     lea         r0, [r0 + r2]
-    lea         r1, [r1 + r3]
-
-    dec         r5d
+    lea         r1, [r1 + r2]
+
+    dec         r4d
     jnz         .loopH
 
     RET
diff -r b7eeae24aae6 -r 61051f5a16b3 source/encoder/reference.cpp
--- a/source/encoder/reference.cpp	Thu Oct 16 21:57:30 2014 +0530
+++ b/source/encoder/reference.cpp	Fri Oct 17 15:59:10 2014 +0530
@@ -92,7 +92,7 @@
     // Computing weighted CU rows
     int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
     int padwidth = (width + 15) & ~15;  // weightp assembly needs even 16 byte widths
-    primitives.weight_pp(src, dst, lumaStride, lumaStride, padwidth, height,
+    primitives.weight_pp(src, dst, lumaStride, padwidth, height,
                          weight, round << correction, shift + correction, offset);
 
     // Extending Left & Right
diff -r b7eeae24aae6 -r 61051f5a16b3 source/encoder/slicetype.cpp
--- a/source/encoder/slicetype.cpp	Thu Oct 16 21:57:30 2014 +0530
+++ b/source/encoder/slicetype.cpp	Fri Oct 17 15:59:10 2014 +0530
@@ -1384,7 +1384,7 @@
         int round = denom ? 1 << (denom - 1) : 0;
         int correction = IF_INTERNAL_PREC - X265_DEPTH; // intermediate interpolation depth
 
-        primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, stride, stride, m_paddedLines,
+        primitives.weight_pp(ref->buffer[0], m_wbuffer[0], stride, stride, m_paddedLines,
                              scale, round << correction, denom + correction, offset);
         src = m_weightedRef.fpelPlane;
     }
@@ -1482,7 +1482,7 @@
 
         for (int i = 0; i < 4; i++)
         {
-            primitives.weight_pp(ref->buffer[i], m_wbuffer[i], stride, stride, stride, m_paddedLines,
+            primitives.weight_pp(ref->buffer[i], m_wbuffer[i], stride, stride, m_paddedLines,
                                  scale, round << correction, denom + correction, offset);
         }
 
diff -r b7eeae24aae6 -r 61051f5a16b3 source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Thu Oct 16 21:57:30 2014 +0530
+++ b/source/encoder/weightPrediction.cpp	Fri Oct 17 15:59:10 2014 +0530
@@ -185,7 +185,7 @@
         int correction = IF_INTERNAL_PREC - X265_DEPTH; /* intermediate interpolation depth */
         int pwidth = ((width + 15) >> 4) << 4;
 
-        primitives.weight_pp(ref, weightTemp, stride, stride, pwidth, height,
+        primitives.weight_pp(ref, weightTemp, stride, pwidth, height,
                              weight, round << correction, denom + correction, offset);
         ref = weightTemp;
     }
diff -r b7eeae24aae6 -r 61051f5a16b3 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Thu Oct 16 21:57:30 2014 +0530
+++ b/source/test/pixelharness.cpp	Fri Oct 17 15:59:10 2014 +0530
@@ -334,8 +334,8 @@
     for (int i = 0; i < ITERS; i++)
     {
         int index = i % TEST_CASES;
-        checked(opt, pixel_test_buff[index] + j, opt_dest, stride, stride, width, height, w0, round, shift, offset);
-        ref(pixel_test_buff[index] + j, ref_dest, stride, stride, width, height, w0, round, shift, offset);
+        checked(opt, pixel_test_buff[index] + j, opt_dest, stride, width, height, w0, round, shift, offset);
+        ref(pixel_test_buff[index] + j, ref_dest, stride, width, height, w0, round, shift, offset);
 
         if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
             return false;
@@ -1775,7 +1775,7 @@
     if (opt.weight_pp)
     {
         HEADER0("weight_pp");
-        REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 64, 32, 32, 128, 1 << 9, 10, 100);
+        REPORT_SPEEDUP(opt.weight_pp, ref.weight_pp, pbuf1, pbuf2, 64, 32, 32, 128, 1 << 9, 10, 100);
     }
 
     if (opt.weight_sp)