[x265-commits] [x265] intra: replace 64x64 scaled reference pixel copy loop by memcpy

Sat Mar 28 18:34:17 CET 2015

details:   http://hg.videolan.org/x265/rev/e442efdf3d7d
branches:  
changeset: 9933:e442efdf3d7d
user:      Min Chen <chenm003 at 163.com>
date:      Fri Mar 27 17:42:36 2015 -0700
description:
intra: replace 64x64 scaled reference pixel copy loop by memcpy
Subject: [x265] split scale_t to scale1D_t and scale2D_t

details:   http://hg.videolan.org/x265/rev/b5c267940edc
branches:  
changeset: 9934:b5c267940edc
user:      Min Chen <chenm003 at 163.com>
date:      Fri Mar 27 17:42:40 2015 -0700
description:
split scale_t to scale1D_t and scale2D_t
Subject: [x265] regression: refine max-refs tests, since we validate with the HM decoder

details:   http://hg.videolan.org/x265/rev/6cd3938d8683
branches:  stable
changeset: 9935:6cd3938d8683
user:      Steve Borho <steve at borho.org>
date:      Fri Mar 27 22:59:16 2015 -0500
description:
regression: refine max-refs tests, since we validate with the HM decoder
Subject: [x265] Merge with stable

details:   http://hg.videolan.org/x265/rev/22a312799bb0
branches:  
changeset: 9936:22a312799bb0
user:      Steve Borho <steve at borho.org>
date:      Fri Mar 27 22:59:30 2015 -0500
description:
Merge with stable

diffstat:

 doc/reST/cli.rst                 |  15 ++++++++++++++-
 source/common/pixel.cpp          |   2 +-
 source/common/primitives.h       |   7 ++++---
 source/common/x86/pixel-util.h   |   4 ++--
 source/encoder/search.cpp        |  25 +++++++------------------
 source/test/pixelharness.cpp     |  33 +++++++++++++++++++++++++++++----
 source/test/pixelharness.h       |   3 ++-
 source/test/regression-tests.txt |   4 ++--
 8 files changed, 61 insertions(+), 32 deletions(-)

diffs (210 lines):

diff -r 36d70728acc2 -r 22a312799bb0 doc/reST/cli.rst

--- a/doc/reST/cli.rst	Fri Mar 27 13:16:28 2015 -0500
+++ b/doc/reST/cli.rst	Fri Mar 27 22:59:30 2015 -0500
@@ -454,7 +454,20 @@ Profile, Level, Tier
 	Max number of L0 references to be allowed. This number has a linear
 	multiplier effect on the amount of work performed in motion search,
 	but will generally have a beneficial affect on compression and
-	distortion. Default 3
+	distortion.
+	
+	Note that x265 allows up to 16 L0 references but the HEVC
+	specification only allows a maximum of 8 total reference frames. So
+	if you have B frames enabled only 7 L0 refs are valid and if you
+	have :option:`--b-pyramid` enabled (which is enabled by default in
+	all presets), then only 6 L0 refs are the maximum allowed by the
+	HEVC specification.  If x265 detects that the total reference count
+	is greater than 8, it will issue a warning that the resulting stream
+	is non-compliant and it signals the stream as profile NONE and level
+	NONE but still allows the encode to continue.  Compliant HEVC
+	decoders may refuse to decode such streams.
+	
+	Default 3
 
 .. note::
 	:option:`--profile`, :option:`--level-idc`, and
diff -r 36d70728acc2 -r 22a312799bb0 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Fri Mar 27 13:16:28 2015 -0500
+++ b/source/common/pixel.cpp	Fri Mar 27 22:59:30 2015 -0500
@@ -582,7 +582,7 @@ void pixelavg_pp(pixel* dst, intptr_t ds
     }
 }
 
-void scale1D_128to64(pixel *dst, const pixel *src, intptr_t /*stride*/)
+void scale1D_128to64(pixel *dst, const pixel *src)
 {
     int x;
     const pixel* src1 = src;
diff -r 36d70728acc2 -r 22a312799bb0 source/common/primitives.h
--- a/source/common/primitives.h	Fri Mar 27 13:16:28 2015 -0500
+++ b/source/common/primitives.h	Fri Mar 27 22:59:30 2015 -0500
@@ -140,7 +140,8 @@ typedef void (*dequant_normal_t)(const i
 typedef int(*count_nonzero_t)(const int16_t* quantCoeff);
 typedef void (*weightp_pp_t)(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
 typedef void (*weightp_sp_t)(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
-typedef void (*scale_t)(pixel* dst, const pixel* src, intptr_t stride);
+typedef void (*scale1D_t)(pixel* dst, const pixel* src);
+typedef void (*scale2D_t)(pixel* dst, const pixel* src, intptr_t stride);
 typedef void (*downscale_t)(const pixel* src0, pixel* dstf, pixel* dsth, pixel* dstv, pixel* dstc,
                             intptr_t src_stride, intptr_t dst_stride, int width, int height);
 typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int width, int height, int marginX);
@@ -266,8 +267,8 @@ struct EncoderPrimitives
     dequant_scaling_t     dequant_scaling;
     dequant_normal_t      dequant_normal;
     denoiseDct_t          denoiseDct;
-    scale_t               scale1D_128to64;
-    scale_t               scale2D_64to32;
+    scale1D_t             scale1D_128to64;
+    scale2D_t             scale2D_64to32;
 
     ssim_4x4x2_core_t     ssim_4x4x2_core;
     ssim_end4_t           ssim_end_4;
diff -r 36d70728acc2 -r 22a312799bb0 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Fri Mar 27 13:16:28 2015 -0500
+++ b/source/common/x86/pixel-util.h	Fri Mar 27 22:59:30 2015 -0500
@@ -73,8 +73,8 @@ void x265_pixel_ssim_4x4x2_core_avx(cons
 float x265_pixel_ssim_end4_sse2(int sum0[5][4], int sum1[5][4], int width);
 float x265_pixel_ssim_end4_avx(int sum0[5][4], int sum1[5][4], int width);
 
-void x265_scale1D_128to64_ssse3(pixel*, const pixel*, intptr_t);
-void x265_scale1D_128to64_avx2(pixel*, const pixel*, intptr_t);
+void x265_scale1D_128to64_ssse3(pixel*, const pixel*);
+void x265_scale1D_128to64_avx2(pixel*, const pixel*);
 void x265_scale2D_64to32_ssse3(pixel*, const pixel*, intptr_t);
 void x265_scale2D_64to32_avx2(pixel*, const pixel*, intptr_t);
 
diff -r 36d70728acc2 -r 22a312799bb0 source/encoder/search.cpp
--- a/source/encoder/search.cpp	Fri Mar 27 13:16:28 2015 -0500
+++ b/source/encoder/search.cpp	Fri Mar 27 22:59:30 2015 -0500
@@ -1231,16 +1231,11 @@ void Search::checkIntraInInter(Mode& int
 
         pixel nScale[129];
         intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
-        primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1, 0);
+        primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1);
 
         // we do not estimate filtering for downscaled samples
-        for (int x = 1; x < 65; x++)
-        {
-            intraNeighbourBuf[0][x] = nScale[x];           // Top pixel
-            intraNeighbourBuf[0][x + 64] = nScale[x + 64]; // Left pixel
-            intraNeighbourBuf[1][x] = nScale[x];           // Top pixel
-            intraNeighbourBuf[1][x + 64] = nScale[x + 64]; // Left pixel
-        }
+        memcpy(&intraNeighbourBuf[0][1], &nScale[1], 2 * 64 * sizeof(pixel));   // Top & Left pixels
+        memcpy(&intraNeighbourBuf[1][1], &nScale[1], 2 * 64 * sizeof(pixel));
 
         scaleTuSize = 32;
         scaleStride = 32;
@@ -1465,16 +1460,10 @@ uint32_t Search::estIntraPredQT(Mode &in
 
                     pixel nScale[129];
                     intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
-                    primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1, 0);
-
-                    // TO DO: primitive
-                    for (int x = 1; x < 65; x++)
-                    {
-                        intraNeighbourBuf[0][x] = nScale[x];           // Top pixel
-                        intraNeighbourBuf[0][x + 64] = nScale[x + 64]; // Left pixel
-                        intraNeighbourBuf[1][x] = nScale[x];           // Top pixel
-                        intraNeighbourBuf[1][x + 64] = nScale[x + 64]; // Left pixel
-                    }
+                    primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1);
+
+                    memcpy(&intraNeighbourBuf[0][1], &nScale[1], 2 * 64 * sizeof(pixel));
+                    memcpy(&intraNeighbourBuf[1][1], &nScale[1], 2 * 64 * sizeof(pixel));
 
                     scaleTuSize = 32;
                     scaleStride = 32;
diff -r 36d70728acc2 -r 22a312799bb0 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Fri Mar 27 13:16:28 2015 -0500
+++ b/source/test/pixelharness.cpp	Fri Mar 27 22:59:30 2015 -0500
@@ -666,7 +666,32 @@ bool PixelHarness::check_pixel_sub_ps(pi
     return true;
 }
 
-bool PixelHarness::check_scale_pp(scale_t ref, scale_t opt)
+bool PixelHarness::check_scale1D_pp(scale1D_t ref, scale1D_t opt)
+{
+    ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
+    ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
+
+    memset(ref_dest, 0, sizeof(ref_dest));
+    memset(opt_dest, 0, sizeof(opt_dest));
+
+    int j = 0;
+    for (int i = 0; i < ITERS; i++)
+    {
+        int index = i % TEST_CASES;
+        checked(opt, opt_dest, pixel_test_buff[index] + j);
+        ref(ref_dest, pixel_test_buff[index] + j);
+
+        if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
+            return false;
+
+        reportfail();
+        j += INCR;
+    }
+
+    return true;
+}
+
+bool PixelHarness::check_scale2D_pp(scale2D_t ref, scale2D_t opt)
 {
     ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
     ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
@@ -1603,7 +1628,7 @@ bool PixelHarness::testCorrectness(const
 
     if (opt.scale1D_128to64)
     {
-        if (!check_scale_pp(ref.scale1D_128to64, opt.scale1D_128to64))
+        if (!check_scale1D_pp(ref.scale1D_128to64, opt.scale1D_128to64))
         {
             printf("scale1D_128to64 failed!\n");
             return false;
@@ -1612,7 +1637,7 @@ bool PixelHarness::testCorrectness(const
 
     if (opt.scale2D_64to32)
     {
-        if (!check_scale_pp(ref.scale2D_64to32, opt.scale2D_64to32))
+        if (!check_scale2D_pp(ref.scale2D_64to32, opt.scale2D_64to32))
         {
             printf("scale2D_64to32 failed!\n");
             return false;
@@ -2003,7 +2028,7 @@ void PixelHarness::measureSpeed(const En
     if (opt.scale1D_128to64)
     {
         HEADER0("scale1D_128to64");
-        REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1, 64);
+        REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1);
     }
 
     if (opt.scale2D_64to32)
diff -r 36d70728acc2 -r 22a312799bb0 source/test/pixelharness.h
--- a/source/test/pixelharness.h	Fri Mar 27 13:16:28 2015 -0500
+++ b/source/test/pixelharness.h	Fri Mar 27 22:59:30 2015 -0500
@@ -76,7 +76,8 @@ protected:
     bool check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt);
     bool check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt);
     bool check_pixel_add_ps(pixel_add_ps_t ref, pixel_add_ps_t opt);
-    bool check_scale_pp(scale_t ref, scale_t opt);
+    bool check_scale1D_pp(scale1D_t ref, scale1D_t opt);
+    bool check_scale2D_pp(scale2D_t ref, scale2D_t opt);
     bool check_ssd_s(pixel_ssd_s_t ref, pixel_ssd_s_t opt);
     bool check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt);
     bool check_calresidual(calcresidual_t ref, calcresidual_t opt);
diff -r 36d70728acc2 -r 22a312799bb0 source/test/regression-tests.txt
--- a/source/test/regression-tests.txt	Fri Mar 27 13:16:28 2015 -0500
+++ b/source/test/regression-tests.txt	Fri Mar 27 22:59:30 2015 -0500
@@ -98,8 +98,8 @@ mobile_calendar_422_ntsc.y4m,--preset sl
 mobile_calendar_422_ntsc.y4m,--preset superfast --weightp --rd 0
 mobile_calendar_422_ntsc.y4m,--preset veryslow --tskip
 old_town_cross_444_720p50.y4m,--preset faster --rd 1 --tune zero-latency
-old_town_cross_444_720p50.y4m,--preset medium --keyint -1 --no-weightp --ref 16
-old_town_cross_444_720p50.y4m,--preset slow --rdoq-level 1 --early-skip
+old_town_cross_444_720p50.y4m,--preset medium --keyint -1 --no-weightp --ref 6
+old_town_cross_444_720p50.y4m,--preset slow --rdoq-level 1 --early-skip --ref 7 --no-b-pyramid
 old_town_cross_444_720p50.y4m,--preset slower --crf 4 --cu-lossless
 old_town_cross_444_720p50.y4m,--preset superfast --weightp --min-cu 16
 old_town_cross_444_720p50.y4m,--preset ultrafast --weightp --min-cu 32