[x265-commits] [x265] intra: replace 64x64 scaled reference pixel copy loop by memcpy
Min Chen
chenm003 at 163.com
Sat Mar 28 18:34:17 CET 2015
details: http://hg.videolan.org/x265/rev/e442efdf3d7d
branches:
changeset: 9933:e442efdf3d7d
user: Min Chen <chenm003 at 163.com>
date: Fri Mar 27 17:42:36 2015 -0700
description:
intra: replace 64x64 scaled reference pixel copy loop by memcpy
Subject: [x265] split scale_t to scale1D_t and scale2D_t
details: http://hg.videolan.org/x265/rev/b5c267940edc
branches:
changeset: 9934:b5c267940edc
user: Min Chen <chenm003 at 163.com>
date: Fri Mar 27 17:42:40 2015 -0700
description:
split scale_t to scale1D_t and scale2D_t
Subject: [x265] regression: refine max-refs tests, since we validate with the HM decoder
details: http://hg.videolan.org/x265/rev/6cd3938d8683
branches: stable
changeset: 9935:6cd3938d8683
user: Steve Borho <steve at borho.org>
date: Fri Mar 27 22:59:16 2015 -0500
description:
regression: refine max-refs tests, since we validate with the HM decoder
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/22a312799bb0
branches:
changeset: 9936:22a312799bb0
user: Steve Borho <steve at borho.org>
date: Fri Mar 27 22:59:30 2015 -0500
description:
Merge with stable
diffstat:
doc/reST/cli.rst | 15 ++++++++++++++-
source/common/pixel.cpp | 2 +-
source/common/primitives.h | 7 ++++---
source/common/x86/pixel-util.h | 4 ++--
source/encoder/search.cpp | 25 +++++++------------------
source/test/pixelharness.cpp | 33 +++++++++++++++++++++++++++++----
source/test/pixelharness.h | 3 ++-
source/test/regression-tests.txt | 4 ++--
8 files changed, 61 insertions(+), 32 deletions(-)
diffs (210 lines):
diff -r 36d70728acc2 -r 22a312799bb0 doc/reST/cli.rst
--- a/doc/reST/cli.rst Fri Mar 27 13:16:28 2015 -0500
+++ b/doc/reST/cli.rst Fri Mar 27 22:59:30 2015 -0500
@@ -454,7 +454,20 @@ Profile, Level, Tier
Max number of L0 references to be allowed. This number has a linear
multiplier effect on the amount of work performed in motion search,
but will generally have a beneficial affect on compression and
- distortion. Default 3
+ distortion.
+
+ Note that x265 allows up to 16 L0 references but the HEVC
+ specification only allows a maximum of 8 total reference frames. So
+ if you have B frames enabled only 7 L0 refs are valid and if you
+ have :option:`--b-pyramid` enabled (which is enabled by default in
+ all presets), then only 6 L0 refs are the maximum allowed by the
+ HEVC specification. If x265 detects that the total reference count
+ is greater than 8, it will issue a warning that the resulting stream
+ is non-compliant and it signals the stream as profile NONE and level
+ NONE but still allows the encode to continue. Compliant HEVC
+ decoders may refuse to decode such streams.
+
+ Default 3
.. note::
:option:`--profile`, :option:`--level-idc`, and
diff -r 36d70728acc2 -r 22a312799bb0 source/common/pixel.cpp
--- a/source/common/pixel.cpp Fri Mar 27 13:16:28 2015 -0500
+++ b/source/common/pixel.cpp Fri Mar 27 22:59:30 2015 -0500
@@ -582,7 +582,7 @@ void pixelavg_pp(pixel* dst, intptr_t ds
}
}
-void scale1D_128to64(pixel *dst, const pixel *src, intptr_t /*stride*/)
+void scale1D_128to64(pixel *dst, const pixel *src)
{
int x;
const pixel* src1 = src;
diff -r 36d70728acc2 -r 22a312799bb0 source/common/primitives.h
--- a/source/common/primitives.h Fri Mar 27 13:16:28 2015 -0500
+++ b/source/common/primitives.h Fri Mar 27 22:59:30 2015 -0500
@@ -140,7 +140,8 @@ typedef void (*dequant_normal_t)(const i
typedef int(*count_nonzero_t)(const int16_t* quantCoeff);
typedef void (*weightp_pp_t)(const pixel* src, pixel* dst, intptr_t stride, int width, int height, int w0, int round, int shift, int offset);
typedef void (*weightp_sp_t)(const int16_t* src, pixel* dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
-typedef void (*scale_t)(pixel* dst, const pixel* src, intptr_t stride);
+typedef void (*scale1D_t)(pixel* dst, const pixel* src);
+typedef void (*scale2D_t)(pixel* dst, const pixel* src, intptr_t stride);
typedef void (*downscale_t)(const pixel* src0, pixel* dstf, pixel* dsth, pixel* dstv, pixel* dstc,
intptr_t src_stride, intptr_t dst_stride, int width, int height);
typedef void (*extendCURowBorder_t)(pixel* txt, intptr_t stride, int width, int height, int marginX);
@@ -266,8 +267,8 @@ struct EncoderPrimitives
dequant_scaling_t dequant_scaling;
dequant_normal_t dequant_normal;
denoiseDct_t denoiseDct;
- scale_t scale1D_128to64;
- scale_t scale2D_64to32;
+ scale1D_t scale1D_128to64;
+ scale2D_t scale2D_64to32;
ssim_4x4x2_core_t ssim_4x4x2_core;
ssim_end4_t ssim_end_4;
diff -r 36d70728acc2 -r 22a312799bb0 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h Fri Mar 27 13:16:28 2015 -0500
+++ b/source/common/x86/pixel-util.h Fri Mar 27 22:59:30 2015 -0500
@@ -73,8 +73,8 @@ void x265_pixel_ssim_4x4x2_core_avx(cons
float x265_pixel_ssim_end4_sse2(int sum0[5][4], int sum1[5][4], int width);
float x265_pixel_ssim_end4_avx(int sum0[5][4], int sum1[5][4], int width);
-void x265_scale1D_128to64_ssse3(pixel*, const pixel*, intptr_t);
-void x265_scale1D_128to64_avx2(pixel*, const pixel*, intptr_t);
+void x265_scale1D_128to64_ssse3(pixel*, const pixel*);
+void x265_scale1D_128to64_avx2(pixel*, const pixel*);
void x265_scale2D_64to32_ssse3(pixel*, const pixel*, intptr_t);
void x265_scale2D_64to32_avx2(pixel*, const pixel*, intptr_t);
diff -r 36d70728acc2 -r 22a312799bb0 source/encoder/search.cpp
--- a/source/encoder/search.cpp Fri Mar 27 13:16:28 2015 -0500
+++ b/source/encoder/search.cpp Fri Mar 27 22:59:30 2015 -0500
@@ -1231,16 +1231,11 @@ void Search::checkIntraInInter(Mode& int
pixel nScale[129];
intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
- primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1, 0);
+ primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1);
// we do not estimate filtering for downscaled samples
- for (int x = 1; x < 65; x++)
- {
- intraNeighbourBuf[0][x] = nScale[x]; // Top pixel
- intraNeighbourBuf[0][x + 64] = nScale[x + 64]; // Left pixel
- intraNeighbourBuf[1][x] = nScale[x]; // Top pixel
- intraNeighbourBuf[1][x + 64] = nScale[x + 64]; // Left pixel
- }
+ memcpy(&intraNeighbourBuf[0][1], &nScale[1], 2 * 64 * sizeof(pixel)); // Top & Left pixels
+ memcpy(&intraNeighbourBuf[1][1], &nScale[1], 2 * 64 * sizeof(pixel));
scaleTuSize = 32;
scaleStride = 32;
@@ -1465,16 +1460,10 @@ uint32_t Search::estIntraPredQT(Mode &in
pixel nScale[129];
intraNeighbourBuf[1][0] = intraNeighbourBuf[0][0];
- primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1, 0);
-
- // TO DO: primitive
- for (int x = 1; x < 65; x++)
- {
- intraNeighbourBuf[0][x] = nScale[x]; // Top pixel
- intraNeighbourBuf[0][x + 64] = nScale[x + 64]; // Left pixel
- intraNeighbourBuf[1][x] = nScale[x]; // Top pixel
- intraNeighbourBuf[1][x + 64] = nScale[x + 64]; // Left pixel
- }
+ primitives.scale1D_128to64(nScale + 1, intraNeighbourBuf[0] + 1);
+
+ memcpy(&intraNeighbourBuf[0][1], &nScale[1], 2 * 64 * sizeof(pixel));
+ memcpy(&intraNeighbourBuf[1][1], &nScale[1], 2 * 64 * sizeof(pixel));
scaleTuSize = 32;
scaleStride = 32;
diff -r 36d70728acc2 -r 22a312799bb0 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp Fri Mar 27 13:16:28 2015 -0500
+++ b/source/test/pixelharness.cpp Fri Mar 27 22:59:30 2015 -0500
@@ -666,7 +666,32 @@ bool PixelHarness::check_pixel_sub_ps(pi
return true;
}
-bool PixelHarness::check_scale_pp(scale_t ref, scale_t opt)
+bool PixelHarness::check_scale1D_pp(scale1D_t ref, scale1D_t opt)
+{
+ ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
+ ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
+
+ memset(ref_dest, 0, sizeof(ref_dest));
+ memset(opt_dest, 0, sizeof(opt_dest));
+
+ int j = 0;
+ for (int i = 0; i < ITERS; i++)
+ {
+ int index = i % TEST_CASES;
+ checked(opt, opt_dest, pixel_test_buff[index] + j);
+ ref(ref_dest, pixel_test_buff[index] + j);
+
+ if (memcmp(ref_dest, opt_dest, 64 * 64 * sizeof(pixel)))
+ return false;
+
+ reportfail();
+ j += INCR;
+ }
+
+ return true;
+}
+
+bool PixelHarness::check_scale2D_pp(scale2D_t ref, scale2D_t opt)
{
ALIGN_VAR_16(pixel, ref_dest[64 * 64]);
ALIGN_VAR_16(pixel, opt_dest[64 * 64]);
@@ -1603,7 +1628,7 @@ bool PixelHarness::testCorrectness(const
if (opt.scale1D_128to64)
{
- if (!check_scale_pp(ref.scale1D_128to64, opt.scale1D_128to64))
+ if (!check_scale1D_pp(ref.scale1D_128to64, opt.scale1D_128to64))
{
printf("scale1D_128to64 failed!\n");
return false;
@@ -1612,7 +1637,7 @@ bool PixelHarness::testCorrectness(const
if (opt.scale2D_64to32)
{
- if (!check_scale_pp(ref.scale2D_64to32, opt.scale2D_64to32))
+ if (!check_scale2D_pp(ref.scale2D_64to32, opt.scale2D_64to32))
{
printf("scale2D_64to32 failed!\n");
return false;
@@ -2003,7 +2028,7 @@ void PixelHarness::measureSpeed(const En
if (opt.scale1D_128to64)
{
HEADER0("scale1D_128to64");
- REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1, 64);
+ REPORT_SPEEDUP(opt.scale1D_128to64, ref.scale1D_128to64, pbuf2, pbuf1);
}
if (opt.scale2D_64to32)
diff -r 36d70728acc2 -r 22a312799bb0 source/test/pixelharness.h
--- a/source/test/pixelharness.h Fri Mar 27 13:16:28 2015 -0500
+++ b/source/test/pixelharness.h Fri Mar 27 22:59:30 2015 -0500
@@ -76,7 +76,8 @@ protected:
bool check_pixelavg_pp(pixelavg_pp_t ref, pixelavg_pp_t opt);
bool check_pixel_sub_ps(pixel_sub_ps_t ref, pixel_sub_ps_t opt);
bool check_pixel_add_ps(pixel_add_ps_t ref, pixel_add_ps_t opt);
- bool check_scale_pp(scale_t ref, scale_t opt);
+ bool check_scale1D_pp(scale1D_t ref, scale1D_t opt);
+ bool check_scale2D_pp(scale2D_t ref, scale2D_t opt);
bool check_ssd_s(pixel_ssd_s_t ref, pixel_ssd_s_t opt);
bool check_blockfill_s(blockfill_s_t ref, blockfill_s_t opt);
bool check_calresidual(calcresidual_t ref, calcresidual_t opt);
diff -r 36d70728acc2 -r 22a312799bb0 source/test/regression-tests.txt
--- a/source/test/regression-tests.txt Fri Mar 27 13:16:28 2015 -0500
+++ b/source/test/regression-tests.txt Fri Mar 27 22:59:30 2015 -0500
@@ -98,8 +98,8 @@ mobile_calendar_422_ntsc.y4m,--preset sl
mobile_calendar_422_ntsc.y4m,--preset superfast --weightp --rd 0
mobile_calendar_422_ntsc.y4m,--preset veryslow --tskip
old_town_cross_444_720p50.y4m,--preset faster --rd 1 --tune zero-latency
-old_town_cross_444_720p50.y4m,--preset medium --keyint -1 --no-weightp --ref 16
-old_town_cross_444_720p50.y4m,--preset slow --rdoq-level 1 --early-skip
+old_town_cross_444_720p50.y4m,--preset medium --keyint -1 --no-weightp --ref 6
+old_town_cross_444_720p50.y4m,--preset slow --rdoq-level 1 --early-skip --ref 7 --no-b-pyramid
old_town_cross_444_720p50.y4m,--preset slower --crf 4 --cu-lossless
old_town_cross_444_720p50.y4m,--preset superfast --weightp --min-cu 16
old_town_cross_444_720p50.y4m,--preset ultrafast --weightp --min-cu 32
More information about the x265-commits mailing list