[x265-commits] [x265] asm: sse4 code for saoCuStatsBO, improved 185378c->131279c
Dnyaneshwar G
dnyaneshwar at multicorewareinc.com
Mon Jul 13 18:34:48 CEST 2015
details: http://hg.videolan.org/x265/rev/2d15249c39ba
branches:
changeset: 10801:2d15249c39ba
user: Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date: Tue Jul 07 11:14:35 2015 +0530
description:
asm: sse4 code for saoCuStatsBO, improved 185378c->131279c
Subject: [x265] asm: sse4 code for saoCuStatsE0, improved 250341c->147284c
details: http://hg.videolan.org/x265/rev/3ce49b558052
branches:
changeset: 10802:3ce49b558052
user: Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date: Tue Jul 07 12:17:08 2015 +0530
description:
asm: sse4 code for saoCuStatsE0, improved 250341c->147284c
Subject: [x265] asm: sse4 code for saoCuStatsE1, improved 320369c->151086c
details: http://hg.videolan.org/x265/rev/dd6945c3044e
branches:
changeset: 10803:dd6945c3044e
user: Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date: Tue Jul 07 12:29:32 2015 +0530
description:
asm: sse4 code for saoCuStatsE1, improved 320369c->151086c
Subject: [x265] asm: frameInitLowres avx2 code for 8bpp and 10bpp
details: http://hg.videolan.org/x265/rev/448a0901a74c
branches:
changeset: 10804:448a0901a74c
user: Rajesh Paulraj<rajesh at multicorewareinc.com>
date: Thu Jul 09 17:51:09 2015 +0530
description:
asm: frameInitLowres avx2 code for 8bpp and 10bpp
8bpp:
avx2: downscale 30.38x 22659.94 688378.63
avx : downscale 18.92x 33242.29 628884.06
10bpp:
avx2: downscale 13.48x 51288.90 691165.69
avx : downscale 10.84x 64374.10 697631.81
Subject: [x265] cmake: remove empty file set
details: http://hg.videolan.org/x265/rev/11911e0e5949
branches:
changeset: 10805:11911e0e5949
user: Deepthi Nandakumar <deepthi at multicorewareinc.com>
date: Mon Jul 13 11:05:25 2015 +0530
description:
cmake: remove empty file set
Subject: [x265] asm: fix linux build error- cannot override register size
details: http://hg.videolan.org/x265/rev/26b7dc40afa6
branches:
changeset: 10806:26b7dc40afa6
user: Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date: Mon Jul 13 12:47:50 2015 +0530
description:
asm: fix linux build error- cannot override register size
Subject: [x265] log-level: remove X265_LOG_FRAME, frame level logging is supported in CSV only
details: http://hg.videolan.org/x265/rev/63fe043f739c
branches:
changeset: 10807:63fe043f739c
user: Deepthi Nandakumar <deepthi at multicorewareinc.com>
date: Mon Jul 13 14:44:57 2015 +0530
description:
log-level: remove X265_LOG_FRAME, frame level logging is supported in CSV only
Subject: [x265] stats: count of each CU partition per frame
details: http://hg.videolan.org/x265/rev/3f029010cf52
branches:
changeset: 10808:3f029010cf52
user: Divya Manivannan <divya at multicorewareinc.com>
date: Wed Jul 01 14:36:18 2015 +0530
description:
stats: count of each CU partition per frame
Subject: [x265] asm: fix saoCuStatE1 testbench failure
details: http://hg.videolan.org/x265/rev/426169ca6c76
branches:
changeset: 10809:426169ca6c76
user: Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date: Mon Jul 13 16:53:29 2015 +0530
description:
asm: fix saoCuStatE1 testbench failure
Subject: [x265] param: follow-up cleanups from --log-level frame removal
details: http://hg.videolan.org/x265/rev/39f5205c28d0
branches:
changeset: 10810:39f5205c28d0
user: Steve Borho <steve at borho.org>
date: Mon Jul 13 10:47:22 2015 -0500
description:
param: follow-up cleanups from --log-level frame removal
diffstat:
doc/reST/api.rst | 4 -
doc/reST/cli.rst | 7 +-
source/CMakeLists.txt | 7 +-
source/common/framedata.h | 22 ++
source/common/param.h | 2 +-
source/common/x86/asm-primitives.cpp | 7 +
source/common/x86/loopfilter.asm | 295 +++++++++++++++++++++++++++++++++++
source/common/x86/loopfilter.h | 3 +
source/common/x86/mc-a2.asm | 32 ++-
source/common/x86/mc.h | 1 +
source/common/x86/x86util.asm | 8 +-
source/encoder/encoder.cpp | 12 +
source/encoder/frameencoder.cpp | 102 ++++++-----
source/encoder/frameencoder.h | 6 +-
source/test/pixelharness.cpp | 168 +++++++++++++++++++
source/test/pixelharness.h | 3 +
source/x265-extras.cpp | 70 ++++++++-
source/x265-extras.h | 2 +-
source/x265.cpp | 30 +-
source/x265.h | 18 +-
20 files changed, 695 insertions(+), 104 deletions(-)
diffs (truncated from 1255 to 300 lines):
diff -r ae2e79a13089 -r 39f5205c28d0 doc/reST/api.rst
--- a/doc/reST/api.rst Sun Jul 12 17:23:57 2015 -0500
+++ b/doc/reST/api.rst Mon Jul 13 10:47:22 2015 -0500
@@ -339,10 +339,6 @@ statistics from the encoder::
Cleanup
=======
- /* x265_encoder_log:
- * This function is now deprecated */
- void x265_encoder_log(x265_encoder *encoder, int argc, char **argv);
-
Finally, the encoder must be closed in order to free all of its
resources. An encoder that has been flushed cannot be restarted and
reused. Once **x265_encoder_close()** has been called, the encoder
diff -r ae2e79a13089 -r 39f5205c28d0 doc/reST/cli.rst
--- a/doc/reST/cli.rst Sun Jul 12 17:23:57 2015 -0500
+++ b/doc/reST/cli.rst Mon Jul 13 10:47:22 2015 -0500
@@ -28,7 +28,7 @@ consider this an error and abort.
Generally, when an option expects a string value from a list of strings
the user may specify the integer ordinal of the value they desire. ie:
-:option:`--log-level` 4 is equivalent to :option:`--log-level` debug.
+:option:`--log-level` 3 is equivalent to :option:`--log-level` debug.
Executable Options
==================
@@ -68,9 +68,8 @@ Logging/Statistic Options
0. error
1. warning
2. info **(default)**
- 3. frame
- 4. debug
- 5. full
+ 3. debug
+ 4. full
.. option:: --no-progress
diff -r ae2e79a13089 -r 39f5205c28d0 source/CMakeLists.txt
--- a/source/CMakeLists.txt Sun Jul 12 17:23:57 2015 -0500
+++ b/source/CMakeLists.txt Mon Jul 13 10:47:22 2015 -0500
@@ -30,7 +30,7 @@ option(STATIC_LINK_CRT "Statically link
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 63)
+set(X265_BUILD 64)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
@@ -513,7 +513,6 @@ if(ENABLE_CLI)
output/raw.cpp) # muxers
source_group(input FILES ${InputFiles})
source_group(output FILES ${OutputFiles})
- source_group(filters FILES ${FilterFiles})
check_include_files(getopt.h HAVE_GETOPT_H)
if(NOT HAVE_GETOPT_H)
@@ -529,11 +528,11 @@ if(ENABLE_CLI)
if(XCODE)
# Xcode seems unable to link the CLI with libs, so link as one targget
- add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${FilterFiles} ${GETOPT}
+ add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp
$<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${YASM_OBJS} ${YASM_SRCS})
else()
- add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${FilterFiles} ${GETOPT} ${X265_RC_FILE}
+ add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE}
${ExportDefs} x265.cpp x265.h x265cli.h x265-extras.h x265-extras.cpp)
if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX)
# The CLI cannot link to the shared library on Windows, it
diff -r ae2e79a13089 -r 39f5205c28d0 source/common/framedata.h
--- a/source/common/framedata.h Sun Jul 12 17:23:57 2015 -0500
+++ b/source/common/framedata.h Mon Jul 13 10:47:22 2015 -0500
@@ -34,6 +34,9 @@ namespace X265_NS {
class PicYuv;
class JobProvider;
+#define INTER_MODES 4 // 2Nx2N, 2NxN, Nx2N, AMP modes
+#define INTRA_MODES 3 // DC, Planar, Angular modes
+
/* Current frame stats for 2 pass */
struct FrameStats
{
@@ -49,6 +52,25 @@ struct FrameStats
double percent8x8Intra;
double percent8x8Inter;
double percent8x8Skip;
+ double percentIntraNxN;
+ double percentSkipCu[NUM_CU_DEPTH];
+ double percentMergeCu[NUM_CU_DEPTH];
+ double percentIntraDistribution[NUM_CU_DEPTH][INTRA_MODES];
+ double percentInterDistribution[NUM_CU_DEPTH][3]; // 2Nx2N, RECT, AMP modes percentage
+
+ uint64_t cntIntraNxN;
+ uint64_t totalCu;
+ uint64_t cntSkipCu[NUM_CU_DEPTH];
+ uint64_t cntMergeCu[NUM_CU_DEPTH];
+ uint64_t cntInter[NUM_CU_DEPTH];
+ uint64_t cntIntra[NUM_CU_DEPTH];
+ uint64_t cuInterDistribution[NUM_CU_DEPTH][INTER_MODES];
+ uint64_t cuIntraDistribution[NUM_CU_DEPTH][INTRA_MODES];
+
+ FrameStats()
+ {
+ memset(this, 0, sizeof(FrameStats));
+ }
};
/* Per-frame data that is used during encodes and referenced while the picture
diff -r ae2e79a13089 -r 39f5205c28d0 source/common/param.h
--- a/source/common/param.h Sun Jul 12 17:23:57 2015 -0500
+++ b/source/common/param.h Mon Jul 13 10:47:22 2015 -0500
@@ -41,7 +41,7 @@ void getParamAspectRatio(x265_param *p,
bool parseLambdaFile(x265_param *param);
/* this table is kept internal to avoid confusion, since log level indices start at -1 */
-static const char * const logLevelNames[] = { "none", "error", "warning", "info", "frame", "debug", "full", 0 };
+static const char * const logLevelNames[] = { "none", "error", "warning", "info", "debug", "full", 0 };
#if EXPORT_C_API
#define PARAM_NS
diff -r ae2e79a13089 -r 39f5205c28d0 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sun Jul 12 17:23:57 2015 -0500
+++ b/source/common/x86/asm-primitives.cpp Mon Jul 13 10:47:22 2015 -0500
@@ -2116,6 +2116,8 @@ void setupAssemblyPrimitives(EncoderPrim
p.chroma[X265_CSP_I444].pu[LUMA_64x48].filter_vsp = PFX(interp_4tap_vert_sp_64x48_avx2);
p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = PFX(interp_4tap_vert_sp_64x64_avx2);
+ p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
+
ALL_LUMA_PU_T(luma_hvpp, interp_8tap_hv_pp_cpu); // calling luma_hvpp for all sizes
p.pu[LUMA_4x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_4x4>; // ALL_LUMA_PU_T has declared all sizes except 4x4, hence calling luma_hvpp[4x4]
@@ -2497,6 +2499,9 @@ void setupAssemblyPrimitives(EncoderPrim
p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].p2s = PFX(filterPixelToShort_6x16_sse4);
#if X86_64
+ p.saoCuStatsBO = PFX(saoCuStatsBO_sse4);
+ p.saoCuStatsE0 = PFX(saoCuStatsE0_sse4);
+ p.saoCuStatsE1 = PFX(saoCuStatsE1_sse4);
p.saoCuStatsE2 = PFX(saoCuStatsE2_sse4);
p.saoCuStatsE3 = PFX(saoCuStatsE3_sse4);
@@ -3558,6 +3563,8 @@ void setupAssemblyPrimitives(EncoderPrim
p.chroma[X265_CSP_I444].pu[LUMA_64x32].filter_vpp = PFX(interp_4tap_vert_pp_64x32_avx2);
p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = PFX(interp_4tap_vert_pp_64x16_avx2);
+ p.frameInitLowres = PFX(frame_init_lowres_core_avx2);
+
if (cpuMask & X265_CPU_BMI2)
p.scanPosLast = PFX(scanPosLast_avx2_bmi2);
}
diff -r ae2e79a13089 -r 39f5205c28d0 source/common/x86/loopfilter.asm
--- a/source/common/x86/loopfilter.asm Sun Jul 12 17:23:57 2015 -0500
+++ b/source/common/x86/loopfilter.asm Mon Jul 13 10:47:22 2015 -0500
@@ -29,6 +29,7 @@
SECTION_RODATA 32
pb_31: times 32 db 31
+pb_124: times 32 db 124
pb_15: times 32 db 15
pb_movemask_32: times 32 db 0x00
times 32 db 0xFF
@@ -41,6 +42,8 @@ cextern pw_2
cextern pw_1023
cextern pb_movemask
cextern pw_1
+cextern hmul_16p
+cextern pb_4
;============================================================================================================
@@ -1984,3 +1987,295 @@ cglobal calSign, 4, 5, 6
.end:
RET
%endif
+
+;--------------------------------------------------------------------------------------------------------------------------
+; saoCuStatsBO_c(const pixel *fenc, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count)
+;--------------------------------------------------------------------------------------------------------------------------
+%if ARCH_X86_64 ; this routine uses 12 general-purpose registers (r0-r11)
+INIT_XMM sse4
+cglobal saoCuStatsBO, 7,12,6 ; accumulates per-class (fenc - rec) sums into stats[] and pixel counts into count[]; uses r0-r11, m0-m5
+ mova m3, [hmul_16p + 16] ; pmaddubsw multipliers; presumably +1/-1 byte pairs so each word becomes fenc - rec (hmul_16p is defined elsewhere - confirm)
+ mova m4, [pb_124] ; 124 = 0x7C, keeps bits 2..6 of every byte
+ mova m5, [pb_4] ; external constant, presumably 4 in every byte (one int32 entry) - confirm
+ xor r7d, r7d ; r7 is used below as a byte offset into stats/count
+
+.loopH: ; one iteration per row; r4d counts the remaining rows (endY)
+ mov r10, r0 ; r10 = fenc cursor for this row
+ mov r11, r1 ; r11 = rec cursor for this row
+ mov r9d, r3d ; r9d = pixels still to process in this row (endX)
+.loopL: ; handles up to 16 pixels per pass
+ movu m1, [r11] ; m1 = 16 rec bytes (a full 16 bytes are read even if fewer pixels remain)
+ movu m0, [r10] ; m0 = 16 fenc bytes
+
+ punpckhbw m2, m0, m1 ; m2 = interleaved fenc/rec bytes for pixels 8..15
+ punpcklbw m0, m1 ; m0 = interleaved fenc/rec bytes for pixels 0..7
+ psrlw m1, 1 ; rec[x] >> boShift (done as a word shift by 1; the mask below discards the bit that crosses byte lanes)
+ pmaddubsw m2, m3 ; one 16-bit result per pixel, pixels 8..15
+ pmaddubsw m0, m3 ; one 16-bit result per pixel, pixels 0..7
+ pand m1, m4 ; per byte: (rec >> 3) << 2
+ paddb m1, m5 ; per byte: add pb_4; result is used as the byte offset of the class entry
+
+%assign x 0
+%rep 16 ; fully unrolled, one copy per pixel lane
+ pextrb r7d, m1, x ; r7 = byte offset for pixel x (zero-extended)
+
+%if (x < 8)
+ pextrw r8d, m0, (x % 8) ; 16-bit difference word, pixels 0..7
+%else
+ pextrw r8d, m2, (x % 8) ; 16-bit difference word, pixels 8..15
+%endif
+ movsx r8d, r8w ; sign-extend the 16-bit word to 32 bits
+ inc dword [r6 + r7] ; count[classIdx]++
+ add [r5 + r7], r8d ; stats[classIdx] += (fenc[x] - rec[x]);
+ dec r9d ; NOTE(review): endX == 0 would wrap this counter instead of hitting zero - presumably callers pass endX > 0
+ jz .next ; row finished once endX pixels have been consumed
+%assign x x+1
+%endrep
+
+ add r10, 16 ; advance the fenc cursor to the next 16 pixels
+ add r11, 16 ; advance the rec cursor to the next 16 pixels
+ jmp .loopL
+
+.next:
+ add r0, r2 ; fenc += stride
+ add r1, r2 ; rec += stride (same stride as fenc)
+ dec r4d ; one row done
+ jnz .loopH
+ RET
+%endif
+
+;-----------------------------------------------------------------------------------------------------------------------
+; saoCuStatsE0(const pixel *fenc, const pixel *rec, intptr_t stride, int endX, int endY, int32_t *stats, int32_t *count)
+;-----------------------------------------------------------------------------------------------------------------------
+%if ARCH_X86_64
+INIT_XMM sse4
+cglobal saoCuStatsE0, 5,8,8, 0-32
+ mov r3d, r3m
+
+ ; clear internal temporary buffer
+ pxor m0, m0
+ mova [rsp], m0
+ mova [rsp + mmsize], m0
+ mova m4, [pb_128]
+ mova m5, [hmul_16p + 16]
+ mova m6, [pb_2]
+ xor r7d, r7d
+
+.loopH:
+ mov r5d, r3d
+
+ ; calculate signLeft
+ mov r7b, [r1]
+ sub r7b, [r1 - 1]
+ seta r7b
+ setb r6b
+ sub r7b, r6b
+ neg r7b
+ pinsrb m0, r7d, 15
+
+.loopL:
+ movu m7, [r1]
+ movu m2, [r1 + 1]
+
+ pxor m1, m7, m4
+ pxor m3, m2, m4
+ pcmpgtb m2, m1, m3
+ pcmpgtb m3, m1
+ pand m2, [pb_1]
+ por m2, m3 ; signRight
+
+ palignr m3, m2, m0, 15
+ psignb m3, m4 ; signLeft
+
+ mova m0, m2
+ paddb m2, m3
+ paddb m2, m6 ; edgeType
+
+ ; stats[edgeType]
+ movu m3, [r0] ; fenc[0-15]
+ punpckhbw m1, m3, m7
+ punpcklbw m3, m7
+ pmaddubsw m1, m5
+ pmaddubsw m3, m5
+
+%assign x 0
+%rep 16
+ pextrb r7d, m2, x
+
+%if (x < 8)
+ pextrw r6d, m3, (x % 8)
+%else
+ pextrw r6d, m1, (x % 8)
+%endif
+ movsx r6d, r6w
+ inc word [rsp + r7 * 2] ; tmp_count[edgeType]++
More information about the x265-commits mailing list