[x265-commits] [x265] motion: use fast weighted subpel refine
Steve Borho
steve at borho.org
Fri Nov 22 06:41:44 CET 2013
details: http://hg.videolan.org/x265/rev/8f156b97360b
branches:
changeset: 5255:8f156b97360b
user: Steve Borho <steve at borho.org>
date: Thu Nov 21 17:29:26 2013 -0600
description:
motion: use fast weighted subpel refine
Don't do the full-blown weighted motion compensation for ME. Just interpolate
the weighted full-pel pixels. This is not 100% accurate to the pixels that will
be used to encode the final prediction, but it is close enough for ME.
Testing with sintel_trailer_2k_720p24.y4m at medium preset and all defaults
x265 [info]: 651 of 1124 (57.92%) P frames weighted
before: 1253 frames in 512.74s (2.44 fps), 223.51 kb/s, Global PSNR: 50.552
after: 1253 frames in 410.25s (3.05 fps), 223.59 kb/s, Global PSNR: 50.589
Subject: [x265] cmake: almost revive Xcode support
details: http://hg.videolan.org/x265/rev/f4e10e4d3f0d
branches:
changeset: 5256:f4e10e4d3f0d
user: Steve Borho <steve at borho.org>
date: Thu Nov 21 18:03:46 2013 -0600
description:
cmake: almost revive Xcode support
# macbrew based instructions
brew install cmake --HEAD
cmake -G Xcode ../source
open x265.xcodeproj
> cmake --version
cmake version 2.8.12.20131121
The static library is still not linking properly, so the CLI does not link
either, but the shared library does build.
diffstat:
source/CMakeLists.txt | 26 ++++++++++---
source/Lib/TLibEncoder/TEncSearch.cpp | 15 +++----
source/common/CMakeLists.txt | 2 +-
source/encoder/motion.cpp | 68 +++++++++++-----------------------
source/encoder/motion.h | 3 -
5 files changed, 50 insertions(+), 64 deletions(-)
diffs (221 lines):
diff -r b172259c07f1 -r f4e10e4d3f0d source/CMakeLists.txt
--- a/source/CMakeLists.txt Thu Nov 21 17:07:34 2013 -0600
+++ b/source/CMakeLists.txt Thu Nov 21 18:03:46 2013 -0600
@@ -5,6 +5,7 @@ if(NOT CMAKE_BUILD_TYPE)
"Choose the type of build, options are: None(CMAKE_CXX_FLAGS or CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel."
FORCE)
endif()
+cmake_policy(SET CMP0025 OLD) # CMAKE_CXX_COMPILER_ID is Clang on Mac OS X (not AppleClang)
project (x265)
cmake_minimum_required (VERSION 2.8.8) # OBJECT libraries require 2.8.8
@@ -151,14 +152,27 @@ include_directories(. Lib common encoder
add_subdirectory(common)
add_subdirectory(encoder)
-if(MSVC_IDE AND ENABLE_PRIMITIVES_ASM)
+if((MSVC_IDE OR XCODE) AND ENABLE_PRIMITIVES_ASM)
# this is horrible. ugly, and hacky, and it reproduces logic found
# in the yasm CMake modules, but this is required because of this cmake bug
# http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170
if (X64)
- set(FLAGS -f win64 -m amd64 -DARCH_X86_64=1 -DHAVE_ALIGNED_STACK=0)
+ if(APPLE)
+ set(FLAGS -f macho64 -m amd64 -DPREFIX -DPIC -DARCH_X86_64=1 -DHAVE_ALIGNED_STACK=0)
+ else()
+ set(FLAGS -f win64 -m amd64 -DARCH_X86_64=1 -DHAVE_ALIGNED_STACK=0)
+ endif()
else()
- set(FLAGS -f win32 -DARCH_X86_64=0 -DHAVE_ALIGNED_STACK=0 -DPREFIX)
+ if(APPLE)
+ set(FLAGS -f macho -DARCH_X86_64=0 -DHAVE_ALIGNED_STACK=0 -DPREFIX)
+ else()
+ set(FLAGS -f win32 -DARCH_X86_64=0 -DHAVE_ALIGNED_STACK=0 -DPREFIX)
+ endif()
+ endif()
+ if(WIN32)
+ set(SUFFIX obj)
+ else()
+ set(SUFFIX o)
endif()
if (HIGH_BIT_DEPTH)
set(FLAGS ${FLAGS} -DHIGH_BIT_DEPTH=1 -DBIT_DEPTH=10)
@@ -168,10 +182,10 @@ if(MSVC_IDE AND ENABLE_PRIMITIVES_ASM)
foreach(ASM ${MSVC_ASMS})
set(YASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/x86/${ASM})
set(YASM_SRCS ${YASM_SRCS} ${YASM_SRC})
- set(YASM_OBJS ${YASM_OBJS} ${ASM}.obj)
+ set(YASM_OBJS ${YASM_OBJS} ${ASM}.${SUFFIX})
add_custom_command(
- OUTPUT ${ASM}.obj
- COMMAND ${YASM_EXECUTABLE} ARGS ${FLAGS} ${YASM_SRC} -o ${ASM}.obj
+ OUTPUT ${ASM}.${SUFFIX}
+ COMMAND ${YASM_EXECUTABLE} ARGS ${FLAGS} ${YASM_SRC} -o ${ASM}.${SUFFIX}
DEPENDS ${YASM_SRC})
endforeach()
endif()
diff -r b172259c07f1 -r f4e10e4d3f0d source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Nov 21 17:07:34 2013 -0600
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Nov 21 18:03:46 2013 -0600
@@ -458,9 +458,9 @@ void TEncSearch::xIntraCodingLumaBlk(TCo
}
//===== get residual signal =====
- assert(!((uint32_t)fenc & (width - 1)));
- assert(!((uint32_t)pred & (width - 1)));
- assert(!((uint32_t)residual & (width - 1)));
+ assert(!((uint32_t)(size_t)fenc & (width - 1)));
+ assert(!((uint32_t)(size_t)pred & (width - 1)));
+ assert(!((uint32_t)(size_t)residual & (width - 1)));
primitives.calcresidual[(int)g_convertToBit[width]](fenc, pred, residual, stride);
//===== transform and quantization =====
@@ -499,7 +499,6 @@ void TEncSearch::xIntraCodingLumaBlk(TCo
}
//===== reconstruction =====
- assert(((uint32_t)residual & (width - 1)) == 0);
assert(width <= 32);
primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride);
@@ -587,9 +586,9 @@ void TEncSearch::xIntraCodingChromaBlk(T
}
//===== get residual signal =====
- assert(!((uint32_t)fenc & (width - 1)));
- assert(!((uint32_t)pred & (width - 1)));
- assert(!((uint32_t)residual & (width - 1)));
+ assert(!((uint32_t)(size_t)fenc & (width - 1)));
+ assert(!((uint32_t)(size_t)pred & (width - 1)));
+ assert(!((uint32_t)(size_t)residual & (width - 1)));
int size = g_convertToBit[width];
primitives.calcresidual[size](fenc, pred, residual, stride);
@@ -638,7 +637,7 @@ void TEncSearch::xIntraCodingChromaBlk(T
}
//===== reconstruction =====
- assert(((uint32_t)residual & (width - 1)) == 0);
+ assert(((uint32_t)(size_t)residual & (width - 1)) == 0);
assert(width <= 32);
primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE / 2, reconIPredStride);
diff -r b172259c07f1 -r f4e10e4d3f0d source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Thu Nov 21 17:07:34 2013 -0600
+++ b/source/common/CMakeLists.txt Thu Nov 21 18:03:46 2013 -0600
@@ -119,7 +119,7 @@ if(ENABLE_PRIMITIVES_ASM)
set(A_SRCS ${A_SRCS} pixel-32.asm)
endif()
- if(MSVC_IDE)
+ if(MSVC_IDE OR XCODE)
# MSVC requires custom build rules in the main cmake script for yasm
set(MSVC_ASMS "${A_SRCS}" CACHE INTERNAL "yasm sources")
set(A_SRCS)
diff -r b172259c07f1 -r f4e10e4d3f0d source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Thu Nov 21 17:07:34 2013 -0600
+++ b/source/encoder/motion.cpp Thu Nov 21 18:03:46 2013 -0600
@@ -104,17 +104,11 @@ MotionEstimate::MotionEstimate()
init_scales();
fenc = (pixel*)X265_MALLOC(pixel, MAX_CU_SIZE * MAX_CU_SIZE);
- subpelbuf = (pixel*)X265_MALLOC(pixel, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1));
- immedVal = (int16_t*)X265_MALLOC(int16_t, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
- immedVal2 = (int16_t*)X265_MALLOC(int16_t, (MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
}
MotionEstimate::~MotionEstimate()
{
X265_FREE(fenc);
- X265_FREE(subpelbuf);
- X265_FREE(immedVal);
- X265_FREE(immedVal2);
}
void MotionEstimate::setSourcePU(int offset, int width, int height)
@@ -1137,50 +1131,32 @@ int MotionEstimate::subpelCompare(Refere
}
else
{
- if (ref->isWeighted)
+ /* We are taking a short-cut here if the reference is weighted. To be
+ * accurate we should be interpolating unweighted pixels and weighting
+ * the final 16bit values prior to rounding and downshifting. Instead we
+ * are simply interpolating the weighted full-pel pixels. Not 100%
+ * accurate but good enough for fast qpel ME */
+ ALIGN_VAR_32(pixel, subpelbuf[64 * 64]);
+ pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
+ if (yFrac == 0)
{
- int shiftNum = IF_INTERNAL_PREC - X265_DEPTH;
- int shift = ref->shift + shiftNum;
- int round = shift ? (1 << (shift - 1)) : 0;
- pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
-
- if (yFrac == 0)
- {
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref, ref->lumaStride, immedVal, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[xFrac]);
- primitives.weight_sp(immedVal, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, blockheight, ref->weight, round, shift, ref->offset);
- }
- else if (xFrac == 0)
- {
- primitives.ipfilter_ps[FILTER_V_P_S_8](fref, ref->lumaStride, immedVal, FENC_STRIDE, blockwidth, blockheight, g_lumaFilter[yFrac]);
- primitives.weight_sp(immedVal, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, blockheight, ref->weight, round, shift, ref->offset);
- }
- else
- {
- int filterSize = NTAPS_LUMA;
- int halfFilterSize = (filterSize >> 1);
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, blockwidth, blockwidth, blockheight + filterSize - 1, g_lumaFilter[xFrac]);
- primitives.ipfilter_ss[FILTER_V_S_S_8](immedVal + (halfFilterSize - 1) * blockwidth, blockwidth, immedVal2, FENC_STRIDE, blockwidth, blockheight, yFrac);
- primitives.weight_sp(immedVal2, subpelbuf, FENC_STRIDE, FENC_STRIDE, blockwidth, blockheight, ref->weight, round, shift, ref->offset);
- }
+ primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
+ }
+ else if (xFrac == 0)
+ {
+ primitives.luma_vpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, yFrac);
}
else
{
- pixel *fref = ref->fpelPlane + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * ref->lumaStride;
- if (yFrac == 0)
- {
- primitives.luma_hpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, xFrac);
- }
- else if (xFrac == 0)
- {
- primitives.luma_vpp[partEnum](fref, ref->lumaStride, subpelbuf, FENC_STRIDE, yFrac);
- }
- else
- {
- int filterSize = NTAPS_LUMA;
- int halfFilterSize = (filterSize >> 1);
- primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal, blockwidth, blockwidth, blockheight + filterSize - 1, g_lumaFilter[xFrac]);
- primitives.luma_vsp[partEnum](immedVal + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, yFrac);
- }
+ ALIGN_VAR_32(int16_t, immed[64 * (64 + 8)]);
+
+ int filterSize = NTAPS_LUMA;
+ int halfFilterSize = filterSize >> 1;
+ primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride,
+ immed, blockwidth,
+ blockwidth, blockheight + filterSize - 1,
+ g_lumaFilter[xFrac]);
+ primitives.luma_vsp[partEnum](immed + (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE, yFrac);
}
return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
}
diff -r b172259c07f1 -r f4e10e4d3f0d source/encoder/motion.h
--- a/source/encoder/motion.h Thu Nov 21 17:07:34 2013 -0600
+++ b/source/encoder/motion.h Thu Nov 21 18:03:46 2013 -0600
@@ -52,9 +52,6 @@ protected:
int subpelRefine;
/* subpel generation buffers */
- pixel *subpelbuf;
- int16_t *immedVal;
- int16_t *immedVal2;
int blockwidth;
int blockheight;
More information about the x265-commits
mailing list