[x265-commits] [x265] cmake: remove two MSVC warnings overrides, HM code has been somewhat sanitized
Steve Borho
steve at borho.org
Mon Feb 3 15:56:11 CET 2014
details: http://hg.videolan.org/x265/rev/8150374cb0d2
branches:
changeset: 5977:8150374cb0d2
user: Steve Borho <steve at borho.org>
date: Sun Feb 02 13:07:05 2014 -0600
description:
cmake: remove two MSVC warnings overrides, HM code has been somewhat sanitized
Subject: [x265] cmake: remove two MSVC warnings overrides, HM code has been somewhat sanitized
details: http://hg.videolan.org/x265/rev/aab88ed13364
branches:
changeset: 5978:aab88ed13364
user: Steve Borho <steve at borho.org>
date: Sun Feb 02 13:09:26 2014 -0600
description:
cmake: remove two MSVC warnings overrides, HM code has been somewhat sanitized
Subject: [x265] fix
details: http://hg.videolan.org/x265/rev/c89f04114391
branches:
changeset: 5979:c89f04114391
user: Satoshi Nakagawa <nakagawa424 at oki.com>
date: Mon Feb 03 18:17:50 2014 +0900
description:
fix
Subject: [x265] frameencoder: use macro MAX_MAX_QP
details: http://hg.videolan.org/x265/rev/55b4d5135e06
branches:
changeset: 5980:55b4d5135e06
user: Deepthi Nandakumar <deepthi at multicorewareinc.com>
date: Mon Feb 03 16:03:45 2014 +0530
description:
frameencoder: use macro MAX_MAX_QP
Subject: [x265] asm: code for addAvg luma and chroma all sizes
details: http://hg.videolan.org/x265/rev/71841b07b8ee
branches:
changeset: 5981:71841b07b8ee
user: Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date: Mon Feb 03 13:15:35 2014 +0530
description:
asm: code for addAvg luma and chroma all sizes
Subject: [x265] follow x264's keyframe-min logic [CHANGES OUTPUTS] (closes #24)
details: http://hg.videolan.org/x265/rev/898ccce491e9
branches:
changeset: 5982:898ccce491e9
user: Steve Borho <steve at borho.org>
date: Mon Feb 03 08:46:36 2014 -0600
description:
follow x264's keyframe-min logic [CHANGES OUTPUTS] (closes #24)
If no --keyint-min is specified, default to max/10, and clamp the value to the
range [1, max / 2 + 1]. This allows x265 to insert I frames when scene cuts
are detected between keyframe-min and keyframe-max.
diffstat:
source/Lib/TLibCommon/TComPicYuvMD5.cpp | 3 +
source/Lib/TLibCommon/TComYuv.cpp | 6 +-
source/Lib/TLibEncoder/TEncSbac.cpp | 3 +
source/common/CMakeLists.txt | 4 +-
source/common/pixel.cpp | 2 +-
source/common/primitives.h | 2 +-
source/common/x86/asm-primitives.cpp | 63 +
source/common/x86/const-a.asm | 2 +
source/common/x86/mc-a.asm | 1032 +++++++++++++++++++++++++++++++
source/common/x86/pixel.h | 35 +
source/encoder/CMakeLists.txt | 4 +-
source/encoder/encoder.cpp | 3 +-
source/encoder/frameencoder.cpp | 8 +-
source/test/pixelharness.cpp | 8 +-
14 files changed, 1155 insertions(+), 20 deletions(-)
diffs (truncated from 1377 to 300 lines):
diff -r eff52bc89e94 -r 898ccce491e9 source/Lib/TLibCommon/TComPicYuvMD5.cpp
--- a/source/Lib/TLibCommon/TComPicYuvMD5.cpp Sun Feb 02 12:52:04 2014 -0600
+++ b/source/Lib/TLibCommon/TComPicYuvMD5.cpp Mon Feb 03 08:46:36 2014 -0600
@@ -106,6 +106,9 @@ void updateCRC(const Pel* plane, uint32_
crcVal = (((crcVal << 1) + bitVal) & 0xffff) ^ (crcMsb * 0x1021);
}
+#if _MSC_VER
+#pragma warning(disable: 4127) // conditional expression is constant
+#endif
// take CRC of second pictureData byte if bit depth is greater than 8-bits
if (X265_DEPTH > 8)
{
diff -r eff52bc89e94 -r 898ccce491e9 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp Sun Feb 02 12:52:04 2014 -0600
+++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Feb 03 08:46:36 2014 -0600
@@ -594,7 +594,7 @@ void TComYuv::addAvg(TShortYUV* srcYuv0,
src1Stride = srcYuv1->m_width;
dststride = getStride();
- primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride);
+ primitives.luma_addAvg[part](srcY0, srcY1, dstY, src0Stride, src1Stride, dststride);
}
if (bChroma)
{
@@ -602,8 +602,8 @@ void TComYuv::addAvg(TShortYUV* srcYuv0,
src1Stride = srcYuv1->m_cwidth;
dststride = getCStride();
- primitives.chroma[m_csp].addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride);
- primitives.chroma[m_csp].addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride);
+ primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, src0Stride, src1Stride, dststride);
+ primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, src0Stride, src1Stride, dststride);
}
}
diff -r eff52bc89e94 -r 898ccce491e9 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp Sun Feb 02 12:52:04 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp Mon Feb 03 08:46:36 2014 -0600
@@ -338,6 +338,9 @@ void TEncSbac::codeVPS(TComVPS* vps)
WRITE_UVLC(vps->getMaxDecPicBuffering(i) - 1, "vps_max_dec_pic_buffering_minus1[i]");
WRITE_UVLC(vps->getNumReorderPics(i), "vps_num_reorder_pics[i]");
WRITE_UVLC(vps->getMaxLatencyIncrease(i), "vps_max_latency_increase_plus1[i]");
+#if _MSC_VER
+#pragma warning(disable: 4127) // conditional expression is constant
+#endif
if (!subLayerOrderingInfoPresentFlag)
{
break;
diff -r eff52bc89e94 -r 898ccce491e9 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/CMakeLists.txt Mon Feb 03 08:46:36 2014 -0600
@@ -49,13 +49,11 @@ endif(GCC)
if(MSVC)
# ignore these warnings from HM source
# /wd4244 type conversion, possible loss of data
- # /wd4512 assignment operator could not be generated
- # /wd4127 conditional expression is constant
# /wd4389 signed/unsigned mismatch
# /wd4018 '<' signed/unsigned mismatch
# /wd4800 performance warning: bool coersion
set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS
- "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
+ "/wd4244 /wd4389 /wd4018 /wd4800")
if(INTEL_CXX)
add_definitions(/Qwd2557) # signed/unsigned mismatch
endif()
diff -r eff52bc89e94 -r 898ccce491e9 source/common/pixel.cpp
--- a/source/common/pixel.cpp Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/pixel.cpp Mon Feb 03 08:46:36 2014 -0600
@@ -802,7 +802,7 @@ void pixel_add_ps_c(pixel *a, intptr_t d
}
template<int bx, int by>
-void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride)
+void addAvg(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
{
int shiftNum, offset;
diff -r eff52bc89e94 -r 898ccce491e9 source/common/primitives.h
--- a/source/common/primitives.h Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/primitives.h Mon Feb 03 08:46:36 2014 -0600
@@ -184,7 +184,7 @@ typedef void (*copy_ps_t)(int16_t *dst,
typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
typedef void (*pixel_add_ps_t)(pixel *a, intptr_t dstride, pixel *b0, int16_t *b1, intptr_t sstride0, intptr_t sstride1);
-typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride);
+typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
/* Define a structure containing function pointers to optimized encoder
* primitives. Each pointer can reference either an assembly routine,
diff -r eff52bc89e94 -r 898ccce491e9 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp Mon Feb 03 08:46:36 2014 -0600
@@ -563,6 +563,62 @@ extern "C" {
SETUP_PIXEL_SSE_SP_DEF(64, 16, cpu); \
SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
+#define SETUP_LUMA_ADDAVG_FUNC_DEF(W, H, cpu) \
+ p.luma_addAvg[LUMA_## W ## x ## H] = x265_addAvg_## W ## x ## H ## cpu;
+
+#define LUMA_ADDAVG(cpu) \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(4, 4, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(4, 8, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(4, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(8, 4, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(8, 8, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(8, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(8, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(12, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 4, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 8, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 12, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(24, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(16, 64, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 8, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(32, 64, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(48, 64, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(64, 16, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(64, 32, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(64, 48, cpu); \
+ SETUP_LUMA_ADDAVG_FUNC_DEF(64, 64, cpu); \
+
+#define SETUP_CHROMA_ADDAVG_FUNC_DEF(W, H, cpu) \
+ p.chroma[X265_CSP_I420].addAvg[CHROMA_## W ## x ## H] = x265_addAvg_## W ## x ## H ## cpu;
+
+#define CHROMA_ADDAVG(cpu) \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 2, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 4, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 8, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 2, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 4, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 6, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 8, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 32, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(12, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 4, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 8, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 12, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 32, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(24, 32, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 8, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 16, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
+ SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 32, cpu); \
+
#define SETUP_INTRA_ANG4(mode, fno, cpu) \
p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
#define SETUP_INTRA_ANG8(mode, fno, cpu) \
@@ -890,6 +946,9 @@ void Setup_Assembly_Primitives(EncoderPr
p.dct[DCT_4x4] = x265_dct4_sse2;
p.idct[IDCT_4x4] = x265_idct4_sse2;
p.idct[IDST_4x4] = x265_idst4_sse2;
+
+ LUMA_ADDAVG(_sse2);
+ CHROMA_ADDAVG(_sse2);
}
if (cpuMask & X265_CPU_SSSE3)
{
@@ -1019,6 +1078,10 @@ void Setup_Assembly_Primitives(EncoderPr
SETUP_INTRA_ANG8(33, 3, sse4);
p.dct[DCT_8x8] = x265_dct8_sse4;
+
+ p.chroma[X265_CSP_I420].addAvg[CHROMA_2x4] = x265_addAvg_2x4_sse4;
+ p.chroma[X265_CSP_I420].addAvg[CHROMA_2x8] = x265_addAvg_2x8_sse4;
+ p.chroma[X265_CSP_I420].addAvg[CHROMA_6x8] = x265_addAvg_6x8_sse4;
}
if (cpuMask & X265_CPU_AVX)
{
diff -r eff52bc89e94 -r 898ccce491e9 source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/x86/const-a.asm Mon Feb 03 08:46:36 2014 -0600
@@ -33,6 +33,8 @@ const hsub_mul, times 16 db 1, -1
const pw_1, times 16 dw 1
const pw_16, times 16 dw 16
const pw_32, times 16 dw 32
+const pw_128, times 16 dw 128
+const pw_256, times 16 dw 256
const pw_512, times 16 dw 512
const pw_1024, times 16 dw 1024
const pw_4096, times 16 dw 4096
diff -r eff52bc89e94 -r 898ccce491e9 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/x86/mc-a.asm Mon Feb 03 08:46:36 2014 -0600
@@ -49,6 +49,8 @@ cextern pw_4
cextern pw_8
cextern pw_32
cextern pw_64
+cextern pw_128
+cextern pw_256
cextern pw_512
cextern pw_00ff
cextern pw_pixel_max
@@ -56,6 +58,1036 @@ cextern sw_64
cextern pd_32
cextern deinterleave_shufd
+;====================================================================================================================
+;void addAvg (int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
+;====================================================================================================================
+; r0 = pSrc0, r1 = pSrc1
+; r2 = pDst, r3 = iStride0
+; r4 = iStride1, r5 = iDstStride
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal addAvg_2x4, 6,6,8, src0, src1, dst, src0Stride, src1tride, dstStride
+
+ mova m0, [pw_256]
+ mova m7, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ movd m1, [r0]
+ movd m2, [r0 + r3]
+ movd m3, [r1]
+ movd m4, [r1 + r4]
+
+ punpckldq m1, m2
+ punpckldq m3, m4
+
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movd m2, [r0]
+ movd m4, [r0 + r3]
+ movd m5, [r1]
+ movd m6, [r1 + r4]
+
+ punpckldq m2, m4
+ punpckldq m5, m6
+ punpcklqdq m1, m2
+ punpcklqdq m3, m5
+
+ paddw m1, m3
+ pmulhrsw m1, m0
+ paddw m1, m7
+ packuswb m1, m1
+
+ pextrw [r2], m1, 0
+ pextrw [r2 + r5], m1, 1
+ lea r2, [r2 + 2 * r5]
+ pextrw [r2], m1, 2
+ pextrw [r2 + r5], m1, 3
+
+ RET
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal addAvg_2x8, 6,6,8, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
+
+ mova m0, [pw_256]
+ mova m7, [pw_128]
+ add r3, r3
+ add r4, r4
+
+ movd m1, [r0]
+ movd m2, [r0 + r3]
+ movd m3, [r1]
+ movd m4, [r1 + r4]
+
+ punpckldq m1, m2
+ punpckldq m3, m4
+
+ lea r0, [r0 + 2 * r3]
+ lea r1, [r1 + 2 * r4]
+
+ movd m2, [r0]
+ movd m4, [r0 + r3]
+ movd m5, [r1]
+ movd m6, [r1 + r4]
+
+ punpckldq m2, m4
+ punpckldq m5, m6
+ punpcklqdq m1, m2
+ punpcklqdq m3, m5
+
+ paddw m1, m3
+ pmulhrsw m1, m0
+ paddw m1, m7
+ packuswb m1, m1
+
+ pextrw [r2], m1, 0
+ pextrw [r2 + r5], m1, 1
+ lea r2, [r2 + 2 * r5]
+ pextrw [r2], m1, 2
+ pextrw [r2 + r5], m1, 3
+
+ lea r2, [r2 + 2 * r5]
+ lea r0, [r0 + 2 * r3]
More information about the x265-commits mailing list