[x265-commits] [x265] cmake: remove two MSVC warnings overrides, HM code has been somewhat sanitized

Steve Borho steve at borho.org
Mon Feb 3 15:56:11 CET 2014


details:   http://hg.videolan.org/x265/rev/8150374cb0d2
branches:  
changeset: 5977:8150374cb0d2
user:      Steve Borho <steve at borho.org>
date:      Sun Feb 02 13:07:05 2014 -0600
description:
cmake: remove two MSVC warnings overrides, HM code has been somewhat sanitized
Subject: [x265] cmake: remove two MSVC warnings overrides, HM code has been somewhat sanitized

details:   http://hg.videolan.org/x265/rev/aab88ed13364
branches:  
changeset: 5978:aab88ed13364
user:      Steve Borho <steve at borho.org>
date:      Sun Feb 02 13:09:26 2014 -0600
description:
cmake: remove two MSVC warnings overrides, HM code has been somewhat sanitized
Subject: [x265] fix

details:   http://hg.videolan.org/x265/rev/c89f04114391
branches:  
changeset: 5979:c89f04114391
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Mon Feb 03 18:17:50 2014 +0900
description:
fix
Subject: [x265] frameencoder: use macro MAX_MAX_QP

details:   http://hg.videolan.org/x265/rev/55b4d5135e06
branches:  
changeset: 5980:55b4d5135e06
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Mon Feb 03 16:03:45 2014 +0530
description:
frameencoder: use macro MAX_MAX_QP
Subject: [x265] asm: code for addAvg luma and chroma all sizes

details:   http://hg.videolan.org/x265/rev/71841b07b8ee
branches:  
changeset: 5981:71841b07b8ee
user:      Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
date:      Mon Feb 03 13:15:35 2014 +0530
description:
asm: code for addAvg luma and chroma all sizes
Subject: [x265] follow x264's keyframe-min logic [CHANGES OUTPUTS] (closes #24)

details:   http://hg.videolan.org/x265/rev/898ccce491e9
branches:  
changeset: 5982:898ccce491e9
user:      Steve Borho <steve at borho.org>
date:      Mon Feb 03 08:46:36 2014 -0600
description:
follow x264's keyframe-min logic [CHANGES OUTPUTS] (closes #24)

If no --keyint-min is specified, default to max/10, and clamp the value to the
range [1, max / 2 + 1].  This allows x265 to insert I frames when scene cuts
are detected between keyframe-min and keyframe-max.

diffstat:

 source/Lib/TLibCommon/TComPicYuvMD5.cpp |     3 +
 source/Lib/TLibCommon/TComYuv.cpp       |     6 +-
 source/Lib/TLibEncoder/TEncSbac.cpp     |     3 +
 source/common/CMakeLists.txt            |     4 +-
 source/common/pixel.cpp                 |     2 +-
 source/common/primitives.h              |     2 +-
 source/common/x86/asm-primitives.cpp    |    63 +
 source/common/x86/const-a.asm           |     2 +
 source/common/x86/mc-a.asm              |  1032 +++++++++++++++++++++++++++++++
 source/common/x86/pixel.h               |    35 +
 source/encoder/CMakeLists.txt           |     4 +-
 source/encoder/encoder.cpp              |     3 +-
 source/encoder/frameencoder.cpp         |     8 +-
 source/test/pixelharness.cpp            |     8 +-
 14 files changed, 1155 insertions(+), 20 deletions(-)

diffs (truncated from 1377 to 300 lines):

diff -r eff52bc89e94 -r 898ccce491e9 source/Lib/TLibCommon/TComPicYuvMD5.cpp
--- a/source/Lib/TLibCommon/TComPicYuvMD5.cpp	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/Lib/TLibCommon/TComPicYuvMD5.cpp	Mon Feb 03 08:46:36 2014 -0600
@@ -106,6 +106,9 @@ void updateCRC(const Pel* plane, uint32_
                 crcVal = (((crcVal << 1) + bitVal) & 0xffff) ^ (crcMsb * 0x1021);
             }
 
+#if _MSC_VER
+#pragma warning(disable: 4127) // conditional expression is constant
+#endif
             // take CRC of second pictureData byte if bit depth is greater than 8-bits
             if (X265_DEPTH > 8)
             {
diff -r eff52bc89e94 -r 898ccce491e9 source/Lib/TLibCommon/TComYuv.cpp
--- a/source/Lib/TLibCommon/TComYuv.cpp	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/Lib/TLibCommon/TComYuv.cpp	Mon Feb 03 08:46:36 2014 -0600
@@ -594,7 +594,7 @@ void TComYuv::addAvg(TShortYUV* srcYuv0,
         src1Stride = srcYuv1->m_width;
         dststride  = getStride();
 
-        primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride);
+        primitives.luma_addAvg[part](srcY0, srcY1, dstY, src0Stride, src1Stride, dststride);
     }
     if (bChroma)
     {
@@ -602,8 +602,8 @@ void TComYuv::addAvg(TShortYUV* srcYuv0,
         src1Stride = srcYuv1->m_cwidth;
         dststride  = getCStride();
 
-        primitives.chroma[m_csp].addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride);
-        primitives.chroma[m_csp].addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride);
+        primitives.chroma[m_csp].addAvg[part](srcU0, srcU1, dstU, src0Stride, src1Stride, dststride);
+        primitives.chroma[m_csp].addAvg[part](srcV0, srcV1, dstV, src0Stride, src1Stride, dststride);
     }
 }
 
diff -r eff52bc89e94 -r 898ccce491e9 source/Lib/TLibEncoder/TEncSbac.cpp
--- a/source/Lib/TLibEncoder/TEncSbac.cpp	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/Lib/TLibEncoder/TEncSbac.cpp	Mon Feb 03 08:46:36 2014 -0600
@@ -338,6 +338,9 @@ void TEncSbac::codeVPS(TComVPS* vps)
         WRITE_UVLC(vps->getMaxDecPicBuffering(i) - 1,       "vps_max_dec_pic_buffering_minus1[i]");
         WRITE_UVLC(vps->getNumReorderPics(i),               "vps_num_reorder_pics[i]");
         WRITE_UVLC(vps->getMaxLatencyIncrease(i),           "vps_max_latency_increase_plus1[i]");
+#if _MSC_VER
+#pragma warning(disable: 4127) // conditional expression is constant
+#endif
         if (!subLayerOrderingInfoPresentFlag)
         {
             break;
diff -r eff52bc89e94 -r 898ccce491e9 source/common/CMakeLists.txt
--- a/source/common/CMakeLists.txt	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/CMakeLists.txt	Mon Feb 03 08:46:36 2014 -0600
@@ -49,13 +49,11 @@ endif(GCC)
 if(MSVC)
     # ignore these warnings from HM source
     # /wd4244 type conversion, possible loss of data
-    # /wd4512 assignment operator could not be generated
-    # /wd4127 conditional expression is constant
     # /wd4389 signed/unsigned mismatch
     # /wd4018 '<' signed/unsigned mismatch
     # /wd4800 performance warning: bool coersion
     set_source_files_properties(${LIBCOMMON_SRC} PROPERTIES COMPILE_FLAGS 
-        "/wd4244 /wd4512 /wd4127 /wd4389 /wd4018 /wd4800")
+        "/wd4244 /wd4389 /wd4018 /wd4800")
     if(INTEL_CXX)
         add_definitions(/Qwd2557) # signed/unsigned mismatch
     endif()
diff -r eff52bc89e94 -r 898ccce491e9 source/common/pixel.cpp
--- a/source/common/pixel.cpp	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/pixel.cpp	Mon Feb 03 08:46:36 2014 -0600
@@ -802,7 +802,7 @@ void pixel_add_ps_c(pixel *a, intptr_t d
 }
 
 template<int bx, int by>
-void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride)
+void addAvg(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
 {
     int shiftNum, offset;
 
diff -r eff52bc89e94 -r 898ccce491e9 source/common/primitives.h
--- a/source/common/primitives.h	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/primitives.h	Mon Feb 03 08:46:36 2014 -0600
@@ -184,7 +184,7 @@ typedef void (*copy_ps_t)(int16_t *dst, 
 typedef void (*pixel_sub_ps_t)(int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1);
 typedef void (*pixel_add_ps_t)(pixel *a, intptr_t dstride, pixel *b0, int16_t *b1, intptr_t sstride0, intptr_t sstride1);
 
-typedef void (*addAvg_t)(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride);
+typedef void (*addAvg_t)(int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride);
 
 /* Define a structure containing function pointers to optimized encoder
  * primitives.  Each pointer can reference either an assembly routine,
diff -r eff52bc89e94 -r 898ccce491e9 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/x86/asm-primitives.cpp	Mon Feb 03 08:46:36 2014 -0600
@@ -563,6 +563,62 @@ extern "C" {
     SETUP_PIXEL_SSE_SP_DEF(64, 16, cpu); \
     SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
 
+#define SETUP_LUMA_ADDAVG_FUNC_DEF(W, H, cpu) \
+    p.luma_addAvg[LUMA_## W ## x ## H] = x265_addAvg_## W ## x ## H ## cpu;
+
+#define LUMA_ADDAVG(cpu) \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(4,  4,  cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(4,  8,  cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(4,  16, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(8,  4,  cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(8,  8,  cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(8,  16, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(8,  32, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(12, 16, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(16, 4,  cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(16, 8,  cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(16, 12, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(16, 16, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(16, 32, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(24, 32, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(16, 64, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(32, 8,  cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(32, 16, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(32, 32, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(32, 64, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(48, 64, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(64, 16, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(64, 32, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(64, 48, cpu); \
+    SETUP_LUMA_ADDAVG_FUNC_DEF(64, 64, cpu); \
+
+#define SETUP_CHROMA_ADDAVG_FUNC_DEF(W, H, cpu) \
+    p.chroma[X265_CSP_I420].addAvg[CHROMA_## W ## x ## H] = x265_addAvg_## W ## x ## H ## cpu;
+
+#define CHROMA_ADDAVG(cpu) \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(4,  2,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(4,  4,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(4,  8,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(4,  16, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(8,  2,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(8,  4,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(8,  6,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(8,  8,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(8,  16, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(8,  32, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(12, 16, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 4,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 8,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 12, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 16, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(16, 32, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(24, 32, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 8,  cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 16, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 24, cpu); \
+    SETUP_CHROMA_ADDAVG_FUNC_DEF(32, 32, cpu); \
+
 #define SETUP_INTRA_ANG4(mode, fno, cpu) \
     p.intra_pred[BLOCK_4x4][mode] = x265_intra_pred_ang4_ ## fno ## _ ## cpu;
 #define SETUP_INTRA_ANG8(mode, fno, cpu) \
@@ -890,6 +946,9 @@ void Setup_Assembly_Primitives(EncoderPr
         p.dct[DCT_4x4] = x265_dct4_sse2;
         p.idct[IDCT_4x4] = x265_idct4_sse2;
         p.idct[IDST_4x4] = x265_idst4_sse2;
+
+        LUMA_ADDAVG(_sse2);
+        CHROMA_ADDAVG(_sse2);
     }
     if (cpuMask & X265_CPU_SSSE3)
     {
@@ -1019,6 +1078,10 @@ void Setup_Assembly_Primitives(EncoderPr
         SETUP_INTRA_ANG8(33, 3, sse4);
 
         p.dct[DCT_8x8] = x265_dct8_sse4;
+
+        p.chroma[X265_CSP_I420].addAvg[CHROMA_2x4]  = x265_addAvg_2x4_sse4;
+        p.chroma[X265_CSP_I420].addAvg[CHROMA_2x8]  = x265_addAvg_2x8_sse4;
+        p.chroma[X265_CSP_I420].addAvg[CHROMA_6x8]  = x265_addAvg_6x8_sse4;
     }
     if (cpuMask & X265_CPU_AVX)
     {
diff -r eff52bc89e94 -r 898ccce491e9 source/common/x86/const-a.asm
--- a/source/common/x86/const-a.asm	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/x86/const-a.asm	Mon Feb 03 08:46:36 2014 -0600
@@ -33,6 +33,8 @@ const hsub_mul,    times 16 db 1, -1
 const pw_1,        times 16 dw 1
 const pw_16,       times 16 dw 16
 const pw_32,       times 16 dw 32
+const pw_128,      times 16 dw 128
+const pw_256,      times 16 dw 256
 const pw_512,      times 16 dw 512
 const pw_1024,     times 16 dw 1024
 const pw_4096,     times 16 dw 4096
diff -r eff52bc89e94 -r 898ccce491e9 source/common/x86/mc-a.asm
--- a/source/common/x86/mc-a.asm	Sun Feb 02 12:52:04 2014 -0600
+++ b/source/common/x86/mc-a.asm	Mon Feb 03 08:46:36 2014 -0600
@@ -49,6 +49,8 @@ cextern pw_4
 cextern pw_8
 cextern pw_32
 cextern pw_64
+cextern pw_128
+cextern pw_256
 cextern pw_512
 cextern pw_00ff
 cextern pw_pixel_max
@@ -56,6 +58,1036 @@ cextern sw_64
 cextern pd_32
 cextern deinterleave_shufd
 
+;====================================================================================================================
+;void addAvg (int16_t* src0, int16_t* src1, pixel* dst, intptr_t src0Stride, intptr_t src1Stride, intptr_t dstStride)
+;====================================================================================================================
+; r0 = pSrc0,    r1 = pSrc1
+; r2 = pDst,     r3 = iStride0
+; r4 = iStride1, r5 = iDstStride
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal addAvg_2x4, 6,6,8, src0, src1, dst, src0Stride, src1tride, dstStride
+
+    mova          m0,          [pw_256]
+    mova          m7,          [pw_128]
+    add           r3,          r3
+    add           r4,          r4
+
+    movd          m1,          [r0]
+    movd          m2,          [r0 + r3]
+    movd          m3,          [r1]
+    movd          m4,          [r1 + r4]
+
+    punpckldq     m1,          m2
+    punpckldq     m3,          m4
+
+    lea           r0,          [r0 + 2 * r3]
+    lea           r1,          [r1 + 2 * r4]
+
+    movd          m2,          [r0]
+    movd          m4,          [r0 + r3]
+    movd          m5,          [r1]
+    movd          m6,          [r1 + r4]
+
+    punpckldq     m2,          m4
+    punpckldq     m5,          m6
+    punpcklqdq    m1,          m2
+    punpcklqdq    m3,          m5
+
+    paddw         m1,          m3
+    pmulhrsw      m1,          m0
+    paddw         m1,          m7
+    packuswb      m1,          m1
+
+    pextrw        [r2],        m1, 0
+    pextrw        [r2 + r5],   m1, 1
+    lea           r2,          [r2 + 2 * r5]
+    pextrw        [r2],        m1, 2
+    pextrw        [r2 + r5],   m1, 3
+
+    RET
+;-----------------------------------------------------------------------------
+
+;-----------------------------------------------------------------------------
+INIT_XMM sse4
+cglobal addAvg_2x8, 6,6,8, pSrc0, src0, src1, dst, src0Stride, src1tride, dstStride
+
+    mova          m0,          [pw_256]
+    mova          m7,          [pw_128]
+    add           r3,          r3
+    add           r4,          r4
+
+    movd          m1,          [r0]
+    movd          m2,          [r0 + r3]
+    movd          m3,          [r1]
+    movd          m4,          [r1 + r4]
+
+    punpckldq     m1,          m2
+    punpckldq     m3,          m4
+
+    lea           r0,          [r0 + 2 * r3]
+    lea           r1,          [r1 + 2 * r4]
+
+    movd          m2,          [r0]
+    movd          m4,          [r0 + r3]
+    movd          m5,          [r1]
+    movd          m6,          [r1 + r4]
+
+    punpckldq     m2,          m4
+    punpckldq     m5,          m6
+    punpcklqdq    m1,          m2
+    punpcklqdq    m3,          m5
+
+    paddw         m1,          m3
+    pmulhrsw      m1,          m0
+    paddw         m1,          m7
+    packuswb      m1,          m1
+
+    pextrw        [r2],        m1, 0
+    pextrw        [r2 + r5],   m1, 1
+    lea           r2,          [r2 + 2 * r5]
+    pextrw        [r2],        m1, 2
+    pextrw        [r2 + r5],   m1, 3
+
+    lea           r2,          [r2 + 2 * r5]
+    lea           r0,          [r0 + 2 * r3]


More information about the x265-commits mailing list