[x265-commits] [x265] level: signal profile/level NONE if non-compliant configuration is found

Mon Aug 11 06:07:54 CEST 2014

details:   http://hg.videolan.org/x265/rev/7965aacd35ae
branches:  
changeset: 7759:7965aacd35ae
user:      Steve Borho <steve at borho.org>
date:      Sun Aug 10 16:36:38 2014 -0500
description:
level: signal profile/level NONE if non-compliant configuration is found

Once the resolution is high enough to require level 5 or above, the CTU size
must be at least 32. No level supports a smaller CTU at that point, so signal
level NONE.  It's my understanding that you cannot signal a profile if the level
is NONE, so we reset profile to NONE as well.

The same is true if NumPocTotalCurr is greater than 8; there are no levels which
support values that high.  Instead of signaling the closest level, we should
signal profile/level NONE.
Subject: [x265] rc: nits; add LF in frame stats of 2 pass

details:   http://hg.videolan.org/x265/rev/94d9a0d50837
branches:  
changeset: 7760:94d9a0d50837
user:      Aarthi Thirumalai
date:      Sun Aug 10 22:55:27 2014 +0530
description:
rc: nits; add LF in frame stats of 2 pass
Subject: [x265] quant: remove scaledCoeff from nquant()

details:   http://hg.videolan.org/x265/rev/2bdcfcc1bb33
branches:  
changeset: 7761:2bdcfcc1bb33
user:      Satoshi Nakagawa <nakagawa424 at oki.com>
date:      Sun Aug 10 17:22:08 2014 +0900
description:
quant: remove scaledCoeff from nquant()

diffstat:

 source/common/dct.cpp             |    3 +-
 source/common/primitives.h        |    2 +-
 source/common/quant.cpp           |    6 +-
 source/common/x86/pixel-util.h    |    2 +-
 source/common/x86/pixel-util8.asm |  103 +++++++++++++++++--------------------
 source/encoder/level.cpp          |   20 ++++++-
 source/encoder/ratecontrol.cpp    |    2 +-
 source/test/mbdstharness.cpp      |    9 +--
 8 files changed, 73 insertions(+), 74 deletions(-)

diffs (266 lines):

diff -r 6e4eb8542203 -r 2bdcfcc1bb33 source/common/dct.cpp

--- a/source/common/dct.cpp	Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/dct.cpp	Sun Aug 10 17:22:08 2014 +0900
@@ -795,7 +795,7 @@ uint32_t quant_c(int32_t* coef, int32_t*
     return numSig;
 }
 
-uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* scaledCoeff, int32_t* qCoef, int qBits, int add, int numCoeff)
+uint32_t nquant_c(int32_t* coef, int32_t* quantCoeff, int32_t* qCoef, int qBits, int add, int numCoeff)
 {
     uint32_t numSig = 0;
 
@@ -805,7 +805,6 @@ uint32_t nquant_c(int32_t* coef, int32_t
         int sign  = (level < 0 ? -1 : 1);
 
         int tmplevel = abs(level) * quantCoeff[blockpos];
-        scaledCoeff[blockpos] = tmplevel;
         level = ((tmplevel + add) >> qBits);
         if (level)
             ++numSig;
diff -r 6e4eb8542203 -r 2bdcfcc1bb33 source/common/primitives.h
--- a/source/common/primitives.h	Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/primitives.h	Sun Aug 10 17:22:08 2014 +0900
@@ -160,7 +160,7 @@ typedef void (*calcresidual_t)(pixel *fe
 typedef void (*calcrecon_t)(pixel* pred, int16_t* residual, int16_t* reconqt, pixel *reconipred, int stride, int strideqt, int strideipred);
 typedef void (*transpose_t)(pixel* dst, pixel* src, intptr_t stride);
 typedef uint32_t (*quant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
 typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift);
 typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 typedef int  (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff);
diff -r 6e4eb8542203 -r 2bdcfcc1bb33 source/common/quant.cpp
--- a/source/common/quant.cpp	Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/quant.cpp	Sun Aug 10 17:22:08 2014 +0900
@@ -487,7 +487,6 @@ void Quant::invtransformNxN(bool transQu
  * probability models like CABAC */
 uint32_t Quant::rdoQuant(TComDataCU* cu, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool usePsy)
 {
-    uint32_t trSize = 1 << log2TrSize;
     int transformShift = MAX_TR_DYNAMIC_RANGE - X265_DEPTH - log2TrSize; /* Represents scaling through forward transform */
     int scalingListType = (cu->isIntra(absPartIdx) ? 0 : 3) + ttype;
 
@@ -500,14 +499,13 @@ uint32_t Quant::rdoQuant(TComDataCU* cu,
     int32_t *qCoef = m_scalingList->m_quantCoef[log2TrSize - 2][scalingListType][rem];
 
     int numCoeff = 1 << log2TrSize * 2;
-    int scaledCoeff[32 * 32];
-    uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, scaledCoeff, dstCoeff, qbits, add, numCoeff);
+    uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff);
 
     X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, numCoeff), "numSig differ\n");
     if (!numSig)
         return 0;
 
-    x265_emms();
+    uint32_t trSize = 1 << log2TrSize;
 
     /* unquant constants for psy-rdoq. The dequant coefficients have a (1<<4) scale applied that
      * must be removed during unquant.  This may be larger than the QP upshift, which would turn
diff -r 6e4eb8542203 -r 2bdcfcc1bb33 source/common/x86/pixel-util.h
--- a/source/common/x86/pixel-util.h	Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/x86/pixel-util.h	Sun Aug 10 17:22:08 2014 +0900
@@ -45,7 +45,7 @@ void x265_transpose32_sse2(pixel *dest, 
 void x265_transpose64_sse2(pixel *dest, pixel *src, intptr_t stride);
 
 uint32_t x265_quant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *deltaU, int32_t *qCoef, int qBits, int add, int numCoeff);
-uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+uint32_t x265_nquant_sse4(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
 void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 int x265_count_nonzero_ssse3(const int32_t *quantCoeff, int numCoeff);
 
diff -r 6e4eb8542203 -r 2bdcfcc1bb33 source/common/x86/pixel-util8.asm
--- a/source/common/x86/pixel-util8.asm	Sat Aug 09 19:43:23 2014 -0500
+++ b/source/common/x86/pixel-util8.asm	Sun Aug 10 17:22:08 2014 +0900
@@ -938,72 +938,63 @@ cglobal quant, 5,6,8
 
 
 ;-----------------------------------------------------------------------------
-; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *scaledCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
+; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff);
 ;-----------------------------------------------------------------------------
 INIT_XMM sse4
-cglobal nquant, 5,6,8
-
-    ; fill qbits
-    movd        m4, r4d         ; m4 = qbits
-
-    ; fill offset
-    movd        m5, r5m
-    pshufd      m5, m5, 0       ; m5 = add
-
-    mov         r4d, r6m
+cglobal nquant, 4,5,8
+    movd        m6, r4m
+    mov         r4d, r5m
+    pxor        m7, m7          ; m7 = numZero
+    movd        m5, r3d         ; m5 = qbits
+    pshufd      m6, m6, 0       ; m6 = add
+    mov         r3d, r4d        ; r3 = numCoeff
     shr         r4d, 3
-    pxor        m7, m7          ; m7 = numZero
 .loop:
-    ; 4 coeff
     movu        m0, [r0]        ; m0 = level
-    pxor        m1, m1
-    pcmpgtd     m1, m0          ; m1 = sign
+    movu        m1, [r0 + 16]   ; m1 = level
     movu        m2, [r1]        ; m2 = qcoeff
+    movu        m3, [r1 + 16]   ; m3 = qcoeff
+    add         r0, 32
+    add         r1, 32
+
+    pxor        m4, m4
+    pcmpgtd     m4, m0          ; m4 = sign
     pabsd       m0, m0
     pmulld      m0, m2          ; m0 = tmpLevel1
-    movu        [r2], m0        ; m0 = scaledCoeff
-    paddd       m2, m0, m5
-    psrad       m2, m4          ; m2 = level1
-    pxor        m0, m0
-    pcmpeqd     m0, m2          ; m0 = mask4
-    psubd       m7, m0
-
-    pxor        m2, m1
-    psubd       m2, m1
-    packssdw    m2, m2
-    pmovsxwd    m2, m2
-    movu        [r3], m2
-    ; 4 coeff
-    movu        m0, [r0 + 16]   ; m0 = level
-    pxor        m1, m1
-    pcmpgtd     m1, m0          ; m1 = sign
-    movu        m2, [r1 + 16]   ; m2 = qcoeff
-    pabsd       m0, m0
-    pmulld      m0, m2          ; m0 = tmpLevel1
-    movu        [r2 + 16], m0   ; m0 = scaledCoeff
-    paddd       m2, m0, m5
-    psrad       m2, m4          ; m2 = level1
-    pxor        m0, m0
-    pcmpeqd     m0, m2          ; m0 = mask4
-    psubd       m7, m0
-
-    pxor        m2, m1
-    psubd       m2, m1
-    packssdw    m2, m2
-    pmovsxwd    m2, m2
-    movu        [r3 + 16], m2
-
-    add         r0, 32
-    add         r1, 32
+    paddd       m0, m6
+    psrad       m0, m5          ; m0 = level1
+    pxor        m0, m4
+    psubd       m0, m4
+
+    pxor        m4, m4
+    pcmpgtd     m4, m1          ; m4 = sign
+    pabsd       m1, m1
+    pmulld      m1, m3          ; m1 = tmpLevel1
+    paddd       m1, m6
+    psrad       m1, m5          ; m1 = level1
+    pxor        m1, m4
+    psubd       m1, m4
+
+    packssdw    m0, m0
+    packssdw    m1, m1
+    pmovsxwd    m0, m0
+    pmovsxwd    m1, m1
+
+    movu        [r2], m0
+    movu        [r2 + 16], m1
     add         r2, 32
-    add         r3, 32
-
     dec         r4d
-    jnz        .loop
-
-    phaddd      m7, m7
-    phaddd      m7, m7
-    mov         eax, r6m
+
+    packssdw    m0, m1
+    pxor        m4, m4
+    pcmpeqw     m0, m4
+    psubw       m7, m0
+
+    jnz         .loop
+
+    packuswb    m7, m7
+    psadbw      m7, m4
+    mov         eax, r3d
     movd        r4d, m7
     sub         eax, r4d        ; numSig
 
diff -r 6e4eb8542203 -r 2bdcfcc1bb33 source/encoder/level.cpp
--- a/source/encoder/level.cpp	Sat Aug 09 19:43:23 2014 -0500
+++ b/source/encoder/level.cpp	Sun Aug 10 17:22:08 2014 +0900
@@ -134,12 +134,26 @@ void determineLevel(const x265_param &pa
 
         /* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
         if (levels[i].levelEnum >= Level::LEVEL5 && param.maxCUSize < 32)
-            x265_log(&param, X265_LOG_WARNING, "CTU size is too small, stream will be non-compliant for level %s\n", levels[i].name);
+        {
+            x265_log(&param, X265_LOG_WARNING, "level %s detected, but CTU size 16 is non-compliant\n", levels[i].name);
+            vps.ptl.profileIdc = Profile::NONE;
+            vps.ptl.levelIdc = Level::NONE;
+            vps.ptl.tierFlag = Level::MAIN;
+            x265_log(&param, X265_LOG_INFO, "NONE profile, Level-NONE (Main tier)\n");
+            return;
+        }
 
         /* The value of NumPocTotalCurr shall be less than or equal to 8 */
-        int numPocTotalCurr = param.maxNumReferences + !!param.bframes;
+        int numPocTotalCurr = param.maxNumReferences + vps.numReorderPics;
         if (numPocTotalCurr > 8)
-            x265_log(&param, X265_LOG_WARNING, "Too many reference frames, stream will be non-compliant for level %s\n", levels[i].name);
+        {
+            x265_log(&param, X265_LOG_WARNING, "level %s detected, but NumPocTotalCurr (total references) is non-compliant\n", levels[i].name);
+            vps.ptl.profileIdc = Profile::NONE;
+            vps.ptl.levelIdc = Level::NONE;
+            vps.ptl.tierFlag = Level::MAIN;
+            x265_log(&param, X265_LOG_INFO, "NONE profile, Level-NONE (Main tier)\n");
+            return;
+        }
 
         vps.ptl.levelIdc = levels[i].levelEnum;
         if (bitrate > levels[i].maxBitrateMain && bitrate <= levels[i].maxBitrateHigh &&
diff -r 6e4eb8542203 -r 2bdcfcc1bb33 source/encoder/ratecontrol.cpp
--- a/source/encoder/ratecontrol.cpp	Sat Aug 09 19:43:23 2014 -0500
+++ b/source/encoder/ratecontrol.cpp	Sun Aug 10 17:22:08 2014 +0900
@@ -2084,7 +2084,7 @@ int RateControl::rateControlEnd(Frame* p
             : rce->sliceType == P_SLICE ? 'P'
             : IS_REFERENCED(slice) ? 'B' : 'b';
         if (fprintf(m_statFileOut,
-                    "in:%d out:%d type:%c dur:%.3f q:%.2f q-aq:%.2f tex:%d mv:%d misc:%d icu:%.2f pcu:%.2f scu:%.2f ;",
+                    "in:%d out:%d type:%c dur:%.3f q:%.2f q-aq:%.2f tex:%d mv:%d misc:%d icu:%.2f pcu:%.2f scu:%.2f ;\n",
                     rce->poc, rce->encodeOrder,
                     cType, m_frameDuration,
                     pic->m_avgQpRc, pic->m_avgQpAq,
diff -r 6e4eb8542203 -r 2bdcfcc1bb33 source/test/mbdstharness.cpp
--- a/source/test/mbdstharness.cpp	Sat Aug 09 19:43:23 2014 -0500
+++ b/source/test/mbdstharness.cpp	Sun Aug 10 17:22:08 2014 +0900
@@ -348,11 +348,8 @@ bool MBDstHarness::check_nquant_primitiv
         int index1 = rand() % TEST_CASES;
         int index2 = rand() % TEST_CASES;
 
-        refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf5, mintbuf6, bits, valueToAdd, numCoeff);
-        optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf3, mintbuf4, bits, valueToAdd, numCoeff);
-
-        if (memcmp(mintbuf3, mintbuf5, cmp_size))
-            return false;
+        refReturnValue = ref(int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf6, bits, valueToAdd, numCoeff);
+        optReturnValue = (uint32_t)checked(opt, int_test_buff[index1] + j, int_test_buff[index2] + j, mintbuf4, bits, valueToAdd, numCoeff);
 
         if (memcmp(mintbuf4, mintbuf6, cmp_size))
             return false;
@@ -511,7 +508,7 @@ void MBDstHarness::measureSpeed(const En
     if (opt.nquant)
     {
         printf("nquant\t\t");
-        REPORT_SPEEDUP(opt.nquant, ref.nquant, mintbuf1, mintbuf2, mintbuf3, mintbuf4, 23, 23785, 32 * 32);
+        REPORT_SPEEDUP(opt.nquant, ref.nquant, mintbuf1, mintbuf2, mintbuf3, 23, 23785, 32 * 32);
     }
 
     if (opt.count_nonzero)