[x265-commits] [x265] fix SATD32x32 16bits cumulate sum overflow (x86 version only)

Min Chen chenm003 at 163.com
Mon Jan 27 19:45:39 CET 2014


details:   http://hg.videolan.org/x265/rev/83767892376d
branches:  stable
changeset: 5910:83767892376d
user:      Min Chen <chenm003 at 163.com>
date:      Fri Jan 24 18:10:13 2014 +0800
description:
fix SATD32x32 16-bit cumulative sum overflow (x86 version only)
Subject: [x265] wavefront: add missing initializer

details:   http://hg.videolan.org/x265/rev/4fcfa56420fb
branches:  stable
changeset: 5911:4fcfa56420fb
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Mon Jan 27 23:21:06 2014 +0530
description:
wavefront: add missing initializer
Subject: [x265] WeightPrediction: check difPoc <= bframes+1 to allow weight analysis for valid references

details:   http://hg.videolan.org/x265/rev/a02a028afea9
branches:  stable
changeset: 5912:a02a028afea9
user:      Kavitha Sampath <kavitha at multicorewareinc.com>
date:      Fri Jan 24 10:45:41 2014 +0530
description:
WeightPrediction: check difPoc <= bframes+1 to allow weight analysis for valid references
Subject: [x265] common: do not allow encodes other than 4:2:0 to start

details:   http://hg.videolan.org/x265/rev/10fc60881bbf
branches:  stable
changeset: 5913:10fc60881bbf
user:      Steve Borho <steve at borho.org>
date:      Mon Jan 27 11:54:44 2014 -0600
description:
common: do not allow encodes other than 4:2:0 to start
Subject: [x265] me: add one more pixel lag for DIA search's relaxed search bounds

details:   http://hg.videolan.org/x265/rev/dc4e57833aae
branches:  stable
changeset: 5914:dc4e57833aae
user:      Steve Borho <steve at borho.org>
date:      Mon Jan 27 12:30:05 2014 -0600
description:
me: add one more pixel lag for DIA search's relaxed search bounds

diffstat:

 source/common/common.cpp            |   6 ++-
 source/common/wavefront.h           |   2 +-
 source/common/x86/pixel-a.asm       |  48 ++++++++++++++++-----------------
 source/encoder/frameencoder.cpp     |   1 +
 source/encoder/weightPrediction.cpp |  52 ++++++++++++++++++------------------
 source/encoder/weightPrediction.h   |   4 ++-
 6 files changed, 58 insertions(+), 55 deletions(-)

diffs (239 lines):

diff -r b173809575c6 -r dc4e57833aae source/common/common.cpp
--- a/source/common/common.cpp	Mon Jan 27 00:04:22 2014 -0600
+++ b/source/common/common.cpp	Mon Jan 27 12:30:05 2014 -0600
@@ -174,7 +174,7 @@ void x265_param_default(x265_param *para
     /* Inter Coding tools */
     param->searchMethod = X265_HEX_SEARCH;
     param->subpelRefine = 2;
-    param->searchRange = 58;
+    param->searchRange = 57;
     param->maxNumMergeCand = 2;
     param->bEnableWeightedPred = 1;
     param->bEnableWeightedBiPred = 0;
@@ -277,7 +277,7 @@ int x265_param_default_preset(x265_param
         {
             param->lookaheadDepth = 10;
             param->maxCUSize = 32;
-            param->searchRange = 26;
+            param->searchRange = 25;
             param->bFrameAdaptive = 0;
             param->subpelRefine = 0;
             param->maxNumMergeCand = 2;
@@ -438,6 +438,8 @@ int x265_check_params(x265_param *param)
 
     CHECK(param->inputBitDepth > x265_max_bit_depth,
           "inputBitDepth must be <= x265_max_bit_depth");
+    CHECK(param->internalCsp != X265_CSP_I420,
+          "Only 4:2:0 color space is supported at this time");
 
     CHECK(param->rc.qp < -6 * (param->inputBitDepth - 8) || param->rc.qp > 51,
           "QP exceeds supported range (-QpBDOffsety to 51)");
diff -r b173809575c6 -r dc4e57833aae source/common/wavefront.h
--- a/source/common/wavefront.h	Mon Jan 27 00:04:22 2014 -0600
+++ b/source/common/wavefront.h	Mon Jan 27 12:30:05 2014 -0600
@@ -49,7 +49,7 @@ private:
 
 public:
 
-    WaveFront(ThreadPool *pool) : JobProvider(pool), m_queuedBitmap(0) {}
+    WaveFront(ThreadPool *pool) : JobProvider(pool), m_queuedBitmap(0), m_enableBitmap(0) {}
 
     virtual ~WaveFront();
 
diff -r b173809575c6 -r dc4e57833aae source/common/x86/pixel-a.asm
--- a/source/common/x86/pixel-a.asm	Mon Jan 27 00:04:22 2014 -0600
+++ b/source/common/x86/pixel-a.asm	Mon Jan 27 12:30:05 2014 -0600
@@ -1361,43 +1361,41 @@ cglobal pixel_satd_32x32, 4,8,8    ;if W
     SATD_END_SSE2 m6, m7
 %else
 cglobal pixel_satd_32x32, 4,7,8,0-gprsize   ;if !WIN64
-
     SATD_START_SSE2 m6, m7
     mov r6, r0
     mov [rsp], r2
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-%if HIGH_BIT_DEPTH
-    pxor       m7, m7
-%endif
-    SATD_ACCUM m6, m0, m7
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
     lea r0, [r6 + 8*SIZEOF_PIXEL]
     mov r2, [rsp]
     add r2, 8*SIZEOF_PIXEL
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_ACCUM m6, m0, m7
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
     lea r0, [r6 + 16*SIZEOF_PIXEL]
     mov r2, [rsp]
     add r2, 16*SIZEOF_PIXEL
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_ACCUM m6, m0, m7
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
     lea r0, [r6 + 24*SIZEOF_PIXEL]
     mov r2, [rsp]
     add r2, 24*SIZEOF_PIXEL
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    call pixel_satd_8x8_internal
-    SATD_END_SSE2 m6, m7
-
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    call pixel_satd_8x8_internal2
+    pxor    m7, m7
+    movhlps m7, m6
+    paddd   m6, m7
+    pshufd  m7, m6, 1
+    paddd   m6, m7
+    movd   eax, m6
+    RET
 %endif
 
 %if WIN64
diff -r b173809575c6 -r dc4e57833aae source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Mon Jan 27 00:04:22 2014 -0600
+++ b/source/encoder/frameencoder.cpp	Mon Jan 27 12:30:05 2014 -0600
@@ -945,6 +945,7 @@ void FrameEncoder::compressCTURows()
     }
 
     int range = m_cfg->param.searchRange + /* fpel search */
+                1 +                        /* diamond search range check lag */
                 2 +                        /* subpel refine */
                 NTAPS_LUMA / 2;            /* subpel filter half-length */
     uint32_t refLagRows = 1 + ((range + g_maxCUHeight - 1) / g_maxCUHeight);
diff -r b173809575c6 -r dc4e57833aae source/encoder/weightPrediction.cpp
--- a/source/encoder/weightPrediction.cpp	Mon Jan 27 00:04:22 2014 -0600
+++ b/source/encoder/weightPrediction.cpp	Mon Jan 27 12:30:05 2014 -0600
@@ -195,9 +195,11 @@ bool WeightPrediction::checkDenom(int de
             ref  = &m_slice->getRefPic(list, refIdxTemp)->m_lowres;
             refPoc = m_slice->getRefPic(list, refIdxTemp)->getPOC();
             difPoc = abs(curPoc - refPoc);
-            m_mvs = fenc->lowresMvs[list][difPoc - 1];
-            if (m_mvs)
+            if (difPoc > m_bframes + 1)
+                continue;
+            else
             {
+                m_mvs = fenc->lowresMvs[list][difPoc - 1];
                 if (m_mvs[0].x == 0x7FFF)
                     continue;
                 else
@@ -245,44 +247,42 @@ bool WeightPrediction::checkDenom(int de
                 switch (yuv)
                 {
                 case 0:
-
+                {
                     m_mcbuf = ref->fpelPlane;
                     m_inbuf = fenc->lowresPlane[0];
-                    if (m_mvs)
+                    pixel *tempm_buf;
+                    pixel m_buf8[8 * 8];
+                    int pixoff = 0, cu = 0;
+                    intptr_t strd;
+                    for (int y = 0; y < m_frmHeight; y += 8, pixoff = y * m_refStride)
                     {
-                        pixel *tempm_buf;
-                        pixel m_buf8[8 * 8];
-                        int pixoff = 0, cu = 0;
-                        intptr_t strd;
-                        for (int y = 0; y < m_frmHeight; y += 8, pixoff = y * m_refStride)
+                        for (int x = 0; x < m_frmWidth; x += 8, pixoff += 8, cu++)
                         {
-                            for (int x = 0; x < m_frmWidth; x += 8, pixoff += 8, cu++)
+                            if (m_mvCost[cu] > fenc->intraCost[cu])
                             {
-                                if (fenc->lowresMvCosts[0][difPoc - 1][cu] > fenc->intraCost[cu])
-                                {
-                                    strd = m_refStride;
-                                    tempm_buf = m_inbuf + pixoff;
-                                }
-                                else
-                                {
-                                    strd = 8;
-                                    tempm_buf = ref->lowresMC(pixoff, m_mvs[cu], m_buf8, strd);
-                                    ic++;
-                                }
-                                primitives.blockcpy_pp(8, 8, m_buf + (y * m_refStride) + x, m_refStride, tempm_buf, strd);
+                                strd = m_refStride;
+                                tempm_buf = m_inbuf + pixoff;
                             }
+                            else
+                            {
+                                strd = 8;
+                                tempm_buf = ref->lowresMC(pixoff, m_mvs[cu], m_buf8, strd);
+                                ic++;
+                            }
+                            primitives.blockcpy_pp(8, 8, m_buf + (y * m_refStride) + x, m_refStride, tempm_buf, strd);
                         }
+                    }
 
-                        m_mcbuf = m_buf;
-                    }
+                    m_mcbuf = m_buf;
                     break;
+                }
 
                 case 1:
 
                     m_mcbuf = m_slice->getRefPic(list, refIdxTemp)->getPicYuvOrg()->getCbAddr();
                     m_inbuf = m_slice->getPic()->getPicYuvOrg()->getCbAddr();
                     m_blockSize = 8;
-                    if (m_mvs) mcChroma();
+                    mcChroma();
                     break;
 
                 case 2:
@@ -290,7 +290,7 @@ bool WeightPrediction::checkDenom(int de
                     m_mcbuf = m_slice->getRefPic(list, refIdxTemp)->getPicYuvOrg()->getCrAddr();
                     m_inbuf = m_slice->getPic()->getPicYuvOrg()->getCrAddr();
                     m_blockSize = 8;
-                    if (m_mvs) mcChroma();
+                    mcChroma();
                     break;
                 }
 
diff -r b173809575c6 -r dc4e57833aae source/encoder/weightPrediction.h
--- a/source/encoder/weightPrediction.h	Mon Jan 27 00:04:22 2014 -0600
+++ b/source/encoder/weightPrediction.h	Mon Jan 27 12:30:05 2014 -0600
@@ -42,10 +42,11 @@ private:
     pixel *m_mcbuf, *m_inbuf, *m_buf;
     int32_t *m_intraCost;
     MV *m_mvs;
+    int m_bframes;
 
 public:
 
-    WeightPrediction(TComSlice *slice)
+    WeightPrediction(TComSlice *slice, x265_param param)
     {
         this->m_slice = slice;
         m_csp = m_slice->getPic()->getPicYuvOrg()->m_picCsp;
@@ -56,6 +57,7 @@ public:
         m_dstStride = m_frmWidth;
         m_refStride = m_slice->getPic()->m_lowres.lumaStride;
         m_intraCost = m_slice->getPic()->m_lowres.intraCost;
+        m_bframes = param.bframes;
 
         m_mcbuf = NULL;
         m_inbuf = NULL;


More information about the x265-commits mailing list