[x265-commits] [x265] TEncSearch: remove unused static MV arrays

Steve Borho steve at borho.org
Wed Oct 16 09:36:30 CEST 2013


details:   http://hg.videolan.org/x265/rev/a998daed8459
branches:  
changeset: 4478:a998daed8459
user:      Steve Borho <steve at borho.org>
date:      Tue Oct 15 20:57:47 2013 -0500
description:
TEncSearch: remove unused static MV arrays
Subject: [x265] move ssim calculation to frameFilters

details:   http://hg.videolan.org/x265/rev/09c0e0209d84
branches:  
changeset: 4479:09c0e0209d84
user:      Aarthi Thirumalai<aarthi at multicorewareinc.com>
date:      Wed Oct 16 10:11:23 2013 +0530
description:
move ssim calculation to frameFilters
Subject: [x265] asm: disable the use of x264 pixel weighting functions

details:   http://hg.videolan.org/x265/rev/4b1716b232e5
branches:  
changeset: 4480:4b1716b232e5
user:      Steve Borho <steve at borho.org>
date:      Wed Oct 16 02:35:57 2013 -0500
description:
asm: disable the use of x264 pixel weighting functions

These are breaking lowres qpel generation

diffstat:

 source/Lib/TLibEncoder/TEncSearch.cpp |  26 --------------
 source/common/x86/asm-primitives.cpp  |   4 +-
 source/encoder/frameencoder.cpp       |  63 -----------------------------------
 source/encoder/frameencoder.h         |   6 ---
 source/encoder/framefilter.cpp        |  57 +++++++++++++++++++++++++++++++
 source/encoder/framefilter.h          |   3 +
 6 files changed, 62 insertions(+), 97 deletions(-)

diffs (296 lines):

diff -r 9bff70c75d32 -r 4b1716b232e5 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Tue Oct 15 20:50:42 2013 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Wed Oct 16 02:35:57 2013 -0500
@@ -56,32 +56,6 @@ DECLARE_CYCLE_COUNTER(ME);
 //! \ingroup TLibEncoder
 //! \{
 
-static const MV s_mvRefineHpel[9] =
-{
-    MV(0,  0),  // 0
-    MV(0, -1),  // 1
-    MV(0,  1),  // 2
-    MV(-1,  0), // 3
-    MV(1,  0),  // 4
-    MV(-1, -1), // 5
-    MV(1, -1),  // 6
-    MV(-1,  1), // 7
-    MV(1,  1)   // 8
-};
-
-static const MV s_mvRefineQPel[9] =
-{
-    MV(0,  0),  // 0
-    MV(0, -1),  // 1
-    MV(0,  1),  // 2
-    MV(-1, -1), // 5
-    MV(1, -1),  // 6
-    MV(-1,  0), // 3
-    MV(1,  0),  // 4
-    MV(-1,  1), // 7
-    MV(1,  1)   // 8
-};
-
 TEncSearch::TEncSearch()
 {
     m_qtTempCoeffY  = NULL;
diff -r 9bff70c75d32 -r 4b1716b232e5 source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Tue Oct 15 20:50:42 2013 -0500
+++ b/source/common/x86/asm-primitives.cpp	Wed Oct 16 02:35:57 2013 -0500
@@ -231,7 +231,7 @@ void Setup_Assembly_Primitives(EncoderPr
         p.pixelavg_pp[PARTITION_4x8]  = x265_pixel_avg_4x8_mmx2;
         p.pixelavg_pp[PARTITION_4x4]  = x265_pixel_avg_4x4_mmx2;
 
-        PIXEL_AVE(sse2);
+        //PIXEL_AVE(sse2);
         ASSGN_SSE(sse2);
         INIT2( sad, _sse2 );
         INIT2( sad_x3, _sse2 );
@@ -260,7 +260,7 @@ void Setup_Assembly_Primitives(EncoderPr
         SA8D_INTER_FROM_BLOCK(ssse3);
         p.sse_pp[PARTITION_4x4] = x265_pixel_ssd_4x4_ssse3;
         ASSGN_SSE(ssse3);
-        PIXEL_AVE(ssse3);
+        //PIXEL_AVE(ssse3);
 
         p.sad_x4[PARTITION_8x4] = x265_pixel_sad_x4_8x4_ssse3;
         p.sad_x4[PARTITION_8x8] = x265_pixel_sad_x4_8x8_ssse3;
diff -r 9bff70c75d32 -r 4b1716b232e5 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp	Tue Oct 15 20:50:42 2013 -0500
+++ b/source/encoder/frameencoder.cpp	Wed Oct 16 02:35:57 2013 -0500
@@ -55,7 +55,6 @@ FrameEncoder::FrameEncoder()
     , m_cfg(NULL)
     , m_pic(NULL)
     , m_rows(NULL)
-    , m_ssimBuf(NULL)
 {
     for (int i = 0; i < MAX_NAL_UNITS; i++)
         m_nalList[i] = NULL;
@@ -93,7 +92,6 @@ void FrameEncoder::destroy()
     }
 
     m_frameFilter.destroy();
-    X265_FREE(m_ssimBuf);
     // wait for worker thread to exit
     stop();
 }
@@ -119,9 +117,6 @@ void FrameEncoder::init(Encoder *top, in
         m_pool = NULL;
     }
 
-    if (m_cfg->param.bEnableSsim)
-        m_ssimBuf = (ssim_t*)x265_malloc(sizeof(ssim_t) * 8 * (m_cfg->param.sourceWidth / 4 + 3));
-
     m_frameFilter.init(top, numRows, getRDGoOnSbacCoder(0));
 
     // initialize SPS
@@ -541,31 +536,6 @@ void FrameEncoder::compressFrame()
         slice->setSaoEnabledFlag((saoParam->bSaoFlag[0] == 1) ? true : false);
     }
 
-    /* Compute SSIM if enabled */
-    if (m_cfg->param.bEnableSsim && m_ssimBuf)
-    {
-        pixel *rec = (pixel*)m_pic->getPicYuvRec()->getLumaAddr();
-        pixel *org = (pixel*)m_pic->getPicYuvOrg()->getLumaAddr();
-        int stride1 = m_pic->getPicYuvOrg()->getStride();
-        int stride2 = m_pic->getPicYuvRec()->getStride();
-        for (int row = 0; row < m_numRows; row++)
-        {
-            int bEnd = ((row + 1) == (this->m_numRows - 1));
-            int bStart = (row == 0);
-            int minPixY = row * 64 - 4 * !bStart;
-            int maxPixY = (row + 1) * 64 - 4 * !bEnd;
-            int ssim_cnt;
-            x265_emms();
-
-            /* SSIM is done for each row in blocks of 4x4 . The First blocks are offset by 2 pixels to the right
-             * to avoid alignment of ssim blocks with DCT blocks. */
-            minPixY += bStart ? 2 : -6;
-            slice->m_ssim += calculateSSIM(rec + 2 + minPixY * stride1, stride1, org + 2 + minPixY * stride2, stride2, 
-                                           m_cfg->param.sourceWidth - 2, maxPixY - minPixY, m_ssimBuf, &ssim_cnt);
-            slice->m_ssimCnt += ssim_cnt;
-        }
-    }
-
     entropyCoder->setBitstream(NULL);
 
     // Reconstruction slice
@@ -712,39 +682,6 @@ void FrameEncoder::compressFrame()
     delete bitstreamRedirect;
 }
 
-/* Function to calculate SSIM for each row */
-float FrameEncoder::calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int width, int height, void *buf, int *cnt)
-{
-    int z = 0;
-    float ssim = 0.0;
-    ssim_t(*sum0)[4] = (ssim_t(*)[4])buf;
-    ssim_t(*sum1)[4] = sum0 + (width >> 2) + 3;
-    width >>= 2;
-    height >>= 2;
-
-    for (int y = 1; y < height; y++)
-    {
-        for (; z <= y; z++)
-        {
-            void* swap = sum0;
-            sum0 = sum1;
-            sum1 = (ssim_t(*)[4])swap;
-            for (int x = 0; x < width; x += 2)
-            {
-                primitives.ssim_4x4x2_core(&pix1[(4 * x + (z * stride1))], stride1, &pix2[(4 * x + (z * stride2))], stride2, &sum0[x]);
-            }
-        }
-
-        for (int x = 0; x < width - 1; x += 4)
-        {
-            ssim += primitives.ssim_end_4(sum0 + x, sum1 + x, X265_MIN(4, width - x - 1));
-        }
-    }
-
-    *cnt = (height - 1) * (width - 1);
-    return ssim; 
-} 
-
 void FrameEncoder::encodeSlice(TComOutputBitstream* substreams)
 {
     // choose entropy coder
diff -r 9bff70c75d32 -r 4b1716b232e5 source/encoder/frameencoder.h
--- a/source/encoder/frameencoder.h	Tue Oct 15 20:50:42 2013 -0500
+++ b/source/encoder/frameencoder.h	Wed Oct 16 02:35:57 2013 -0500
@@ -145,9 +145,6 @@ public:
     /* called by compressFrame to perform wave-front compression analysis */
     void compressCTURows();
 
-    /* called by compressFrame to calculate SSIM for each row */
-    float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int width, int height, void *buf, int *cnt);
-
     void encodeSlice(TComOutputBitstream* substreams);
 
     /* blocks until worker thread is done, returns encoded picture and bitstream */
@@ -188,9 +185,6 @@ protected:
     int                      m_filterRowDelay;
     CTURow*                  m_rows;
     Event                    m_completionEvent;
-
-    /* Temp storage for ssim computation that doesn't need repeated malloc */
-    void*                    m_ssimBuf;
 };
 }
 
diff -r 9bff70c75d32 -r 4b1716b232e5 source/encoder/framefilter.cpp
--- a/source/encoder/framefilter.cpp	Tue Oct 15 20:50:42 2013 -0500
+++ b/source/encoder/framefilter.cpp	Wed Oct 16 02:35:57 2013 -0500
@@ -36,6 +36,7 @@ FrameFilter::FrameFilter()
     : m_cfg(NULL)
     , m_pic(NULL)
     , m_rdGoOnBinCodersCABAC(true)
+    , m_ssimBuf(NULL)
 {
 }
 
@@ -52,6 +53,7 @@ void FrameFilter::destroy()
         m_sao.destroy();
         m_sao.destroyEncBuffer();
     }
+    X265_FREE(m_ssimBuf);
 }
 
 void FrameFilter::init(Encoder *top, int numRows, TEncSbac* rdGoOnSbacCoder)
@@ -75,6 +77,9 @@ void FrameFilter::init(Encoder *top, int
         m_sao.create(top->param.sourceWidth, top->param.sourceHeight, g_maxCUWidth, g_maxCUHeight);
         m_sao.createEncBuffer();
     }
+
+    if (m_cfg->param.bEnableSsim)
+        m_ssimBuf = (ssim_t*)x265_malloc(sizeof(ssim_t) * 8 * (m_cfg->param.sourceWidth / 4 + 3));
 }
 
 void FrameFilter::start(TComPic *pic)
@@ -270,6 +275,25 @@ void FrameFilter::processRowPost(int row
     {
         calculatePSNR(lineStartCUAddr, row);
     }
+    if (m_cfg->param.bEnableSsim && m_ssimBuf)
+    {
+        pixel *rec = (pixel*)m_pic->getPicYuvRec()->getLumaAddr();
+        pixel *org = (pixel*)m_pic->getPicYuvOrg()->getLumaAddr();
+        int stride1 = m_pic->getPicYuvOrg()->getStride();
+        int stride2 = m_pic->getPicYuvRec()->getStride();
+        int bEnd = ((row + 1) == (this->m_numRows - 1));
+        int bStart = (row == 0);
+        int minPixY = row * 64 - 4 * !bStart;
+        int maxPixY = (row + 1) * 64 - 4 * !bEnd;
+        int ssim_cnt;
+        x265_emms();
+        /* SSIM is done for each row in blocks of 4x4 . The First blocks are offset by 2 pixels to the right
+        * to avoid alignment of ssim blocks with DCT blocks. */
+        minPixY += bStart ? 2 : -6;
+        m_pic->getSlice()->m_ssim += calculateSSIM(rec + 2 + minPixY * stride1, stride1, org + 2 + minPixY * stride2, stride2, 
+                                                   m_cfg->param.sourceWidth - 2, maxPixY - minPixY, m_ssimBuf, &ssim_cnt);
+        m_pic->getSlice()->m_ssimCnt += ssim_cnt;
+    }
 }
 
 static UInt64 computeSSD(pixel *fenc, pixel *rec, int stride, int width, int height)
@@ -405,6 +429,39 @@ void FrameFilter::calculatePSNR(uint32_t
     m_pic->m_SSDV += ssdV;
 }
 
+/* Function to calculate SSIM for each row */
+float FrameFilter::calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int width, int height, void *buf, int *cnt)
+{
+    int z = 0;
+    float ssim = 0.0;
+    ssim_t(*sum0)[4] = (ssim_t(*)[4])buf;
+    ssim_t(*sum1)[4] = sum0 + (width >> 2) + 3;
+    width >>= 2;
+    height >>= 2;
+
+    for (int y = 1; y < height; y++)
+    {
+        for (; z <= y; z++)
+        {
+            void* swap = sum0;
+            sum0 = sum1;
+            sum1 = (ssim_t(*)[4])swap;
+            for (int x = 0; x < width; x += 2)
+            {
+                primitives.ssim_4x4x2_core(&pix1[(4 * x + (z * stride1))], stride1, &pix2[(4 * x + (z * stride2))], stride2, &sum0[x]);
+            }
+        }
+
+        for (int x = 0; x < width - 1; x += 4)
+        {
+            ssim += primitives.ssim_end_4(sum0 + x, sum1 + x, X265_MIN(4, width - x - 1));
+        }
+    }
+
+    *cnt = (height - 1) * (width - 1);
+    return ssim; 
+} 
+
 void FrameFilter::processSao(int row)
 {
     const uint32_t numCols = m_pic->getPicSym()->getFrameWidthInCU();
diff -r 9bff70c75d32 -r 4b1716b232e5 source/encoder/framefilter.h
--- a/source/encoder/framefilter.h	Tue Oct 15 20:50:42 2013 -0500
+++ b/source/encoder/framefilter.h	Wed Oct 16 02:35:57 2013 -0500
@@ -54,6 +54,7 @@ public:
     void processRowPost(int row);
     void processSao(int row);
     void calculatePSNR(uint32_t cu, int row);
+    float calculateSSIM(pixel *pix1, intptr_t stride1, pixel *pix2, intptr_t stride2, int width, int height, void *buf, int *cnt);
 
 protected:
 
@@ -73,6 +74,8 @@ public:
     TEncBinCABAC                m_rdGoOnBinCodersCABAC;
     TComBitCounter              m_bitCounter;
     TEncSbac*                   m_rdGoOnSbacCoderRow0;  // for bitstream exact only, depends on HM's bug
+    /* Temp storage for ssim computation that doesn't need repeated malloc */
+    void*                       m_ssimBuf;
 };
 }
 


More information about the x265-commits mailing list