[x265-commits] [x265] TEncSearch: init best AMVP candidate to zero.

Deepthi Nandakumar deepthi at multicorewareinc.com
Sat Mar 15 04:00:14 CET 2014


details:   http://hg.videolan.org/x265/rev/ed48f84e541b
branches:  
changeset: 6506:ed48f84e541b
user:      Deepthi Nandakumar <deepthi at multicorewareinc.com>
date:      Fri Mar 14 14:21:34 2014 +0530
description:
TEncSearch: init best AMVP candidate to zero.
Subject: [x265] vbv: bugfix-calculate intraCuCostPerRow for vbv

details:   http://hg.videolan.org/x265/rev/394481c40cf9
branches:  stable
changeset: 6507:394481c40cf9
user:      Santhoshini Sekar <santhoshini at multicorewareinc.com>
date:      Fri Mar 14 14:14:57 2014 +0530
description:
vbv: bugfix-calculate intraCuCostPerRow for vbv
Subject: [x265] Merge with stable

details:   http://hg.videolan.org/x265/rev/d5a4296dbfe7
branches:  
changeset: 6508:d5a4296dbfe7
user:      Steve Borho <steve at borho.org>
date:      Fri Mar 14 12:20:31 2014 -0500
description:
Merge with stable
Subject: [x265] added asm primitives for 10bpp sad functions

details:   http://hg.videolan.org/x265/rev/f36c9130de66
branches:  
changeset: 6509:f36c9130de66
user:      Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
date:      Fri Mar 14 12:05:53 2014 +0530
description:
added asm primitives for 10bpp sad functions
Subject: [x265] asm: 8bpp and 10bpp code for idct8x8 module

details:   http://hg.videolan.org/x265/rev/a4cb4fbff864
branches:  
changeset: 6510:a4cb4fbff864
user:      Murugan Vairavel <murugan at multicorewareinc.com>
date:      Fri Mar 14 12:48:46 2014 +0530
description:
asm: 8bpp and 10bpp code for idct8x8 module
Subject: [x265] TEncCu: initialize variables, handle malloc failures more cleanly

details:   http://hg.videolan.org/x265/rev/93ea767e7df0
branches:  
changeset: 6511:93ea767e7df0
user:      Wenju He <wenju at multicorewareinc.com>
date:      Fri Mar 14 15:36:42 2014 +0800
description:
TEncCu: initialize variables, handle malloc failures more cleanly
Subject: [x265] TEncSearch: mvpIdx shares storage with mergeIdx, do not set for merge CUs

details:   http://hg.videolan.org/x265/rev/ba3ddc1848ff
branches:  
changeset: 6512:ba3ddc1848ff
user:      Steve Borho <steve at borho.org>
date:      Fri Mar 14 12:56:01 2014 -0500
description:
TEncSearch: mvpIdx shares storage with mergeIdx, do not set for merge CUs

diffstat:

 source/Lib/TLibEncoder/TEncCu.cpp     |   57 ++++++--
 source/Lib/TLibEncoder/TEncSearch.cpp |    4 +-
 source/common/x86/asm-primitives.cpp  |   47 ++++---
 source/common/x86/dct8.asm            |  202 ++++++++++++++++++++++++++++++++++
 source/common/x86/dct8.h              |    1 +
 source/common/x86/sad16-a.asm         |    1 +
 source/encoder/frameencoder.cpp       |    1 +
 source/test/mbdstharness.cpp          |    7 +-
 8 files changed, 275 insertions(+), 45 deletions(-)

diffs (truncated from 548 to 300 lines):

diff -r 7b5699e6bb75 -r ba3ddc1848ff source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp	Thu Mar 13 18:29:54 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.cpp	Fri Mar 14 12:56:01 2014 -0500
@@ -54,6 +54,29 @@ using namespace x265;
 
 TEncCu::TEncCu()
 {
+    m_interCU_2Nx2N   = NULL;
+    m_interCU_2NxN    = NULL;
+    m_interCU_Nx2N    = NULL;
+    m_intraInInterCU  = NULL;
+    m_mergeCU         = NULL;
+    m_bestMergeCU     = NULL;
+    m_bestCU          = NULL;
+    m_tempCU          = NULL;
+
+    m_bestPredYuv     = NULL;
+    m_bestResiYuv     = NULL;
+    m_bestRecoYuv     = NULL;
+
+    m_tmpPredYuv      = NULL;
+    m_tmpResiYuv      = NULL;
+    m_tmpRecoYuv      = NULL;
+    m_bestMergeRecoYuv = NULL;
+    m_origYuv         = NULL;
+    for (int i = 0; i < MAX_PRED_TYPES; i++)
+    {
+        m_modePredYuv[i] = NULL;
+    }
+
     m_search          = NULL;
     m_trQuant         = NULL;
     m_rdCost          = NULL;
@@ -172,75 +195,75 @@ void TEncCu::destroy()
 {
     for (int i = 0; i < m_totalDepth - 1; i++)
     {
-        if (m_interCU_2Nx2N[i])
+        if (m_interCU_2Nx2N && m_interCU_2Nx2N[i])
         {
             m_interCU_2Nx2N[i]->destroy();
             delete m_interCU_2Nx2N[i];
             m_interCU_2Nx2N[i] = NULL;
         }
-        if (m_interCU_2NxN[i])
+        if (m_interCU_2NxN && m_interCU_2NxN[i])
         {
             m_interCU_2NxN[i]->destroy();
             delete m_interCU_2NxN[i];
             m_interCU_2NxN[i] = NULL;
         }
-        if (m_interCU_Nx2N[i])
+        if (m_interCU_Nx2N && m_interCU_Nx2N[i])
         {
             m_interCU_Nx2N[i]->destroy();
             delete m_interCU_Nx2N[i];
             m_interCU_Nx2N[i] = NULL;
         }
-        if (m_intraInInterCU[i])
+        if (m_intraInInterCU && m_intraInInterCU[i])
         {
             m_intraInInterCU[i]->destroy();
             delete m_intraInInterCU[i];
             m_intraInInterCU[i] = NULL;
         }
-        if (m_mergeCU[i])
+        if (m_mergeCU && m_mergeCU[i])
         {
             m_mergeCU[i]->destroy();
             delete m_mergeCU[i];
             m_mergeCU[i] = NULL;
         }
-        if (m_bestMergeCU[i])
+        if (m_bestMergeCU && m_bestMergeCU[i])
         {
             m_bestMergeCU[i]->destroy();
             delete m_bestMergeCU[i];
             m_bestMergeCU[i] = NULL;
         }
-        if (m_bestCU[i])
+        if (m_bestCU && m_bestCU[i])
         {
             m_bestCU[i]->destroy();
             delete m_bestCU[i];
             m_bestCU[i] = NULL;
         }
-        if (m_tempCU[i])
+        if (m_tempCU && m_tempCU[i])
         {
             m_tempCU[i]->destroy();
             delete m_tempCU[i];
             m_tempCU[i] = NULL;
         }
 
-        if (m_bestPredYuv[i])
+        if (m_bestPredYuv && m_bestPredYuv[i])
         {
             m_bestPredYuv[i]->destroy();
             delete m_bestPredYuv[i];
             m_bestPredYuv[i] = NULL;
         }
-        if (m_bestResiYuv[i])
+        if (m_bestResiYuv && m_bestResiYuv[i])
         {
             m_bestResiYuv[i]->destroy();
             delete m_bestResiYuv[i];
             m_bestResiYuv[i] = NULL;
         }
-        if (m_bestRecoYuv[i])
+        if (m_bestRecoYuv && m_bestRecoYuv[i])
         {
             m_bestRecoYuv[i]->destroy();
             delete m_bestRecoYuv[i];
             m_bestRecoYuv[i] = NULL;
         }
 
-        if (m_tmpPredYuv[i])
+        if (m_tmpPredYuv && m_tmpPredYuv[i])
         {
             m_tmpPredYuv[i]->destroy();
             delete m_tmpPredYuv[i];
@@ -248,7 +271,7 @@ void TEncCu::destroy()
         }
         for (int j = 0; j < MAX_PRED_TYPES; j++)
         {
-            if (m_modePredYuv[j][i])
+            if (m_modePredYuv[j] && m_modePredYuv[j][i])
             {
                 m_modePredYuv[j][i]->destroy();
                 delete m_modePredYuv[j][i];
@@ -256,26 +279,26 @@ void TEncCu::destroy()
             }
         }
 
-        if (m_tmpResiYuv[i])
+        if (m_tmpResiYuv && m_tmpResiYuv[i])
         {
             m_tmpResiYuv[i]->destroy();
             delete m_tmpResiYuv[i];
             m_tmpResiYuv[i] = NULL;
         }
-        if (m_tmpRecoYuv[i])
+        if (m_tmpRecoYuv && m_tmpRecoYuv[i])
         {
             m_tmpRecoYuv[i]->destroy();
             delete m_tmpRecoYuv[i];
             m_tmpRecoYuv[i] = NULL;
         }
-        if (m_bestMergeRecoYuv[i])
+        if (m_bestMergeRecoYuv && m_bestMergeRecoYuv[i])
         {
             m_bestMergeRecoYuv[i]->destroy();
             delete m_bestMergeRecoYuv[i];
             m_bestMergeRecoYuv[i] = NULL;
         }
 
-        if (m_origYuv[i])
+        if (m_origYuv && m_origYuv[i])
         {
             m_origYuv[i]->destroy();
             delete m_origYuv[i];
diff -r 7b5699e6bb75 -r ba3ddc1848ff source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp	Thu Mar 13 18:29:54 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp	Fri Mar 14 12:56:01 2014 -0500
@@ -2253,7 +2253,7 @@ bool TEncSearch::predInterSearch(TComDat
                 // Pick the best possible MVP from AMVP candidates based on least residual
                 MV mvc[AMVP_MAX_NUM_CANDS];
                 uint32_t bestCost = MAX_INT;
-                int mvpIdx;
+                int mvpIdx = 0;
                 int numMvc = 0;
                 for (int i = 0; i < amvpInfo[l][ref].m_num; i++)
                 {
@@ -2376,8 +2376,6 @@ bool TEncSearch::predInterSearch(TComDat
             cu->setInterDirSubParts(merge.interDir, partAddr, partIdx, cu->getDepth(partAddr));
             cu->getCUMvField(REF_PIC_LIST_0)->setAllMvField(merge.mvField[0], partSize, partAddr, 0, partIdx);
             cu->getCUMvField(REF_PIC_LIST_1)->setAllMvField(merge.mvField[1], partSize, partAddr, 0, partIdx);
-            cu->setMVPIdx(REF_PIC_LIST_0, partAddr, list[0].mvpIdx);
-            cu->setMVPIdx(REF_PIC_LIST_1, partAddr, list[1].mvpIdx);
 
             totalmebits += merge.bits;
         }
diff -r 7b5699e6bb75 -r ba3ddc1848ff source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp	Thu Mar 13 18:29:54 2014 -0500
+++ b/source/common/x86/asm-primitives.cpp	Fri Mar 14 12:56:01 2014 -0500
@@ -126,6 +126,25 @@ extern "C" {
     p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ ## cpu; \
     p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ ## cpu
 
+#define SAD(cpu) \
+    p.sad[LUMA_8x32]  = x265_pixel_sad_8x32_ ## cpu; \
+    p.sad[LUMA_16x4]  = x265_pixel_sad_16x4_ ## cpu; \
+    p.sad[LUMA_16x12] = x265_pixel_sad_16x12_ ## cpu; \
+    p.sad[LUMA_16x32] = x265_pixel_sad_16x32_ ## cpu; \
+    p.sad[LUMA_16x64] = x265_pixel_sad_16x64_ ## cpu; \
+    p.sad[LUMA_32x8]  = x265_pixel_sad_32x8_ ## cpu; \
+    p.sad[LUMA_32x16] = x265_pixel_sad_32x16_ ## cpu; \
+    p.sad[LUMA_32x24] = x265_pixel_sad_32x24_ ## cpu; \
+    p.sad[LUMA_32x32] = x265_pixel_sad_32x32_ ## cpu; \
+    p.sad[LUMA_32x64] = x265_pixel_sad_32x64_ ## cpu; \
+    p.sad[LUMA_64x16] = x265_pixel_sad_64x16_ ## cpu; \
+    p.sad[LUMA_64x32] = x265_pixel_sad_64x32_ ## cpu; \
+    p.sad[LUMA_64x48] = x265_pixel_sad_64x48_ ## cpu; \
+    p.sad[LUMA_64x64] = x265_pixel_sad_64x64_ ## cpu; \
+    p.sad[LUMA_48x64] = x265_pixel_sad_48x64_ ## cpu; \
+    p.sad[LUMA_24x32] = x265_pixel_sad_24x32_ ## cpu; \
+    p.sad[LUMA_12x16] = x265_pixel_sad_12x16_ ## cpu
+
 #define ASSGN_SSE(cpu) \
     p.sse_pp[LUMA_8x8]   = x265_pixel_ssd_8x8_ ## cpu; \
     p.sse_pp[LUMA_8x4]   = x265_pixel_ssd_8x4_ ## cpu; \
@@ -914,6 +933,10 @@ void Setup_Assembly_Primitives(EncoderPr
 #if HIGH_BIT_DEPTH
     if (cpuMask & X265_CPU_SSE2)
     {
+        INIT8(sad, _mmx2);
+        INIT2(sad, _sse2);
+        SAD(sse2);
+
         INIT6(satd, _sse2);
         HEVC_SATD(sse2);
         p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
@@ -1025,6 +1048,7 @@ void Setup_Assembly_Primitives(EncoderPr
         INTRA_ANG_SSSE3(ssse3);
 
         p.dct[DST_4x4] = x265_dst4_ssse3;
+        p.idct[IDCT_8x8] = x265_idct8_ssse3;
     }
     if (cpuMask & X265_CPU_SSE4)
     {
@@ -1096,29 +1120,9 @@ void Setup_Assembly_Primitives(EncoderPr
 
         LUMA_VAR(_sse2);
 
-        p.sad[LUMA_8x32]  = x265_pixel_sad_8x32_sse2;
-        p.sad[LUMA_16x4]  = x265_pixel_sad_16x4_sse2;
-        p.sad[LUMA_16x12] = x265_pixel_sad_16x12_sse2;
-        p.sad[LUMA_16x32] = x265_pixel_sad_16x32_sse2;
-        p.sad[LUMA_16x64] = x265_pixel_sad_16x64_sse2;
-
-        p.sad[LUMA_32x8]  = x265_pixel_sad_32x8_sse2;
-        p.sad[LUMA_32x16] = x265_pixel_sad_32x16_sse2;
-        p.sad[LUMA_32x24] = x265_pixel_sad_32x24_sse2;
-        p.sad[LUMA_32x32] = x265_pixel_sad_32x32_sse2;
-        p.sad[LUMA_32x64] = x265_pixel_sad_32x64_sse2;
-
-        p.sad[LUMA_64x16] = x265_pixel_sad_64x16_sse2;
-        p.sad[LUMA_64x32] = x265_pixel_sad_64x32_sse2;
-        p.sad[LUMA_64x48] = x265_pixel_sad_64x48_sse2;
-        p.sad[LUMA_64x64] = x265_pixel_sad_64x64_sse2;
-
-        p.sad[LUMA_48x64] = x265_pixel_sad_48x64_sse2;
-        p.sad[LUMA_24x32] = x265_pixel_sad_24x32_sse2;
-        p.sad[LUMA_12x16] = x265_pixel_sad_12x16_sse2;
-
         ASSGN_SSE(sse2);
         INIT2(sad, _sse2);
+        SAD(sse2);
         INIT2(sad_x3, _sse2);
         INIT2(sad_x4, _sse2);
         HEVC_SATD(sse2);
@@ -1197,6 +1201,7 @@ void Setup_Assembly_Primitives(EncoderPr
         p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for i444 , chroma_p2s can be replaced by luma_p2s
 
         p.dct[DST_4x4] = x265_dst4_ssse3;
+        p.idct[IDCT_8x8] = x265_idct8_ssse3;
     }
     if (cpuMask & X265_CPU_SSE4)
     {
diff -r 7b5699e6bb75 -r ba3ddc1848ff source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm	Thu Mar 13 18:29:54 2014 -0500
+++ b/source/common/x86/dct8.asm	Fri Mar 14 12:56:01 2014 -0500
@@ -61,8 +61,26 @@ tab_dct8_2:     times 2 dd 83, 36
                 times 1 dd 50, -89, 18, 75
                 times 1 dd 18, -50, 75, -89
 
+tab_idct8_3:    times 4 dw 89, 75
+                times 4 dw 50, 18
+                times 4 dw 75, -18
+                times 4 dw -89, -50
+                times 4 dw 50, -89
+                times 4 dw 18, 75
+                times 4 dw 18, -50
+                times 4 dw 75, -89
+
 pb_unpackhlw1:  db 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15
 
+pb_idct8even:   db 0, 1, 8, 9, 4, 5, 12, 13, 0, 1, 8, 9, 4, 5, 12, 13
+
+tab_idct8_1:    times 1 dw 64, -64, 36, -83, 64, 64, 83, 36
+
+tab_idct8_2:    times 1 dw 89, 75, 50, 18, 75, -18, -89, -50
+                times 1 dw 50, -89, 18, 75, 18, -50, 75, -89
+
+pb_idct8odd:    db 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15
+
 SECTION .text
 cextern pd_1
 cextern pd_2
@@ -665,3 +683,187 @@ cglobal dct8, 3,6,7,0-16*mmsize


More information about the x265-commits mailing list