[x265-commits] [x265] TEncSearch: init best AMVP candidate to zero.
Deepthi Nandakumar
deepthi at multicorewareinc.com
Sat Mar 15 04:00:14 CET 2014
details: http://hg.videolan.org/x265/rev/ed48f84e541b
branches:
changeset: 6506:ed48f84e541b
user: Deepthi Nandakumar <deepthi at multicorewareinc.com>
date: Fri Mar 14 14:21:34 2014 +0530
description:
TEncSearch: init best AMVP candidate to zero.
Subject: [x265] vbv: bugfix-calculate intraCuCostPerRow for vbv
details: http://hg.videolan.org/x265/rev/394481c40cf9
branches: stable
changeset: 6507:394481c40cf9
user: Santhoshini Sekar <santhoshini at multicorewareinc.com>
date: Fri Mar 14 14:14:57 2014 +0530
description:
vbv: bugfix-calculate intraCuCostPerRow for vbv
Subject: [x265] Merge with stable
details: http://hg.videolan.org/x265/rev/d5a4296dbfe7
branches:
changeset: 6508:d5a4296dbfe7
user: Steve Borho <steve at borho.org>
date: Fri Mar 14 12:20:31 2014 -0500
description:
Merge with stable
Subject: [x265] added asm primitives for 10bpp sad functions
details: http://hg.videolan.org/x265/rev/f36c9130de66
branches:
changeset: 6509:f36c9130de66
user: Yuvaraj Venkatesh <yuvaraj at multicorewareinc.com>
date: Fri Mar 14 12:05:53 2014 +0530
description:
added asm primitives for 10bpp sad functions
Subject: [x265] asm: 8bpp and 10bpp code for idct8x8 module
details: http://hg.videolan.org/x265/rev/a4cb4fbff864
branches:
changeset: 6510:a4cb4fbff864
user: Murugan Vairavel <murugan at multicorewareinc.com>
date: Fri Mar 14 12:48:46 2014 +0530
description:
asm: 8bpp and 10bpp code for idct8x8 module
Subject: [x265] TEncCu: initialize variables, handle malloc failures more cleanly
details: http://hg.videolan.org/x265/rev/93ea767e7df0
branches:
changeset: 6511:93ea767e7df0
user: Wenju He <wenju at multicorewareinc.com>
date: Fri Mar 14 15:36:42 2014 +0800
description:
TEncCu: initialize variables, handle malloc failures more cleanly
Subject: [x265] TEncSearch: mvpIdx shares storage with mergeIdx, do not set for merge CUs
details: http://hg.videolan.org/x265/rev/ba3ddc1848ff
branches:
changeset: 6512:ba3ddc1848ff
user: Steve Borho <steve at borho.org>
date: Fri Mar 14 12:56:01 2014 -0500
description:
TEncSearch: mvpIdx shares storage with mergeIdx, do not set for merge CUs
diffstat:
source/Lib/TLibEncoder/TEncCu.cpp | 57 ++++++--
source/Lib/TLibEncoder/TEncSearch.cpp | 4 +-
source/common/x86/asm-primitives.cpp | 47 ++++---
source/common/x86/dct8.asm | 202 ++++++++++++++++++++++++++++++++++
source/common/x86/dct8.h | 1 +
source/common/x86/sad16-a.asm | 1 +
source/encoder/frameencoder.cpp | 1 +
source/test/mbdstharness.cpp | 7 +-
8 files changed, 275 insertions(+), 45 deletions(-)
diffs (truncated from 548 to 300 lines):
diff -r 7b5699e6bb75 -r ba3ddc1848ff source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Thu Mar 13 18:29:54 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Fri Mar 14 12:56:01 2014 -0500
@@ -54,6 +54,29 @@ using namespace x265;
TEncCu::TEncCu()
{
+ m_interCU_2Nx2N = NULL;
+ m_interCU_2NxN = NULL;
+ m_interCU_Nx2N = NULL;
+ m_intraInInterCU = NULL;
+ m_mergeCU = NULL;
+ m_bestMergeCU = NULL;
+ m_bestCU = NULL;
+ m_tempCU = NULL;
+
+ m_bestPredYuv = NULL;
+ m_bestResiYuv = NULL;
+ m_bestRecoYuv = NULL;
+
+ m_tmpPredYuv = NULL;
+ m_tmpResiYuv = NULL;
+ m_tmpRecoYuv = NULL;
+ m_bestMergeRecoYuv = NULL;
+ m_origYuv = NULL;
+ for (int i = 0; i < MAX_PRED_TYPES; i++)
+ {
+ m_modePredYuv[i] = NULL;
+ }
+
m_search = NULL;
m_trQuant = NULL;
m_rdCost = NULL;
@@ -172,75 +195,75 @@ void TEncCu::destroy()
{
for (int i = 0; i < m_totalDepth - 1; i++)
{
- if (m_interCU_2Nx2N[i])
+ if (m_interCU_2Nx2N && m_interCU_2Nx2N[i])
{
m_interCU_2Nx2N[i]->destroy();
delete m_interCU_2Nx2N[i];
m_interCU_2Nx2N[i] = NULL;
}
- if (m_interCU_2NxN[i])
+ if (m_interCU_2NxN && m_interCU_2NxN[i])
{
m_interCU_2NxN[i]->destroy();
delete m_interCU_2NxN[i];
m_interCU_2NxN[i] = NULL;
}
- if (m_interCU_Nx2N[i])
+ if (m_interCU_Nx2N && m_interCU_Nx2N[i])
{
m_interCU_Nx2N[i]->destroy();
delete m_interCU_Nx2N[i];
m_interCU_Nx2N[i] = NULL;
}
- if (m_intraInInterCU[i])
+ if (m_intraInInterCU && m_intraInInterCU[i])
{
m_intraInInterCU[i]->destroy();
delete m_intraInInterCU[i];
m_intraInInterCU[i] = NULL;
}
- if (m_mergeCU[i])
+ if (m_mergeCU && m_mergeCU[i])
{
m_mergeCU[i]->destroy();
delete m_mergeCU[i];
m_mergeCU[i] = NULL;
}
- if (m_bestMergeCU[i])
+ if (m_bestMergeCU && m_bestMergeCU[i])
{
m_bestMergeCU[i]->destroy();
delete m_bestMergeCU[i];
m_bestMergeCU[i] = NULL;
}
- if (m_bestCU[i])
+ if (m_bestCU && m_bestCU[i])
{
m_bestCU[i]->destroy();
delete m_bestCU[i];
m_bestCU[i] = NULL;
}
- if (m_tempCU[i])
+ if (m_tempCU && m_tempCU[i])
{
m_tempCU[i]->destroy();
delete m_tempCU[i];
m_tempCU[i] = NULL;
}
- if (m_bestPredYuv[i])
+ if (m_bestPredYuv && m_bestPredYuv[i])
{
m_bestPredYuv[i]->destroy();
delete m_bestPredYuv[i];
m_bestPredYuv[i] = NULL;
}
- if (m_bestResiYuv[i])
+ if (m_bestResiYuv && m_bestResiYuv[i])
{
m_bestResiYuv[i]->destroy();
delete m_bestResiYuv[i];
m_bestResiYuv[i] = NULL;
}
- if (m_bestRecoYuv[i])
+ if (m_bestRecoYuv && m_bestRecoYuv[i])
{
m_bestRecoYuv[i]->destroy();
delete m_bestRecoYuv[i];
m_bestRecoYuv[i] = NULL;
}
- if (m_tmpPredYuv[i])
+ if (m_tmpPredYuv && m_tmpPredYuv[i])
{
m_tmpPredYuv[i]->destroy();
delete m_tmpPredYuv[i];
@@ -248,7 +271,7 @@ void TEncCu::destroy()
}
for (int j = 0; j < MAX_PRED_TYPES; j++)
{
- if (m_modePredYuv[j][i])
+ if (m_modePredYuv[j] && m_modePredYuv[j][i])
{
m_modePredYuv[j][i]->destroy();
delete m_modePredYuv[j][i];
@@ -256,26 +279,26 @@ void TEncCu::destroy()
}
}
- if (m_tmpResiYuv[i])
+ if (m_tmpResiYuv && m_tmpResiYuv[i])
{
m_tmpResiYuv[i]->destroy();
delete m_tmpResiYuv[i];
m_tmpResiYuv[i] = NULL;
}
- if (m_tmpRecoYuv[i])
+ if (m_tmpRecoYuv && m_tmpRecoYuv[i])
{
m_tmpRecoYuv[i]->destroy();
delete m_tmpRecoYuv[i];
m_tmpRecoYuv[i] = NULL;
}
- if (m_bestMergeRecoYuv[i])
+ if (m_bestMergeRecoYuv && m_bestMergeRecoYuv[i])
{
m_bestMergeRecoYuv[i]->destroy();
delete m_bestMergeRecoYuv[i];
m_bestMergeRecoYuv[i] = NULL;
}
- if (m_origYuv[i])
+ if (m_origYuv && m_origYuv[i])
{
m_origYuv[i]->destroy();
delete m_origYuv[i];
diff -r 7b5699e6bb75 -r ba3ddc1848ff source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Mar 13 18:29:54 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Mar 14 12:56:01 2014 -0500
@@ -2253,7 +2253,7 @@ bool TEncSearch::predInterSearch(TComDat
// Pick the best possible MVP from AMVP candidates based on least residual
MV mvc[AMVP_MAX_NUM_CANDS];
uint32_t bestCost = MAX_INT;
- int mvpIdx;
+ int mvpIdx = 0;
int numMvc = 0;
for (int i = 0; i < amvpInfo[l][ref].m_num; i++)
{
@@ -2376,8 +2376,6 @@ bool TEncSearch::predInterSearch(TComDat
cu->setInterDirSubParts(merge.interDir, partAddr, partIdx, cu->getDepth(partAddr));
cu->getCUMvField(REF_PIC_LIST_0)->setAllMvField(merge.mvField[0], partSize, partAddr, 0, partIdx);
cu->getCUMvField(REF_PIC_LIST_1)->setAllMvField(merge.mvField[1], partSize, partAddr, 0, partIdx);
- cu->setMVPIdx(REF_PIC_LIST_0, partAddr, list[0].mvpIdx);
- cu->setMVPIdx(REF_PIC_LIST_1, partAddr, list[1].mvpIdx);
totalmebits += merge.bits;
}
diff -r 7b5699e6bb75 -r ba3ddc1848ff source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Mar 13 18:29:54 2014 -0500
+++ b/source/common/x86/asm-primitives.cpp Fri Mar 14 12:56:01 2014 -0500
@@ -126,6 +126,25 @@ extern "C" {
p.sad_x4[LUMA_64x48] = x265_pixel_sad_x4_64x48_ ## cpu; \
p.sad_x4[LUMA_64x64] = x265_pixel_sad_x4_64x64_ ## cpu
+#define SAD(cpu) \
+ p.sad[LUMA_8x32] = x265_pixel_sad_8x32_ ## cpu; \
+ p.sad[LUMA_16x4] = x265_pixel_sad_16x4_ ## cpu; \
+ p.sad[LUMA_16x12] = x265_pixel_sad_16x12_ ## cpu; \
+ p.sad[LUMA_16x32] = x265_pixel_sad_16x32_ ## cpu; \
+ p.sad[LUMA_16x64] = x265_pixel_sad_16x64_ ## cpu; \
+ p.sad[LUMA_32x8] = x265_pixel_sad_32x8_ ## cpu; \
+ p.sad[LUMA_32x16] = x265_pixel_sad_32x16_ ## cpu; \
+ p.sad[LUMA_32x24] = x265_pixel_sad_32x24_ ## cpu; \
+ p.sad[LUMA_32x32] = x265_pixel_sad_32x32_ ## cpu; \
+ p.sad[LUMA_32x64] = x265_pixel_sad_32x64_ ## cpu; \
+ p.sad[LUMA_64x16] = x265_pixel_sad_64x16_ ## cpu; \
+ p.sad[LUMA_64x32] = x265_pixel_sad_64x32_ ## cpu; \
+ p.sad[LUMA_64x48] = x265_pixel_sad_64x48_ ## cpu; \
+ p.sad[LUMA_64x64] = x265_pixel_sad_64x64_ ## cpu; \
+ p.sad[LUMA_48x64] = x265_pixel_sad_48x64_ ## cpu; \
+ p.sad[LUMA_24x32] = x265_pixel_sad_24x32_ ## cpu; \
+ p.sad[LUMA_12x16] = x265_pixel_sad_12x16_ ## cpu
+
#define ASSGN_SSE(cpu) \
p.sse_pp[LUMA_8x8] = x265_pixel_ssd_8x8_ ## cpu; \
p.sse_pp[LUMA_8x4] = x265_pixel_ssd_8x4_ ## cpu; \
@@ -914,6 +933,10 @@ void Setup_Assembly_Primitives(EncoderPr
#if HIGH_BIT_DEPTH
if (cpuMask & X265_CPU_SSE2)
{
+ INIT8(sad, _mmx2);
+ INIT2(sad, _sse2);
+ SAD(sse2);
+
INIT6(satd, _sse2);
HEVC_SATD(sse2);
p.satd[LUMA_4x4] = x265_pixel_satd_4x4_mmx2;
@@ -1025,6 +1048,7 @@ void Setup_Assembly_Primitives(EncoderPr
INTRA_ANG_SSSE3(ssse3);
p.dct[DST_4x4] = x265_dst4_ssse3;
+ p.idct[IDCT_8x8] = x265_idct8_ssse3;
}
if (cpuMask & X265_CPU_SSE4)
{
@@ -1096,29 +1120,9 @@ void Setup_Assembly_Primitives(EncoderPr
LUMA_VAR(_sse2);
- p.sad[LUMA_8x32] = x265_pixel_sad_8x32_sse2;
- p.sad[LUMA_16x4] = x265_pixel_sad_16x4_sse2;
- p.sad[LUMA_16x12] = x265_pixel_sad_16x12_sse2;
- p.sad[LUMA_16x32] = x265_pixel_sad_16x32_sse2;
- p.sad[LUMA_16x64] = x265_pixel_sad_16x64_sse2;
-
- p.sad[LUMA_32x8] = x265_pixel_sad_32x8_sse2;
- p.sad[LUMA_32x16] = x265_pixel_sad_32x16_sse2;
- p.sad[LUMA_32x24] = x265_pixel_sad_32x24_sse2;
- p.sad[LUMA_32x32] = x265_pixel_sad_32x32_sse2;
- p.sad[LUMA_32x64] = x265_pixel_sad_32x64_sse2;
-
- p.sad[LUMA_64x16] = x265_pixel_sad_64x16_sse2;
- p.sad[LUMA_64x32] = x265_pixel_sad_64x32_sse2;
- p.sad[LUMA_64x48] = x265_pixel_sad_64x48_sse2;
- p.sad[LUMA_64x64] = x265_pixel_sad_64x64_sse2;
-
- p.sad[LUMA_48x64] = x265_pixel_sad_48x64_sse2;
- p.sad[LUMA_24x32] = x265_pixel_sad_24x32_sse2;
- p.sad[LUMA_12x16] = x265_pixel_sad_12x16_sse2;
-
ASSGN_SSE(sse2);
INIT2(sad, _sse2);
+ SAD(sse2);
INIT2(sad_x3, _sse2);
INIT2(sad_x4, _sse2);
HEVC_SATD(sse2);
@@ -1197,6 +1201,7 @@ void Setup_Assembly_Primitives(EncoderPr
p.chroma_p2s[X265_CSP_I444] = x265_luma_p2s_ssse3; // for i444 , chroma_p2s can be replaced by luma_p2s
p.dct[DST_4x4] = x265_dst4_ssse3;
+ p.idct[IDCT_8x8] = x265_idct8_ssse3;
}
if (cpuMask & X265_CPU_SSE4)
{
diff -r 7b5699e6bb75 -r ba3ddc1848ff source/common/x86/dct8.asm
--- a/source/common/x86/dct8.asm Thu Mar 13 18:29:54 2014 -0500
+++ b/source/common/x86/dct8.asm Fri Mar 14 12:56:01 2014 -0500
@@ -61,8 +61,26 @@ tab_dct8_2: times 2 dd 83, 36
times 1 dd 50, -89, 18, 75
times 1 dd 18, -50, 75, -89
+tab_idct8_3: times 4 dw 89, 75
+ times 4 dw 50, 18
+ times 4 dw 75, -18
+ times 4 dw -89, -50
+ times 4 dw 50, -89
+ times 4 dw 18, 75
+ times 4 dw 18, -50
+ times 4 dw 75, -89
+
pb_unpackhlw1: db 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15
+pb_idct8even: db 0, 1, 8, 9, 4, 5, 12, 13, 0, 1, 8, 9, 4, 5, 12, 13
+
+tab_idct8_1: times 1 dw 64, -64, 36, -83, 64, 64, 83, 36
+
+tab_idct8_2: times 1 dw 89, 75, 50, 18, 75, -18, -89, -50
+ times 1 dw 50, -89, 18, 75, 18, -50, 75, -89
+
+pb_idct8odd: db 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15
+
SECTION .text
cextern pd_1
cextern pd_2
@@ -665,3 +683,187 @@ cglobal dct8, 3,6,7,0-16*mmsize
More information about the x265-commits mailing list