[x265-commits] [x265] cmake: use -DHAVE_ALIGNED_STACK=1 when building asm for X...
Steve Borho
steve at borho.org
Thu Dec 5 07:41:23 CET 2013
details: http://hg.videolan.org/x265/rev/f9d3154732d9
branches:
changeset: 5505:f9d3154732d9
user: Steve Borho <steve at borho.org>
date: Wed Dec 04 23:38:12 2013 -0600
description:
cmake: use -DHAVE_ALIGNED_STACK=1 when building asm for Xcode
This matches the command line we use in the Makefile, and prevents stack
corruption when debugging within Xcode
Subject: [x265] pixel-a: fix x64 calling convention for several HEVC satd functions
details: http://hg.videolan.org/x265/rev/64096a4dcdc1
branches:
changeset: 5506:64096a4dcdc1
user: Steve Borho <steve at borho.org>
date: Thu Dec 05 00:12:58 2013 -0600
description:
pixel-a: fix x64 calling convention for several HEVC satd functions
Bug fix from Min Chen
Subject: [x265] log: collect statistics after CU analysis
details: http://hg.videolan.org/x265/rev/ed7c32d83490
branches:
changeset: 5507:ed7c32d83490
user: Kavitha Sampath <kavitha at multicorewareinc.com>
date: Thu Dec 05 11:04:22 2013 +0530
description:
log: collect statistics after CU analysis
Report percentage of CU blocks encoded at each depth of P/B slice as EncCU
diffstat:
source/CMakeLists.txt | 4 +-
source/Lib/TLibEncoder/TEncCu.cpp | 195 +++++++++++++------------------------
source/Lib/TLibEncoder/TEncCu.h | 2 +-
source/common/x86/pixel-a.asm | 24 ++--
source/encoder/compress.cpp | 85 +----------------
source/encoder/encoder.cpp | 113 ++++++++++++---------
6 files changed, 150 insertions(+), 273 deletions(-)
diffs (truncated from 733 to 300 lines):
diff -r d35b42382331 -r ed7c32d83490 source/CMakeLists.txt
--- a/source/CMakeLists.txt Wed Dec 04 22:44:18 2013 -0600
+++ b/source/CMakeLists.txt Thu Dec 05 11:04:22 2013 +0530
@@ -157,13 +157,13 @@ if((MSVC_IDE OR XCODE) AND ENABLE_PRIMIT
# http://www.cmake.org/Bug/print_bug_page.php?bug_id=8170
if (X64)
if(APPLE)
- set(FLAGS -f macho64 -m amd64 -DPREFIX -DPIC -DARCH_X86_64=1 -DHAVE_ALIGNED_STACK=0)
+ set(FLAGS -f macho64 -m amd64 -DPREFIX -DPIC -DARCH_X86_64=1 -DHAVE_ALIGNED_STACK=1)
else()
set(FLAGS -f win64 -m amd64 -DARCH_X86_64=1 -DHAVE_ALIGNED_STACK=0)
endif()
else()
if(APPLE)
- set(FLAGS -f macho -DARCH_X86_64=0 -DHAVE_ALIGNED_STACK=0 -DPREFIX)
+ set(FLAGS -f macho -DARCH_X86_64=0 -DHAVE_ALIGNED_STACK=1 -DPREFIX)
else()
set(FLAGS -f win32 -DARCH_X86_64=0 -DHAVE_ALIGNED_STACK=0 -DPREFIX)
endif()
diff -r d35b42382331 -r ed7c32d83490 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Dec 04 22:44:18 2013 -0600
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Thu Dec 05 11:04:22 2013 +0530
@@ -358,7 +358,31 @@ void TEncCu::compressCU(TComDataCU* cu)
// analysis of CU
if (m_bestCU[0]->getSlice()->getSliceType() == I_SLICE)
+ {
xCompressIntraCU(m_bestCU[0], m_tempCU[0], 0);
+ int i = 0, part;
+ part = m_bestCU[0]->getDepth(i);
+ do
+ {
+ m_log->TotalCu++;
+ int next = m_bestCU[0]->getTotalNumPart() >> (part * 2);
+ if (part == g_maxCUDepth - 1 && m_bestCU[0]->getPartitionSize(i) != SIZE_2Nx2N)
+ {
+ m_log->cntIntraNxN++;
+ }
+ else
+ {
+ m_log->cntIntra[part]++;
+ if (m_bestCU[0]->getLumaIntraDir()[i] > 1)
+ m_log->cuIntraDistribution[part][ANGULAR_MODE_ID]++;
+ else
+ m_log->cuIntraDistribution[part][m_bestCU[0]->getLumaIntraDir()[i]]++;
+ }
+ i += next;
+ part = m_bestCU[0]->getDepth(i);
+ }
+ while (part < g_maxCUDepth);
+ }
else
{
if (m_cfg->param.rdLevel < 5)
@@ -371,6 +395,47 @@ void TEncCu::compressCU(TComDataCU* cu)
}
else
xCompressCU(m_bestCU[0], m_tempCU[0], 0);
+ int i = 0, part;
+ part = cu->getDepth(i);
+ do
+ {
+ m_log->cntTotalCu[part]++;
+ int next = m_bestCU[0]->getTotalNumPart() >> (part * 2);
+ if (cu->isSkipped(i))
+ {
+ m_log->cntSkipCu[part]++;
+ }
+ else
+ {
+ m_log->TotalCu++;
+ if (cu->getPredictionMode(0) == MODE_INTER)
+ {
+ m_log->cntInter[part]++;
+ if (cu->getPartitionSize(0) < AMP_ID)
+ m_log->cuInterDistribution[part][cu->getPartitionSize(0)]++;
+ else
+ m_log->cuInterDistribution[part][AMP_ID]++;
+ }
+ else if (cu->getPredictionMode(0) == MODE_INTRA)
+ {
+ if (part == g_maxCUDepth - 1 && cu->getPartitionSize(0) == SIZE_NxN)
+ {
+ m_log->cntIntraNxN++;
+ }
+ else
+ {
+ m_log->cntIntra[part]++;
+ if (cu->getLumaIntraDir()[0] > 1)
+ m_log->cuIntraDistribution[part][ANGULAR_MODE_ID]++;
+ else
+ m_log->cuIntraDistribution[part][cu->getLumaIntraDir()[0]]++;
+ }
+ }
+ }
+ i = i + next;
+ part = cu->getDepth(i);
+ }
+ while (part < g_maxCUDepth);
}
}
@@ -467,8 +532,6 @@ void TEncCu::xCompressIntraCU(TComDataCU
{
//PPAScopeEvent(TEncCu_xCompressIntraCU + depth);
- m_log->cntTotalCu[depth]++;
- int boundaryCu = 0;
m_abortFlag = false;
TComPic* pic = outBestCU->getPic();
@@ -535,14 +598,9 @@ void TEncCu::xCompressIntraCU(TComDataCU
// Early CU determination
if (outBestCU->isSkipped(0))
- {
- m_log->cntSkipCu[depth]++;
bSubBranch = false;
- }
else
- {
bSubBranch = true;
- }
}
else if (!(bSliceEnd && bInsidePicture))
{
@@ -575,25 +633,14 @@ void TEncCu::xCompressIntraCU(TComDataCU
m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
}
- // The following if condition has to be commented out in case the early Abort based on comparison of parentCu cost, childCU cost is not required.
- if (outBestCU->isIntra(0))
- {
- xCompressIntraCU(subBestPartCU[partUnitIdx], subTempPartCU[partUnitIdx], nextDepth);
- }
- else
- {
- xCompressIntraCU(subBestPartCU[partUnitIdx], subTempPartCU[partUnitIdx], nextDepth);
- }
- {
- outTempCU->copyPartFrom(subBestPartCU[partUnitIdx], partUnitIdx, nextDepth); // Keep best part data to current temporary data.
- xCopyYuv2Tmp(subBestPartCU[partUnitIdx]->getTotalNumPart() * partUnitIdx, nextDepth);
- }
+ xCompressIntraCU(subBestPartCU[partUnitIdx], subTempPartCU[partUnitIdx], nextDepth);
+ outTempCU->copyPartFrom(subBestPartCU[partUnitIdx], partUnitIdx, nextDepth); // Keep best part data to current temporary data.
+ xCopyYuv2Tmp(subBestPartCU[partUnitIdx]->getTotalNumPart() * partUnitIdx, nextDepth);
}
else if (bInSlice)
{
subBestPartCU[partUnitIdx]->copyToPic(nextDepth);
outTempCU->copyPartFrom(subBestPartCU[partUnitIdx], partUnitIdx, nextDepth);
- boundaryCu++;
}
}
@@ -633,30 +680,9 @@ void TEncCu::xCompressIntraCU(TComDataCU
}
m_rdSbacCoders[nextDepth][CI_NEXT_BEST]->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
- if (outBestCU->m_totalCost < outTempCU->m_totalCost)
- {
- m_log->cntIntra[depth]++;
- for (int i = 0; i < 4; i++)
- {
- if (outTempCU->getPartitionSize(i) != SIZE_NxN)
- m_log->cntIntra[depth + 1]--;
- else
- m_log->cntIntraNxN--;
- }
- m_log->cntIntra[depth + 1] += boundaryCu;
- }
xCheckBestMode(outBestCU, outTempCU, depth); // RD compare current prediction with split prediction.
}
- if (depth == g_maxCUDepth - 1 && bSubBranch)
- {
- if (outBestCU->getPartitionSize(0) == SIZE_NxN)
- {
- m_log->cntIntraNxN++;
- }
- else
- m_log->cntIntra[depth]++;
- }
outBestCU->copyToPic(depth); // Copy Best data to Picture for next partition prediction.
// Copy Yuv data to picture Yuv
@@ -675,7 +701,6 @@ void TEncCu::xCompressCU(TComDataCU*& ou
{
//PPAScopeEvent(TEncCu_xCompressCU + depth);
- m_log->cntTotalCu[depth]++;
TComPic* pic = outBestCU->getPic();
m_abortFlag = false;
@@ -923,14 +948,9 @@ void TEncCu::xCompressCU(TComDataCU*& ou
// Early CU determination
if (outBestCU->isSkipped(0))
- {
- m_log->cntSkipCu[depth]++;
bSubBranch = false;
- }
else
- {
bSubBranch = true;
- }
}
else if (!(bSliceEnd && bInsidePicture))
{
@@ -971,16 +991,7 @@ void TEncCu::xCompressCU(TComDataCU*& ou
m_rdSbacCoders[nextDepth][CI_CURR_BEST]->load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]);
}
- // The following if condition has to be commented out in case the early Abort based on comparison of parentCu cost, childCU cost is not required.
- if (outBestCU->isIntra(0))
- {
- xCompressCU(subBestPartCU, subTempPartCU, nextDepth);
- }
- else
- {
- xCompressCU(subBestPartCU, subTempPartCU, nextDepth);
- }
-
+ xCompressCU(subBestPartCU, subTempPartCU, nextDepth);
outTempCU->copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data.
xCopyYuv2Tmp(subBestPartCU->getTotalNumPart() * partUnitIdx, nextDepth);
}
@@ -1028,76 +1039,8 @@ void TEncCu::xCompressCU(TComDataCU*& ou
}
m_rdSbacCoders[nextDepth][CI_NEXT_BEST]->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
- if (outBestCU->m_totalCost < outTempCU->m_totalCost)
- {
- if (outBestCU->getPredictionMode(0) == MODE_INTER)
- {
- m_log->cntInter[depth]++;
- if (outBestCU->getPartitionSize(0) < AMP_ID)
- {
- m_log->cuInterDistribution[depth][outBestCU->getPartitionSize(0)]++;
- }
- else
- {
- m_log->cuInterDistribution[depth][AMP_ID]++;
- }
- }
- else if (outBestCU->getPredictionMode(0) == MODE_INTRA)
- {
- m_log->cntIntra[depth]++;
- if (outBestCU->getLumaIntraDir()[0] > 1)
- {
- m_log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
- }
- else
- {
- m_log->cuIntraDistribution[depth][outBestCU->getLumaIntraDir()[0]]++;
- }
- }
- }
- else
- {
- m_log->cntSplit[depth]++;
- }
xCheckBestMode(outBestCU, outTempCU, depth); // RD compare current CU against split
}
- if (depth == g_maxCUDepth - 1)
- {
- if (!outBestCU->isSkipped(0))
- {
- if (outBestCU->getPredictionMode(0) == MODE_INTER)
- {
- m_log->cntInter[depth]++;
- if (outBestCU->getPartitionSize(0) < AMP_ID)
- {
- m_log->cuInterDistribution[depth][outBestCU->getPartitionSize(0)]++;
- }
- else
- {
- m_log->cuInterDistribution[depth][AMP_ID]++;
- }
- }
- else if (outBestCU->getPredictionMode(0) == MODE_INTRA)
- {
- m_log->cntIntra[depth]++;
- if (outBestCU->getPartitionSize(0) == SIZE_2Nx2N)
- {
- if (outBestCU->getLumaIntraDir()[0] > 1)
- {
- m_log->cuIntraDistribution[depth][ANGULAR_MODE_ID]++;
- }
- else
- {
- m_log->cuIntraDistribution[depth][outBestCU->getLumaIntraDir()[0]]++;
- }
- }
- else if (outBestCU->getPartitionSize(0) == SIZE_NxN)
- {
- m_log->cntIntraNxN++;
- }
- }
- }
More information about the x265-commits
mailing list