[x265] sao: some cleanups
Satoshi Nakagawa
nakagawa424 at oki.com
Fri Sep 12 04:04:08 CEST 2014
# HG changeset patch
# User Satoshi Nakagawa <nakagawa424 at oki.com>
# Date 1410487314 -32400
# Fri Sep 12 11:01:54 2014 +0900
# Node ID 8a2312df90f99b8b479940141c6dafa4b96581cf
# Parent 7e29b10982d2eb7fd79f581d99996f04184522ba
sao: some cleanups
diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/common.h
--- a/source/common/common.h Thu Sep 11 19:24:28 2014 +0530
+++ b/source/common/common.h Fri Sep 12 11:01:54 2014 +0900
@@ -200,6 +200,8 @@
namespace x265 {
+enum { SAO_NUM_OFFSET = 4 };
+
// NOTE: MUST be alignment to 16 or 32 bytes for asm code
struct NoiseReduction
{
@@ -215,9 +217,8 @@
enum { NUM_DOWN_PART = 4 };
int bestType;
- int length;
int subTypeIdx; // indicates EO class or BO band position
- int offset[4];
+ int offset[SAO_NUM_OFFSET];
int startCUX;
int startCUY;
int endCUX;
@@ -245,10 +246,9 @@
bool mergeLeftFlag;
int typeIdx;
int subTypeIdx; // indicates EO class or BO band position
- int offset[4];
+ int offset[SAO_NUM_OFFSET];
int partIdx;
int partIdxTmp;
- int length;
void reset()
{
diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/x86/loopfilter.asm
--- a/source/common/x86/loopfilter.asm Thu Sep 11 19:24:28 2014 +0530
+++ b/source/common/x86/loopfilter.asm Fri Sep 12 11:01:54 2014 +0900
@@ -44,7 +44,7 @@
pslldq m0, 15 ; m0 = [iSignLeft x .. x]
pcmpeqb m4, m4 ; m4 = [pb -1]
pxor m5, m5 ; m5 = 0
- movu m6, [r1] ; m6 = m_iOffsetEo
+ movh m6, [r1] ; m6 = m_offsetEo
.loop:
movu m7, [r0] ; m1 = pRec[x]
diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp Thu Sep 11 19:24:28 2014 +0530
+++ b/source/encoder/entropy.cpp Fri Sep 12 11:01:54 2014 +0900
@@ -879,19 +879,19 @@
if (symbol)
{
- if (saoLcuParam->typeIdx < 4 && compIdx != 2)
+ if (saoLcuParam->typeIdx < SAO_BO && compIdx != 2)
saoLcuParam->subTypeIdx = saoLcuParam->typeIdx;
int offsetTh = 1 << X265_MIN(X265_DEPTH - 5, 5);
if (saoLcuParam->typeIdx == SAO_BO)
{
- for (i = 0; i < saoLcuParam->length; i++)
+ for (i = 0; i < SAO_BO_LEN; i++)
{
uint32_t absOffset = ((saoLcuParam->offset[i] < 0) ? -saoLcuParam->offset[i] : saoLcuParam->offset[i]);
codeSaoMaxUvlc(absOffset, offsetTh - 1);
}
- for (i = 0; i < saoLcuParam->length; i++)
+ for (i = 0; i < SAO_BO_LEN; i++)
{
if (saoLcuParam->offset[i] != 0)
{
@@ -903,7 +903,7 @@
symbol = (uint32_t)(saoLcuParam->subTypeIdx);
codeSaoUflc(5, symbol);
}
- else if (saoLcuParam->typeIdx < 4)
+ else // if (saoLcuParam->typeIdx < SAO_BO)
{
codeSaoMaxUvlc(saoLcuParam->offset[0], offsetTh - 1);
codeSaoMaxUvlc(saoLcuParam->offset[1], offsetTh - 1);
diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.cpp
--- a/source/encoder/sao.cpp Thu Sep 11 19:24:28 2014 +0530
+++ b/source/encoder/sao.cpp Fri Sep 12 11:01:54 2014 +0900
@@ -79,26 +79,13 @@
341, // level 4
};
-const uint32_t SAO::s_eoTable[9] =
+const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
{
1, // 0
2, // 1
0, // 2
3, // 3
- 4, // 4
- 0, // 5
- 0, // 6
- 0, // 7
- 0
-};
-
-const int SAO::s_numClass[MAX_NUM_SAO_TYPE] =
-{
- SAO_EO_LEN,
- SAO_EO_LEN,
- SAO_EO_LEN,
- SAO_EO_LEN,
- SAO_BO_LEN
+ 4 // 4
};
SAO::SAO()
@@ -122,8 +109,6 @@
m_clipTable = NULL;
m_clipTableBase = NULL;
m_offsetBo = NULL;
- m_chromaOffsetBo = NULL;
- m_tableBo = NULL;
m_tmpU1[0] = NULL;
m_tmpU1[1] = NULL;
m_tmpU1[2] = NULL;
@@ -162,18 +147,12 @@
* m_numTotalParts must allow for sufficient storage in any allocated arrays */
m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]);
- int pixelRange = 1 << X265_DEPTH;
- int boRangeShift = X265_DEPTH - SAO_BO_BITS;
- pixel maxY = (1 << X265_DEPTH) - 1;
- pixel minY = 0;
- pixel rangeExt = maxY >> 1;
+ const pixel maxY = (1 << X265_DEPTH) - 1;
+ const pixel rangeExt = maxY >> 1;
int numLcu = m_numCuInWidth * m_numCuInHeight;
- CHECKED_MALLOC(m_tableBo, pixel, pixelRange);
-
- CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);
- CHECKED_MALLOC(m_offsetBo, int, maxY + 2 * rangeExt);
- CHECKED_MALLOC(m_chromaOffsetBo , int, maxY + 2 * rangeExt);
+ CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);
+ CHECKED_MALLOC(m_offsetBo, pixel, maxY + 2 * rangeExt);
CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1);
CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1);
@@ -199,19 +178,16 @@
CHECKED_MALLOC(m_countPreDblk, PerPlane, numLcu);
CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numLcu);
- for (int k2 = 0; k2 < pixelRange; k2++)
- m_tableBo[k2] = (pixel)(1 + (k2 >> boRangeShift));
+ m_clipTable = &(m_clipTableBase[rangeExt]);
- for (int i = 0; i < (minY + rangeExt); i++)
- m_clipTableBase[i] = minY;
+ for (int i = 0; i < rangeExt; i++)
+ m_clipTableBase[i] = 0;
- for (int i = minY + rangeExt; i < (maxY + rangeExt); i++)
- m_clipTableBase[i] = (pixel)(i - rangeExt);
+ for (int i = 0; i < maxY; i++)
+ m_clipTable[i] = (pixel)i;
- for (int i = maxY + rangeExt; i < (maxY + 2 * rangeExt); i++)
- m_clipTableBase[i] = maxY;
-
- m_clipTable = &(m_clipTableBase[rangeExt]);
+ for (int i = maxY; i < maxY + rangeExt; i++)
+ m_clipTable[i] = maxY;
return true;
@@ -223,8 +199,6 @@
{
X265_FREE(m_clipTableBase);
X265_FREE(m_offsetBo);
- X265_FREE(m_tableBo);
- X265_FREE(m_chromaOffsetBo);
X265_FREE(m_tmpL1);
X265_FREE(m_tmpL2);
@@ -271,12 +245,9 @@
/* recursively initialize SAO parameters (only once) */
void SAO::initSAOParam(SAOParam *saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const
{
- int j;
int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol);
- SAOQTPart* saoPart;
-
- saoPart = &(saoParam->saoPart[plane][partIdx]);
+ SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);
saoPart->partIdx = partIdx;
saoPart->partLevel = partLevel;
@@ -290,11 +261,10 @@
saoPart->upPartIdx = parentPartIdx;
saoPart->bestType = -1;
- saoPart->length = 0;
saoPart->subTypeIdx = 0;
- for (j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)
saoPart->offset[j] = 0;
if (saoPart->partLevel < m_maxSplitLevel)
@@ -371,14 +341,13 @@
for (int i = 0; i < s_numCulPartsLevel[m_maxSplitLevel]; i++)
{
saoParam->saoPart[c][i].bestType = -1;
- saoParam->saoPart[c][i].length = 0;
saoParam->saoPart[c][i].bSplit = false;
saoParam->saoPart[c][i].bProcessed = false;
saoParam->saoPart[c][i].minCost = MAX_DOUBLE;
saoParam->saoPart[c][i].minDist = MAX_INT;
saoParam->saoPart[c][i].minRate = MAX_INT;
saoParam->saoPart[c][i].subTypeIdx = 0;
- for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)
{
saoParam->saoPart[c][i].offset[j] = 0;
saoParam->saoPart[c][i].offset[j] = 0;
@@ -454,18 +423,12 @@
int lcuHeight;
int rpelx;
int bpely;
- int edgeType;
- int signDown;
- int signDown1;
- int signDown2;
int picWidthTmp;
int picHeightTmp;
int startX;
int startY;
int endX;
int endY;
- int shift;
- int cuHeightTmp;
pixel* tmpL;
pixel* tmpU;
uint32_t lpelx = tmpCu->getCUPelX();
@@ -505,22 +468,18 @@
// if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
{
- cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize >> m_vChromaShift);
- shift = isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >> m_hChromaShift) - 1);
+ int cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize >> m_vChromaShift);
+ pixel* recR = &rec[isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >> m_hChromaShift) - 1)];
for (int i = 0; i < cuHeightTmp + 1; i++)
{
- m_tmpL2[i] = rec[shift];
- rec += stride;
+ m_tmpL2[i] = *recR;
+ recR += stride;
}
- rec -= (stride * (cuHeightTmp + 1));
-
tmpL = m_tmpL1;
tmpU = &(m_tmpU1[plane][lpelx]);
}
- int32_t *offsetBo = isLuma ? m_offsetBo : m_chromaOffsetBo;
-
switch (saoType)
{
case SAO_EO_0: // dir: -
@@ -536,10 +495,10 @@
for (x = startX; x < endX; x++)
{
int signRight = signOf(rec[x] - rec[x + 1]);
- edgeType = signRight + signLeft + 2;
+ int edgeType = signRight + signLeft + 2;
signLeft = -signRight;
- rec[x] = (pixel)Clip3(0, (1 << X265_DEPTH) - 1, rec[x] + m_offsetEo[edgeType]);
+ rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
}
rec += stride;
@@ -584,8 +543,8 @@
{
for (x = 0; x < lcuWidth; x++)
{
- signDown = signOf(rec[x] - rec[x + stride]);
- edgeType = signDown + upBuff1[x] + 2;
+ int signDown = signOf(rec[x] - rec[x + stride]);
+ int edgeType = signDown + upBuff1[x] + 2;
upBuff1[x] = -signDown;
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
@@ -612,11 +571,11 @@
for (y = startY; y < endY; y++)
{
- signDown2 = signOf(rec[stride + startX] - tmpL[y]);
+ int signDown2 = signOf(rec[stride + startX] - tmpL[y]);
for (x = startX; x < endX; x++)
{
- signDown1 = signOf(rec[x] - rec[x + stride + 1]);
- edgeType = signDown1 + upBuff1[x] + 2;
+ int signDown1 = signOf(rec[x] - rec[x + stride + 1]);
+ int edgeType = signDown1 + upBuff1[x] + 2;
upBufft[x + 1] = -signDown1;
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
}
@@ -647,8 +606,8 @@
for (y = startY; y < endY; y++)
{
x = startX;
- signDown1 = signOf(rec[x] - tmpL[y + 1]);
- edgeType = signDown1 + upBuff1[x] + 2;
+ int signDown1 = signOf(rec[x] - tmpL[y + 1]);
+ int edgeType = signDown1 + upBuff1[x] + 2;
upBuff1[x - 1] = -signDown1;
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
for (x = startX + 1; x < endX; x++)
@@ -668,10 +627,12 @@
}
case SAO_BO:
{
+ const pixel* offsetBo = m_offsetBo;
+
for (y = 0; y < lcuHeight; y++)
{
for (x = 0; x < lcuWidth; x++)
- rec[x] = (pixel)offsetBo[rec[x]];
+ rec[x] = offsetBo[rec[x]];
rec += stride;
}
@@ -704,38 +665,29 @@
memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);
- int typeIdx;
- uint32_t edgeType;
-
- int offset[LUMA_GROUP_NUM + 1];
- int idxX;
- int idxY;
- int addr;
int frameWidthInCU = m_pic->getFrameWidthInCU();
int frameHeightInCU = m_pic->getFrameHeightInCU();
int stride;
bool isChroma = !!plane;
- bool mergeLeftFlag;
+ uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) : g_maxCUSize;
- int32_t *offsetBo = isChroma ? m_chromaOffsetBo : m_offsetBo;
+ const int boShift = X265_DEPTH - SAO_BO_BITS;
- offset[0] = 0;
- for (idxY = 0; idxY < frameHeightInCU; idxY++)
+ for (int idxY = 0; idxY < frameHeightInCU; idxY++)
{
- addr = idxY * frameWidthInCU;
+ int addr = idxY * frameWidthInCU;
if (plane == 0)
{
- rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
+ rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
stride = m_pic->getStride();
picWidthTmp = m_param->sourceWidth;
}
else
{
- rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
+ rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
stride = m_pic->getCStride();
picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
}
- uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) : g_maxCUSize;
for (uint32_t i = 0; i < cuHeightTmp + 1; i++)
{
m_tmpL1[i] = rec[0];
@@ -746,10 +698,13 @@
memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);
- for (idxX = 0; idxX < frameWidthInCU; idxX++)
+ for (int idxX = 0; idxX < frameWidthInCU; idxX++)
{
addr = idxY * frameWidthInCU + idxX;
+ int typeIdx;
+ bool mergeLeftFlag;
+
if (oneUnitFlag)
{
typeIdx = saoLcuParam[0].typeIdx;
@@ -766,21 +721,24 @@
{
if (typeIdx == SAO_BO)
{
- for (int i = 0; i < SAO_MAX_BO_CLASSES + 1; i++)
- offset[i] = 0;
+ pixel* offsetBo = m_offsetBo;
+ int offset[SAO_NUM_BO_CLASSES];
+ memset(offset, 0, sizeof(offset));
- for (int i = 0; i < saoLcuParam[addr].length; i++)
- offset[(saoLcuParam[addr].subTypeIdx + i) % SAO_MAX_BO_CLASSES + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ offset[((saoLcuParam[addr].subTypeIdx + i) & (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
for (int i = 0; i < (1 << X265_DEPTH); i++)
- offsetBo[i] = m_clipTable[i + offset[m_tableBo[i]]];
+ offsetBo[i] = m_clipTable[i + offset[i >> boShift]];
}
- if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
+ else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
{
- for (int i = 0; i < saoLcuParam[addr].length; i++)
+ int offset[NUM_EDGETYPE];
+ offset[0] = 0;
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
offset[i + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
- for (edgeType = 0; edgeType < 6; edgeType++)
+ for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
}
}
@@ -823,32 +781,25 @@
if (plane)
{
- rec = m_pic->getPicYuvRec()->getChromaAddr(plane);
+ rec = m_pic->getPicYuvRec()->getChromaAddr(plane);
picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
}
else
{
- rec = m_pic->getPicYuvRec()->getLumaAddr();
+ rec = m_pic->getPicYuvRec()->getLumaAddr();
picWidthTmp = m_param->sourceWidth;
}
if (!idxY)
memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);
- int typeIdx;
-
- int offset[LUMA_GROUP_NUM + 1];
- int idxX;
- int addr;
int frameWidthInCU = m_pic->getFrameWidthInCU();
int stride;
bool isChroma = !!plane;
- bool mergeLeftFlag;
- int32_t* offsetBo = isChroma ? m_chromaOffsetBo : m_offsetBo;
+ const int boShift = X265_DEPTH - SAO_BO_BITS;
- offset[0] = 0;
- addr = idxY * frameWidthInCU;
+ int addr = idxY * frameWidthInCU;
if (isChroma)
{
rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
@@ -872,12 +823,12 @@
memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);
- for (idxX = 0; idxX < frameWidthInCU; idxX++)
+ for (int idxX = 0; idxX < frameWidthInCU; idxX++)
{
addr = idxY * frameWidthInCU + idxX;
- typeIdx = saoLcuParam[addr].typeIdx;
- mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;
+ int typeIdx = saoLcuParam[addr].typeIdx;
+ bool mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;
if (typeIdx >= 0)
{
@@ -885,21 +836,24 @@
{
if (typeIdx == SAO_BO)
{
- for (int i = 0; i < SAO_MAX_BO_CLASSES + 1; i++)
- offset[i] = 0;
+ pixel* offsetBo = m_offsetBo;
+ int offset[SAO_NUM_BO_CLASSES];
+ memset(offset, 0, sizeof(offset));
- for (int i = 0; i < saoLcuParam[addr].length; i++)
- offset[(saoLcuParam[addr].subTypeIdx + i) % SAO_MAX_BO_CLASSES + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
+ offset[((saoLcuParam[addr].subTypeIdx + i) & (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
for (int i = 0; i < (1 << X265_DEPTH); i++)
- offsetBo[i] = m_clipTable[i + offset[m_tableBo[i]]];
+ offsetBo[i] = m_clipTable[i + offset[i >> boShift]];
}
- if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
+ else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
{
- for (int i = 0; i < saoLcuParam[addr].length; i++)
+ int offset[NUM_EDGETYPE];
+ offset[0] = 0;
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
offset[i + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
- for (uint32_t edgeType = 0; edgeType < 6; edgeType++)
+ for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)
m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];
}
}
@@ -942,7 +896,7 @@
saoLcuParam[i].partIdx = 0;
saoLcuParam[i].typeIdx = -1;
saoLcuParam[i].subTypeIdx = 0;
- for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)
saoLcuParam[i].offset[j] = 0;
}
}
@@ -954,10 +908,9 @@
saoUnit->partIdx = 0;
saoUnit->partIdxTmp = 0;
saoUnit->typeIdx = -1;
- saoUnit->length = 0;
saoUnit->subTypeIdx = 0;
- for (int i = 0; i < 4; i++)
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
saoUnit->offset[i] = 0;
}
@@ -966,10 +919,9 @@
saoUnitDst->mergeLeftFlag = saoUnitSrc->mergeLeftFlag;
saoUnitDst->mergeUpFlag = saoUnitSrc->mergeUpFlag;
saoUnitDst->typeIdx = saoUnitSrc->typeIdx;
- saoUnitDst->length = saoUnitSrc->length;
saoUnitDst->subTypeIdx = saoUnitSrc->subTypeIdx;
- for (int i = 0; i < 4; i++)
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
saoUnitDst->offset[i] = saoUnitSrc->offset[i];
}
@@ -1008,17 +960,15 @@
saoLcuParam[addr].partIdxTmp = (int)partIdx;
saoLcuParam[addr].typeIdx = saoQTPart[partIdx].bestType;
saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
- if (saoLcuParam[addr].typeIdx != -1)
+ if (saoLcuParam[addr].typeIdx >= 0)
{
- saoLcuParam[addr].length = saoQTPart[partIdx].length;
- for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)
saoLcuParam[addr].offset[j] = saoQTPart[partIdx].offset[j];
}
else
{
- saoLcuParam[addr].length = 0;
saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
- for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)
saoLcuParam[addr].offset[j] = 0;
}
}
@@ -1028,12 +978,9 @@
/* process SAO for one partition */
void SAO::rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane)
{
- int typeIdx;
- int numTotalType = MAX_NUM_SAO_TYPE;
SAOQTPart* onePart = &(psQTPart[partIdx]);
int64_t estDist;
- int classIdx;
m_distOrg[partIdx] = 0;
@@ -1046,50 +993,20 @@
int allowMergeUp;
SaoLcuParam saoLcuParamRdo;
- for (typeIdx = -1; typeIdx < numTotalType; typeIdx++)
+ for (int typeIdx = -1; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
{
m_entropyCoder.load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);
m_entropyCoder.resetBits();
- if (typeIdx == -1)
- {
- for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
- {
- for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)
- {
- // get bits for iTypeIdx = -1
- allowMergeLeft = 1;
- allowMergeUp = 1;
-
- // reset
- resetSaoUnit(&saoLcuParamRdo);
-
- // set merge flag
- saoLcuParamRdo.mergeUpFlag = 1;
- saoLcuParamRdo.mergeLeftFlag = 1;
-
- if (ry == onePart->startCUY)
- saoLcuParamRdo.mergeUpFlag = 0;
-
- if (rx == onePart->startCUX)
- saoLcuParamRdo.mergeLeftFlag = 0;
-
- m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);
- }
- }
- }
-
if (typeIdx >= 0)
{
estDist = estSaoTypeDist(partIdx, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
if (typeIdx == SAO_BO)
{
// Estimate Best Position
- double currentRDCost = 0.0;
-
- for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; i++)
+ for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
{
- currentRDCost = 0.0;
+ double currentRDCost = 0.0;
for (int j = i; j < i + SAO_BO_LEN; j++)
currentRDCost += currentRdCostTableBo[j];
@@ -1101,7 +1018,7 @@
}
// Recode all offsets
- for (classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
+ for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
estDist += currentDistortionTableBo[classIdx];
}
@@ -1129,8 +1046,7 @@
// set type and offsets
saoLcuParamRdo.typeIdx = typeIdx;
saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
- saoLcuParamRdo.length = s_numClass[typeIdx];
- for (classIdx = 0; classIdx < saoLcuParamRdo.length; classIdx++)
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
saoLcuParamRdo.offset[classIdx] = (int)m_offset[partIdx][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);
@@ -1152,6 +1068,30 @@
}
else
{
+ for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
+ {
+ for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)
+ {
+ // get bits for iTypeIdx = -1
+ allowMergeLeft = 1;
+ allowMergeUp = 1;
+
+ // reset
+ resetSaoUnit(&saoLcuParamRdo);
+
+ // set merge flag
+ saoLcuParamRdo.mergeUpFlag = 1;
+ saoLcuParamRdo.mergeLeftFlag = 1;
+
+ if (ry == onePart->startCUY)
+ saoLcuParamRdo.mergeUpFlag = 0;
+
+ if (rx == onePart->startCUX)
+ saoLcuParamRdo.mergeLeftFlag = 0;
+
+ m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);
+ }
+ }
if (m_distOrg[partIdx] < m_costPartBest[partIdx])
{
m_costPartBest[partIdx] = (double)m_distOrg[partIdx] + m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
@@ -1170,18 +1110,15 @@
if (onePart->bestType != -1)
{
- onePart->length = s_numClass[onePart->bestType];
int minIndex = 0;
if (onePart->bestType == SAO_BO)
{
onePart->subTypeIdx = bestClassTableBo;
minIndex = onePart->subTypeIdx;
}
- for (int i = 0; i < onePart->length; i++)
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
onePart->offset[i] = (int)m_offset[partIdx][onePart->bestType][minIndex + i + 1];
}
- else
- onePart->length = 0;
}
/* Run partition tree disable */
@@ -1190,7 +1127,6 @@
SAOQTPart* pOnePart = &(psQTPart[partIdx]);
pOnePart->bSplit = false;
- pOnePart->length = 0;
pOnePart->bestType = -1;
if (pOnePart->partLevel < (int)m_maxSplitLevel)
@@ -1236,7 +1172,6 @@
{
costFinal = costSplit;
onePart->bSplit = true;
- onePart->length = 0;
onePart->bestType = -1;
m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
}
@@ -1271,7 +1206,6 @@
uint32_t picHeightTmp;
int64_t* stats;
int64_t* counts;
- int classIdx;
int startX;
int startY;
int endX;
@@ -1308,6 +1242,8 @@
//if(iSaoType == BO_0 || iSaoType == BO_1)
{
+ const int boShift = X265_DEPTH - SAO_BO_BITS;
+
if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
{
numSkipLine = isChroma ? 3 - (2 * m_vChromaShift) : 3;
@@ -1325,12 +1261,9 @@
{
for (x = 0; x < endX; x++)
{
- classIdx = m_tableBo[recon[x]];
- if (classIdx)
- {
- stats[classIdx] += (fenc[x] - recon[x]);
- counts[classIdx]++;
- }
+ int classIdx = 1 + (recon[x] >> boShift);
+ stats[classIdx] += (fenc[x] - recon[x]);
+ counts[classIdx]++;
}
fenc += stride;
@@ -1338,12 +1271,6 @@
}
}
- int signLeft;
- int signRight;
- int signDown;
- int signDown1;
- int signDown2;
- uint32_t edgeType;
int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
@@ -1366,11 +1293,11 @@
endX = (rpelx == picWidthTmp) ? lcuWidth - 1 : lcuWidth - numSkipLineRight;
for (y = 0; y < lcuHeight - numSkipLine; y++)
{
- signLeft = signOf(recon[startX] - recon[startX - 1]);
+ int signLeft = signOf(recon[startX] - recon[startX - 1]);
for (x = startX; x < endX; x++)
{
- signRight = signOf(recon[x] - recon[x + 1]);
- edgeType = signRight + signLeft + 2;
+ int signRight = signOf(recon[x] - recon[x + 1]);
+ int edgeType = signRight + signLeft + 2;
signLeft = -signRight;
stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
@@ -1411,8 +1338,8 @@
{
for (x = 0; x < endX; x++)
{
- signDown = signOf(recon[x] - recon[x + stride]);
- edgeType = signDown + upBuff1[x] + 2;
+ int signDown = signOf(recon[x] - recon[x + stride]);
+ int edgeType = signDown + upBuff1[x] + 2;
upBuff1[x] = -signDown;
stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
@@ -1452,11 +1379,11 @@
for (y = startY; y < endY; y++)
{
- signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);
+ int signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);
for (x = startX; x < endX; x++)
{
- signDown1 = signOf(recon[x] - recon[x + stride + 1]);
- edgeType = signDown1 + upBuff1[x] + 2;
+ int signDown1 = signOf(recon[x] - recon[x + stride + 1]);
+ int edgeType = signDown1 + upBuff1[x] + 2;
upBufft[x + 1] = -signDown1;
stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
counts[s_eoTable[edgeType]]++;
@@ -1500,8 +1427,8 @@
{
for (x = startX; x < endX; x++)
{
- signDown1 = signOf(recon[x] - recon[x + stride - 1]);
- edgeType = signDown1 + upBuff1[x] + 2;
+ int signDown1 = signOf(recon[x] - recon[x + stride - 1]);
+ int edgeType = signDown1 + upBuff1[x] + 2;
upBuff1[x - 1] = -signDown1;
stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
counts[s_eoTable[edgeType]]++;
@@ -1518,7 +1445,6 @@
void SAO::calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY)
{
- int addr;
int x, y;
pixel* fenc;
@@ -1528,7 +1454,6 @@
uint32_t bPelY;
int64_t* stats;
int64_t* count;
- int classIdx;
int startX;
int startY;
int endX;
@@ -1545,11 +1470,13 @@
int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
+ const int boShift = X265_DEPTH - SAO_BO_BITS;
+
// NOTE: Row
{
// NOTE: Col
{
- addr = idxX + frameWidthInCU * idxY;
+ int addr = idxX + frameWidthInCU * idxY;
cu = pic->getCU(addr);
uint32_t picWidthTmp = m_param->sourceWidth;
@@ -1606,26 +1533,15 @@
if (x < startX && y < startY)
continue;
- classIdx = m_tableBo[recon[x]];
- if (classIdx)
- {
- stats[classIdx] += (fenc[x] - recon[x]);
- count[classIdx]++;
- }
+ int classIdx = 1 + (recon[x] >> boShift);
+ stats[classIdx] += (fenc[x] - recon[x]);
+ count[classIdx]++;
}
fenc += stride;
recon += stride;
}
- int signLeft;
- int signRight;
- int signDown;
- int signDown1;
- int signDown2;
-
- uint32_t edgeType;
-
//if (iSaoType == EO_0)
numSkipLine = isChroma ? 1 : 3;
@@ -1644,11 +1560,11 @@
for (y = 0; y < lcuHeight; y++)
{
- signLeft = signOf(recon[firstX] - recon[firstX - 1]);
+ int signLeft = signOf(recon[firstX] - recon[firstX - 1]);
for (x = firstX; x < endX; x++)
{
- signRight = signOf(recon[x] - recon[x + 1]);
- edgeType = signRight + signLeft + 2;
+ int signRight = signOf(recon[x] - recon[x + 1]);
+ int edgeType = signRight + signLeft + 2;
signLeft = -signRight;
if (x < startX && y < startY)
@@ -1690,8 +1606,8 @@
{
for (x = 0; x < lcuWidth; x++)
{
- signDown = signOf(recon[x] - recon[x + stride]);
- edgeType = signDown + upBuff1[x] + 2;
+ int signDown = signOf(recon[x] - recon[x + stride]);
+ int edgeType = signDown + upBuff1[x] + 2;
upBuff1[x] = -signDown;
if (x < startX && y < startY)
@@ -1733,11 +1649,11 @@
for (y = firstY; y < endY; y++)
{
- signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);
+ int signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);
for (x = firstX; x < endX; x++)
{
- signDown1 = signOf(recon[x] - recon[x + stride + 1]);
- edgeType = signDown1 + upBuff1[x] + 2;
+ int signDown1 = signOf(recon[x] - recon[x + stride + 1]);
+ int edgeType = signDown1 + upBuff1[x] + 2;
upBufft[x + 1] = -signDown1;
if (x < startX && y < startY)
@@ -1784,8 +1700,8 @@
{
for (x = firstX; x < endX; x++)
{
- signDown1 = signOf(recon[x] - recon[x + stride - 1]);
- edgeType = signDown1 + upBuff1[x] + 2;
+ int signDown1 = signOf(recon[x] - recon[x + stride - 1]);
+ int edgeType = signDown1 + upBuff1[x] + 2;
upBuff1[x - 1] = -signDown1;
if (x < startX && y < startY)
@@ -1807,12 +1723,10 @@
void SAO::getSaoStats(SAOQTPart *psQTPart, int plane)
{
- int levelIdx, partIdx, typeIdx, classIdx;
+ int levelIdx, partIdx;
int i;
- int numTotalType = MAX_NUM_SAO_TYPE;
int lcuIdx;
int lcuIdy;
- int addr;
int frameWidthInCU = m_pic->getFrameWidthInCU();
int downPartIdx;
int partStart;
@@ -1827,7 +1741,7 @@
{
for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)
{
- addr = lcuIdy * frameWidthInCU + lcuIdx;
+ int addr = lcuIdy * frameWidthInCU + lcuIdx;
calcSaoStatsCu(addr, partIdx, plane);
}
}
@@ -1841,7 +1755,7 @@
{
for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)
{
- addr = lcuIdy * frameWidthInCU + lcuIdx;
+ int addr = lcuIdy * frameWidthInCU + lcuIdx;
calcSaoStatsCu(addr, partIdx, plane);
}
}
@@ -1858,9 +1772,9 @@
for (i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
{
downPartIdx = onePart->downPartsIdx[i];
- for (typeIdx = 0; typeIdx < numTotalType; typeIdx++)
+ for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
{
- for (classIdx = 0; classIdx < (typeIdx < SAO_BO ? s_numClass[typeIdx] : SAO_MAX_BO_CLASSES) + 1; classIdx++)
+ for (int classIdx = 0; classIdx < (typeIdx < SAO_BO ? SAO_EO_LEN : SAO_NUM_BO_CLASSES) + 1; classIdx++)
{
m_offsetOrg[partIdx][typeIdx][classIdx] += m_offsetOrg[downPartIdx][typeIdx][classIdx];
m_count[partIdx][typeIdx][classIdx] += m_count[downPartIdx][typeIdx][classIdx];
@@ -1923,16 +1837,15 @@
/* Check merge SAO unit */
void SAO::checkMerge(SaoLcuParam * saoUnitCurr, SaoLcuParam * saoUnitCheck, int dir)
{
- int i;
int countDiff = 0;
if (saoUnitCurr->partIdx != saoUnitCheck->partIdx)
{
- if (saoUnitCurr->typeIdx != -1)
+ if (saoUnitCurr->typeIdx >= 0)
{
if (saoUnitCurr->typeIdx == saoUnitCheck->typeIdx)
{
- for (i = 0; i < saoUnitCurr->length; i++)
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)
countDiff += (saoUnitCurr->offset[i] != saoUnitCheck->offset[i]);
countDiff += (saoUnitCurr->subTypeIdx != saoUnitCheck->subTypeIdx);
@@ -1979,24 +1892,22 @@
oneUnitFlag = 1;
else
{
- int i, j, addr, addrUp, addrLeft, idx, idxUp, idxLeft, idxCount;
-
oneUnitFlag = 0;
- idxCount = -1;
+ int idxCount = -1;
saoLcuParam[0].mergeUpFlag = 0;
saoLcuParam[0].mergeLeftFlag = 0;
- for (j = 0; j < m_numCuInHeight; j++)
+ for (int j = 0; j < m_numCuInHeight; j++)
{
- for (i = 0; i < m_numCuInWidth; i++)
+ for (int i = 0; i < m_numCuInWidth; i++)
{
- addr = i + j * m_numCuInWidth;
- addrLeft = (addr % m_numCuInWidth == 0) ? -1 : addr - 1;
- addrUp = (addr < m_numCuInWidth) ? -1 : addr - m_numCuInWidth;
- idx = saoLcuParam[addr].partIdxTmp;
- idxLeft = (addrLeft == -1) ? -1 : saoLcuParam[addrLeft].partIdxTmp;
- idxUp = (addrUp == -1) ? -1 : saoLcuParam[addrUp].partIdxTmp;
+ int addr = i + j * m_numCuInWidth;
+ int addrUp = (j == 0) ? -1 : addr - m_numCuInWidth;
+ int addrLeft = (i == 0) ? -1 : addr - 1;
+ int idx = saoLcuParam[addr].partIdxTmp;
+ int idxLeft = (addrLeft == -1) ? -1 : saoLcuParam[addrLeft].partIdxTmp;
+ int idxUp = (addrUp == -1) ? -1 : saoLcuParam[addrUp].partIdxTmp;
if (idx != idxLeft && idx != idxUp)
{
@@ -2057,21 +1968,17 @@
void SAO::rdoSaoUnitRow(SAOParam *saoParam, int idxY)
{
- int idxX;
int frameWidthInCU = saoParam->numCuInWidth;
int j, k;
- int addr = 0;
- int addrUp = -1;
- int addrLeft = -1;
int compIdx = 0;
SaoLcuParam mergeSaoParam[3][2];
double compDistortion[3];
- for (idxX = 0; idxX < frameWidthInCU; idxX++)
+ for (int idxX = 0; idxX < frameWidthInCU; idxX++)
{
- addr = idxX + frameWidthInCU * idxY;
- addrUp = addr < frameWidthInCU ? -1 : idxX + frameWidthInCU * (idxY - 1);
- addrLeft = idxX == 0 ? -1 : idxX - 1 + frameWidthInCU * idxY;
+ int addr = idxX + idxY * frameWidthInCU;
+ int addrUp = idxY == 0 ? -1 : addr - frameWidthInCU;
+ int addrLeft = idxX == 0 ? -1 : addr - 1;
int allowMergeLeft = 1;
int allowMergeUp = 1;
uint32_t rate;
@@ -2111,7 +2018,7 @@
}
}
- saoParam->saoLcuParam[compIdx][addr].typeIdx = -1;
+ saoParam->saoLcuParam[compIdx][addr].typeIdx = -1;
saoParam->saoLcuParam[compIdx][addr].mergeUpFlag = 0;
saoParam->saoLcuParam[compIdx][addr].mergeLeftFlag = 0;
saoParam->saoLcuParam[compIdx][addr].subTypeIdx = 0;
@@ -2173,9 +2080,9 @@
}
}
- if (saoParam->saoLcuParam[0][addr].typeIdx == -1)
+ if (saoParam->saoLcuParam[0][addr].typeIdx < 0)
m_numNoSao[0]++;
- if (saoParam->saoLcuParam[1][addr].typeIdx == -1)
+ if (saoParam->saoLcuParam[1][addr].typeIdx < 0)
m_numNoSao[1] += 2;
m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
m_entropyCoder.store(m_rdEntropyCoders[0][CI_CURR_BEST]);
@@ -2187,9 +2094,8 @@
inline int64_t SAO::estSaoTypeDist(int compIdx, int typeIdx, int shift, double lambda, int32_t *currentDistortionTableBo, double *currentRdCostTableBo)
{
int64_t estDist = 0;
- int classIdx;
- for (classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? s_numClass[typeIdx] + 1 : SAO_MAX_BO_CLASSES + 1); classIdx++)
+ for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + 1 : SAO_NUM_BO_CLASSES + 1); classIdx++)
{
if (typeIdx == SAO_BO)
{
@@ -2200,7 +2106,7 @@
{
m_offset[compIdx][typeIdx][classIdx] = (int64_t)roundIDBI((double)(m_offsetOrg[compIdx][typeIdx][classIdx] << (X265_DEPTH - 8)) / (double)(m_count[compIdx][typeIdx][classIdx] << SAO_BIT_INC));
m_offset[compIdx][typeIdx][classIdx] = Clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, (int)m_offset[compIdx][typeIdx][classIdx]);
- if (typeIdx < 4)
+ if (typeIdx < SAO_BO)
{
if (m_offset[compIdx][typeIdx][classIdx] < 0 && classIdx < 3)
m_offset[compIdx][typeIdx][classIdx] = 0;
@@ -2231,12 +2137,11 @@
//Clean up, best_q_offset.
int64_t iterOffset, tempOffset;
int64_t tempDist, tempRate;
- double tempCost, tempMinCost;
int64_t offsetOutput = 0;
iterOffset = offsetInput;
// Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
- tempMinCost = lambda;
+ double tempMinCost = lambda;
while (iterOffset != 0)
{
// Calculate the bits required for signalling the offset
@@ -2247,7 +2152,7 @@
// Do the dequntization before distorion calculation
tempOffset = iterOffset << bitIncrease;
tempDist = estSaoDist(count, tempOffset, offsetOrg, shift);
- tempCost = ((double)tempDist + lambda * (double)tempRate);
+ double tempCost = ((double)tempDist + lambda * (double)tempRate);
if (tempCost < tempMinCost)
{
tempMinCost = tempCost;
@@ -2267,10 +2172,7 @@
void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,
SaoLcuParam *compSaoParam, double *compDistortion)
{
- int typeIdx;
-
int64_t estDist;
- int classIdx;
int64_t bestDist;
SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[plane][addr]);
@@ -2287,7 +2189,6 @@
double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
SaoLcuParam saoLcuParamRdo;
- double estRate = 0;
resetSaoUnit(&saoLcuParamRdo);
@@ -2298,18 +2199,16 @@
copySaoUnit(saoLcuParam, &saoLcuParamRdo);
bestDist = 0;
- for (typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
+ for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
{
estDist = estSaoTypeDist(plane, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);
if (typeIdx == SAO_BO)
{
// Estimate Best Position
- double currentRDCost = 0.0;
-
- for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; i++)
+ for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
{
- currentRDCost = 0.0;
+ double currentRDCost = 0.0;
for (int j = i; j < i + SAO_BO_LEN; j++)
currentRDCost += currentRdCostTableBo[j];
@@ -2323,23 +2222,22 @@
// Re code all Offsets
// Code Center
estDist = 0;
- for (classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
+ for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
estDist += currentDistortionTableBo[classIdx];
}
resetSaoUnit(&saoLcuParamRdo);
- saoLcuParamRdo.length = s_numClass[typeIdx];
saoLcuParamRdo.typeIdx = typeIdx;
saoLcuParamRdo.mergeLeftFlag = 0;
saoLcuParamRdo.mergeUpFlag = 0;
saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;
- for (classIdx = 0; classIdx < saoLcuParamRdo.length; classIdx++)
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
saoLcuParamRdo.offset[classIdx] = (int)m_offset[plane][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
m_entropyCoder.resetBits();
m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);
- estRate = m_entropyCoder.getNumberOfWrittenBits();
+ uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
m_cost[plane][typeIdx] = (double)((double)estDist + m_lumaLambda * (double)estRate);
if (m_cost[plane][typeIdx] < dCostPartBest)
@@ -2367,12 +2265,12 @@
if (saoLcuParamNeighbor != NULL)
{
estDist = 0;
- typeIdx = saoLcuParamNeighbor->typeIdx;
+ int typeIdx = saoLcuParamNeighbor->typeIdx;
if (typeIdx >= 0)
{
int mergeBandPosition = (typeIdx == SAO_BO) ? saoLcuParamNeighbor->subTypeIdx : 0;
int mergeOffset;
- for (classIdx = 0; classIdx < s_numClass[typeIdx]; classIdx++)
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
{
mergeOffset = saoLcuParamNeighbor->offset[classIdx];
estDist += estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition + 1], 0);
@@ -2395,8 +2293,6 @@
{
int64_t estDist[2];
int64_t bestDist = 0;
- int typeIdx;
- int classIdx;
SaoLcuParam* saoLcuParam[2] = { &(saoParam->saoLcuParam[1][addr]), &(saoParam->saoLcuParam[2][addr]) };
SaoLcuParam* saoLcuParamNeighbor[2] = { NULL, NULL };
@@ -2417,7 +2313,6 @@
double costPartBest = MAX_DOUBLE;
double bestRDCostTableBo;
double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
- double estRate = 0;
int bestClassTableBo[2] = { 0, 0 };
int currentDistortionTableBo[MAX_NUM_SAO_CLASS];
@@ -2435,19 +2330,18 @@
copySaoUnit(saoLcuParam[0], &saoLcuParamRdo[0]);
copySaoUnit(saoLcuParam[1], &saoLcuParamRdo[1]);
- for (typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
+ for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
{
if (typeIdx == SAO_BO)
{
// Estimate Best Position
for (int compIdx = 0; compIdx < 2; compIdx++)
{
- double currentRDCost = 0.0;
bestRDCostTableBo = MAX_DOUBLE;
estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, 0, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
- for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; i++)
+ for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
{
- currentRDCost = 0.0;
+ double currentRDCost = 0.0;
for (int j = i; j < i + SAO_BO_LEN; j++)
currentRDCost += currentRdCostTableBo[j];
@@ -2461,7 +2355,7 @@
// Re code all Offsets
// Code Center
estDist[compIdx] = 0;
- for (classIdx = bestClassTableBo[compIdx]; classIdx < bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)
+ for (int classIdx = bestClassTableBo[compIdx]; classIdx < bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)
estDist[compIdx] += currentDistortionTableBo[classIdx];
}
}
@@ -2477,18 +2371,17 @@
for (int compIdx = 0; compIdx < 2; compIdx++)
{
resetSaoUnit(&saoLcuParamRdo[compIdx]);
- saoLcuParamRdo[compIdx].length = s_numClass[typeIdx];
saoLcuParamRdo[compIdx].typeIdx = typeIdx;
saoLcuParamRdo[compIdx].mergeLeftFlag = 0;
saoLcuParamRdo[compIdx].mergeUpFlag = 0;
saoLcuParamRdo[compIdx].subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0;
- for (classIdx = 0; classIdx < saoLcuParamRdo[compIdx].length; classIdx++)
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
saoLcuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + saoLcuParamRdo[compIdx].subTypeIdx + 1];
m_entropyCoder.codeSaoOffset(&saoLcuParamRdo[compIdx], compIdx + 1);
}
- estRate = m_entropyCoder.getNumberOfWrittenBits();
+ uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
m_cost[1][typeIdx] = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);
if (m_cost[1][typeIdx] < costPartBest)
@@ -2520,11 +2413,11 @@
if (saoLcuParamNeighbor[compIdx] != NULL)
{
estDist[compIdx] = 0;
- typeIdx = saoLcuParamNeighbor[compIdx]->typeIdx;
+ int typeIdx = saoLcuParamNeighbor[compIdx]->typeIdx;
if (typeIdx >= 0)
{
int mergeBandPosition = (typeIdx == SAO_BO) ? saoLcuParamNeighbor[compIdx]->subTypeIdx : 0;
- for (classIdx = 0; classIdx < s_numClass[typeIdx]; classIdx++)
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
{
int mergeOffset = saoLcuParamNeighbor[compIdx]->offset[classIdx];
estDist[compIdx] += estSaoDist(m_count[compIdx + 1][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[compIdx + 1][typeIdx][classIdx + mergeBandPosition + 1], 0);
diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.h
--- a/source/encoder/sao.h Thu Sep 11 19:24:28 2014 +0530
+++ b/source/encoder/sao.h Fri Sep 12 11:01:54 2014 +0900
@@ -36,7 +36,7 @@
{
SAO_EO_LEN = 4,
SAO_BO_LEN = 4,
- SAO_MAX_BO_CLASSES = 32
+ SAO_NUM_BO_CLASSES = 32
};
enum SAOType
@@ -55,15 +55,13 @@
enum { SAO_MAX_DEPTH = 4 };
enum { SAO_BO_BITS = 5 };
- enum { LUMA_GROUP_NUM = 1 << SAO_BO_BITS };
- enum { MAX_NUM_SAO_OFFSETS = 4 };
enum { MAX_NUM_SAO_CLASS = 33 };
enum { SAO_BIT_INC = X265_MAX(X265_DEPTH - 10, 0) };
enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
+ enum { NUM_EDGETYPE = 5 };
static const int s_numCulPartsLevel[5];
- static const int s_numClass[MAX_NUM_SAO_TYPE];
- static const uint32_t s_eoTable[9];
+ static const uint32_t s_eoTable[NUM_EDGETYPE];
typedef int64_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
typedef int64_t (PerType[MAX_NUM_SAO_TYPE]);
@@ -86,9 +84,8 @@
PerPlane* m_offsetOrgPreDblk;
double m_depthSaoRate[2][4];
- int32_t* m_offsetBo;
- int32_t* m_chromaOffsetBo;
- int8_t m_offsetEo[LUMA_GROUP_NUM];
+ pixel* m_offsetBo;
+ int8_t m_offsetEo[NUM_EDGETYPE];
int m_maxSplitLevel;
@@ -100,7 +97,6 @@
pixel* m_clipTable;
pixel* m_clipTableBase;
- pixel* m_tableBo;
pixel* m_tmpU1[3];
pixel* m_tmpU2[3];
More information about the x265-devel
mailing list