<div dir="ltr">Thanks, queued for testing.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Sep 12, 2014 at 7:34 AM, Satoshi Nakagawa <span dir="ltr">&lt;<a href="mailto:nakagawa424@oki.com" target="_blank">nakagawa424@oki.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"># HG changeset patch<br>
# User Satoshi Nakagawa &lt;<a href="mailto:nakagawa424@oki.com">nakagawa424@oki.com</a>&gt;<br>
# Date 1410487314 -32400<br>
# Fri Sep 12 11:01:54 2014 +0900<br>
# Node ID 8a2312df90f99b8b479940141c6dafa4b96581cf<br>
# Parent 7e29b10982d2eb7fd79f581d99996f04184522ba<br>
sao: some cleanups<br>
<br>
diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/common.h<br>
--- a/source/common/common.h Thu Sep 11 19:24:28 2014 +0530<br>
+++ b/source/common/common.h Fri Sep 12 11:01:54 2014 +0900<br>
@@ -200,6 +200,8 @@<br>
<br>
namespace x265 {<br>
<br>
+enum { SAO_NUM_OFFSET = 4 };<br>
+<br>
// NOTE: MUST be alignment to 16 or 32 bytes for asm code<br>
struct NoiseReduction<br>
{<br>
@@ -215,9 +217,8 @@<br>
enum { NUM_DOWN_PART = 4 };<br>
<br>
int bestType;<br>
- int length;<br>
int subTypeIdx; // indicates EO class or BO band position<br>
- int offset[4];<br>
+ int offset[SAO_NUM_OFFSET];<br>
int startCUX;<br>
int startCUY;<br>
int endCUX;<br>
@@ -245,10 +246,9 @@<br>
bool mergeLeftFlag;<br>
int typeIdx;<br>
int subTypeIdx; // indicates EO class or BO band position<br>
- int offset[4];<br>
+ int offset[SAO_NUM_OFFSET];<br>
int partIdx;<br>
int partIdxTmp;<br>
- int length;<br>
<br>
void reset()<br>
{<br>
diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/x86/loopfilter.asm<br>
--- a/source/common/x86/loopfilter.asm Thu Sep 11 19:24:28 2014 +0530<br>
+++ b/source/common/x86/loopfilter.asm Fri Sep 12 11:01:54 2014 +0900<br>
@@ -44,7 +44,7 @@<br>
pslldq m0, 15 ; m0 = [iSignLeft x .. x]<br>
pcmpeqb m4, m4 ; m4 = [pb -1]<br>
pxor m5, m5 ; m5 = 0<br>
- movu m6, [r1] ; m6 = m_iOffsetEo<br>
+ movh m6, [r1] ; m6 = m_offsetEo<br>
<br>
.loop:<br>
movu m7, [r0] ; m1 = pRec[x]<br>
diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/entropy.cpp<br>
--- a/source/encoder/entropy.cpp Thu Sep 11 19:24:28 2014 +0530<br>
+++ b/source/encoder/entropy.cpp Fri Sep 12 11:01:54 2014 +0900<br>
@@ -879,19 +879,19 @@<br>
<br>
if (symbol)<br>
{<br>
- if (saoLcuParam->typeIdx < 4 && compIdx != 2)<br>
+ if (saoLcuParam->typeIdx < SAO_BO && compIdx != 2)<br>
saoLcuParam->subTypeIdx = saoLcuParam->typeIdx;<br>
<br>
int offsetTh = 1 << X265_MIN(X265_DEPTH - 5, 5);<br>
if (saoLcuParam->typeIdx == SAO_BO)<br>
{<br>
- for (i = 0; i < saoLcuParam->length; i++)<br>
+ for (i = 0; i < SAO_BO_LEN; i++)<br>
{<br>
uint32_t absOffset = ((saoLcuParam->offset[i] < 0) ? -saoLcuParam->offset[i] : saoLcuParam->offset[i]);<br>
codeSaoMaxUvlc(absOffset, offsetTh - 1);<br>
}<br>
<br>
- for (i = 0; i < saoLcuParam->length; i++)<br>
+ for (i = 0; i < SAO_BO_LEN; i++)<br>
{<br>
if (saoLcuParam->offset[i] != 0)<br>
{<br>
@@ -903,7 +903,7 @@<br>
symbol = (uint32_t)(saoLcuParam->subTypeIdx);<br>
codeSaoUflc(5, symbol);<br>
}<br>
- else if (saoLcuParam->typeIdx < 4)<br>
+ else // if (saoLcuParam->typeIdx < SAO_BO)<br>
{<br>
codeSaoMaxUvlc(saoLcuParam->offset[0], offsetTh - 1);<br>
codeSaoMaxUvlc(saoLcuParam->offset[1], offsetTh - 1);<br>
diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.cpp<br>
--- a/source/encoder/sao.cpp Thu Sep 11 19:24:28 2014 +0530<br>
+++ b/source/encoder/sao.cpp Fri Sep 12 11:01:54 2014 +0900<br>
@@ -79,26 +79,13 @@<br>
341, // level 4<br>
};<br>
<br>
-const uint32_t SAO::s_eoTable[9] =<br>
+const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =<br>
{<br>
1, // 0<br>
2, // 1<br>
0, // 2<br>
3, // 3<br>
- 4, // 4<br>
- 0, // 5<br>
- 0, // 6<br>
- 0, // 7<br>
- 0<br>
-};<br>
-<br>
-const int SAO::s_numClass[MAX_NUM_SAO_TYPE] =<br>
-{<br>
- SAO_EO_LEN,<br>
- SAO_EO_LEN,<br>
- SAO_EO_LEN,<br>
- SAO_EO_LEN,<br>
- SAO_BO_LEN<br>
+ 4 // 4<br>
};<br>
<br>
SAO::SAO()<br>
@@ -122,8 +109,6 @@<br>
m_clipTable = NULL;<br>
m_clipTableBase = NULL;<br>
m_offsetBo = NULL;<br>
- m_chromaOffsetBo = NULL;<br>
- m_tableBo = NULL;<br>
m_tmpU1[0] = NULL;<br>
m_tmpU1[1] = NULL;<br>
m_tmpU1[2] = NULL;<br>
@@ -162,18 +147,12 @@<br>
* m_numTotalParts must allow for sufficient storage in any allocated arrays */<br>
m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]);<br>
<br>
- int pixelRange = 1 << X265_DEPTH;<br>
- int boRangeShift = X265_DEPTH - SAO_BO_BITS;<br>
- pixel maxY = (1 << X265_DEPTH) - 1;<br>
- pixel minY = 0;<br>
- pixel rangeExt = maxY >> 1;<br>
+ const pixel maxY = (1 << X265_DEPTH) - 1;<br>
+ const pixel rangeExt = maxY >> 1;<br>
int numLcu = m_numCuInWidth * m_numCuInHeight;<br>
<br>
- CHECKED_MALLOC(m_tableBo, pixel, pixelRange);<br>
-<br>
- CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);<br>
- CHECKED_MALLOC(m_offsetBo, int, maxY + 2 * rangeExt);<br>
- CHECKED_MALLOC(m_chromaOffsetBo , int, maxY + 2 * rangeExt);<br>
+ CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);<br>
+ CHECKED_MALLOC(m_offsetBo, pixel, maxY + 2 * rangeExt);<br>
<br>
CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1);<br>
CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1);<br>
@@ -199,19 +178,16 @@<br>
CHECKED_MALLOC(m_countPreDblk, PerPlane, numLcu);<br>
CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numLcu);<br>
<br>
- for (int k2 = 0; k2 < pixelRange; k2++)<br>
- m_tableBo[k2] = (pixel)(1 + (k2 >> boRangeShift));<br>
+ m_clipTable = &(m_clipTableBase[rangeExt]);<br>
<br>
- for (int i = 0; i < (minY + rangeExt); i++)<br>
- m_clipTableBase[i] = minY;<br>
+ for (int i = 0; i < rangeExt; i++)<br>
+ m_clipTableBase[i] = 0;<br>
<br>
- for (int i = minY + rangeExt; i < (maxY + rangeExt); i++)<br>
- m_clipTableBase[i] = (pixel)(i - rangeExt);<br>
+ for (int i = 0; i < maxY; i++)<br>
+ m_clipTable[i] = (pixel)i;<br>
<br>
- for (int i = maxY + rangeExt; i < (maxY + 2 * rangeExt); i++)<br>
- m_clipTableBase[i] = maxY;<br>
-<br>
- m_clipTable = &(m_clipTableBase[rangeExt]);<br>
+ for (int i = maxY; i < maxY + rangeExt; i++)<br>
+ m_clipTable[i] = maxY;<br>
<br>
return true;<br>
<br>
@@ -223,8 +199,6 @@<br>
{<br>
X265_FREE(m_clipTableBase);<br>
X265_FREE(m_offsetBo);<br>
- X265_FREE(m_tableBo);<br>
- X265_FREE(m_chromaOffsetBo);<br>
<br>
X265_FREE(m_tmpL1);<br>
X265_FREE(m_tmpL2);<br>
@@ -271,12 +245,9 @@<br>
/* recursively initialize SAO parameters (only once) */<br>
void SAO::initSAOParam(SAOParam *saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const<br>
{<br>
- int j;<br>
int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol);<br>
<br>
- SAOQTPart* saoPart;<br>
-<br>
- saoPart = &(saoParam->saoPart[plane][partIdx]);<br>
+ SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);<br>
<br>
saoPart->partIdx = partIdx;<br>
saoPart->partLevel = partLevel;<br>
@@ -290,11 +261,10 @@<br>
<br>
saoPart->upPartIdx = parentPartIdx;<br>
saoPart->bestType = -1;<br>
- saoPart->length = 0;<br>
<br>
saoPart->subTypeIdx = 0;<br>
<br>
- for (j = 0; j < MAX_NUM_SAO_OFFSETS; j++)<br>
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)<br>
saoPart->offset[j] = 0;<br>
<br>
if (saoPart->partLevel < m_maxSplitLevel)<br>
@@ -371,14 +341,13 @@<br>
for (int i = 0; i < s_numCulPartsLevel[m_maxSplitLevel]; i++)<br>
{<br>
saoParam->saoPart[c][i].bestType = -1;<br>
- saoParam->saoPart[c][i].length = 0;<br>
saoParam->saoPart[c][i].bSplit = false;<br>
saoParam->saoPart[c][i].bProcessed = false;<br>
saoParam->saoPart[c][i].minCost = MAX_DOUBLE;<br>
saoParam->saoPart[c][i].minDist = MAX_INT;<br>
saoParam->saoPart[c][i].minRate = MAX_INT;<br>
saoParam->saoPart[c][i].subTypeIdx = 0;<br>
- for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)<br>
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)<br>
{<br>
saoParam->saoPart[c][i].offset[j] = 0;<br>
saoParam->saoPart[c][i].offset[j] = 0;<br>
@@ -454,18 +423,12 @@<br>
int lcuHeight;<br>
int rpelx;<br>
int bpely;<br>
- int edgeType;<br>
- int signDown;<br>
- int signDown1;<br>
- int signDown2;<br>
int picWidthTmp;<br>
int picHeightTmp;<br>
int startX;<br>
int startY;<br>
int endX;<br>
int endY;<br>
- int shift;<br>
- int cuHeightTmp;<br>
pixel* tmpL;<br>
pixel* tmpU;<br>
uint32_t lpelx = tmpCu->getCUPelX();<br>
@@ -505,22 +468,18 @@<br>
<br>
// if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)<br>
{<br>
- cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize >> m_vChromaShift);<br>
- shift = isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >> m_hChromaShift) - 1);<br>
+ int cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize >> m_vChromaShift);<br>
+ pixel* recR = &rec[isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >> m_hChromaShift) - 1)];<br>
for (int i = 0; i < cuHeightTmp + 1; i++)<br>
{<br>
- m_tmpL2[i] = rec[shift];<br>
- rec += stride;<br>
+ m_tmpL2[i] = *recR;<br>
+ recR += stride;<br>
}<br>
<br>
- rec -= (stride * (cuHeightTmp + 1));<br>
-<br>
tmpL = m_tmpL1;<br>
tmpU = &(m_tmpU1[plane][lpelx]);<br>
}<br>
<br>
- int32_t *offsetBo = isLuma ? m_offsetBo : m_chromaOffsetBo;<br>
-<br>
switch (saoType)<br>
{<br>
case SAO_EO_0: // dir: -<br>
@@ -536,10 +495,10 @@<br>
for (x = startX; x < endX; x++)<br>
{<br>
int signRight = signOf(rec[x] - rec[x + 1]);<br>
- edgeType = signRight + signLeft + 2;<br>
+ int edgeType = signRight + signLeft + 2;<br>
signLeft = -signRight;<br>
<br>
- rec[x] = (pixel)Clip3(0, (1 << X265_DEPTH) - 1, rec[x] + m_offsetEo[edgeType]);<br>
+ rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];<br>
}<br>
<br>
rec += stride;<br>
@@ -584,8 +543,8 @@<br>
{<br>
for (x = 0; x < lcuWidth; x++)<br>
{<br>
- signDown = signOf(rec[x] - rec[x + stride]);<br>
- edgeType = signDown + upBuff1[x] + 2;<br>
+ int signDown = signOf(rec[x] - rec[x + stride]);<br>
+ int edgeType = signDown + upBuff1[x] + 2;<br>
upBuff1[x] = -signDown;<br>
<br>
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];<br>
@@ -612,11 +571,11 @@<br>
<br>
for (y = startY; y < endY; y++)<br>
{<br>
- signDown2 = signOf(rec[stride + startX] - tmpL[y]);<br>
+ int signDown2 = signOf(rec[stride + startX] - tmpL[y]);<br>
for (x = startX; x < endX; x++)<br>
{<br>
- signDown1 = signOf(rec[x] - rec[x + stride + 1]);<br>
- edgeType = signDown1 + upBuff1[x] + 2;<br>
+ int signDown1 = signOf(rec[x] - rec[x + stride + 1]);<br>
+ int edgeType = signDown1 + upBuff1[x] + 2;<br>
upBufft[x + 1] = -signDown1;<br>
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];<br>
}<br>
@@ -647,8 +606,8 @@<br>
for (y = startY; y < endY; y++)<br>
{<br>
x = startX;<br>
- signDown1 = signOf(rec[x] - tmpL[y + 1]);<br>
- edgeType = signDown1 + upBuff1[x] + 2;<br>
+ int signDown1 = signOf(rec[x] - tmpL[y + 1]);<br>
+ int edgeType = signDown1 + upBuff1[x] + 2;<br>
upBuff1[x - 1] = -signDown1;<br>
rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];<br>
for (x = startX + 1; x < endX; x++)<br>
@@ -668,10 +627,12 @@<br>
}<br>
case SAO_BO:<br>
{<br>
+ const pixel* offsetBo = m_offsetBo;<br>
+<br>
for (y = 0; y < lcuHeight; y++)<br>
{<br>
for (x = 0; x < lcuWidth; x++)<br>
- rec[x] = (pixel)offsetBo[rec[x]];<br>
+ rec[x] = offsetBo[rec[x]];<br>
<br>
rec += stride;<br>
}<br>
@@ -704,38 +665,29 @@<br>
<br>
memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);<br>
<br>
- int typeIdx;<br>
- uint32_t edgeType;<br>
-<br>
- int offset[LUMA_GROUP_NUM + 1];<br>
- int idxX;<br>
- int idxY;<br>
- int addr;<br>
int frameWidthInCU = m_pic->getFrameWidthInCU();<br>
int frameHeightInCU = m_pic->getFrameHeightInCU();<br>
int stride;<br>
bool isChroma = !!plane;<br>
- bool mergeLeftFlag;<br>
+ uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) : g_maxCUSize;<br>
<br>
- int32_t *offsetBo = isChroma ? m_chromaOffsetBo : m_offsetBo;<br>
+ const int boShift = X265_DEPTH - SAO_BO_BITS;<br>
<br>
- offset[0] = 0;<br>
- for (idxY = 0; idxY < frameHeightInCU; idxY++)<br>
+ for (int idxY = 0; idxY < frameHeightInCU; idxY++)<br>
{<br>
- addr = idxY * frameWidthInCU;<br>
+ int addr = idxY * frameWidthInCU;<br>
if (plane == 0)<br>
{<br>
- rec = m_pic->getPicYuvRec()->getLumaAddr(addr);<br>
+ rec = m_pic->getPicYuvRec()->getLumaAddr(addr);<br>
stride = m_pic->getStride();<br>
picWidthTmp = m_param->sourceWidth;<br>
}<br>
else<br>
{<br>
- rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);<br>
+ rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);<br>
stride = m_pic->getCStride();<br>
picWidthTmp = m_param->sourceWidth >> m_hChromaShift;<br>
}<br>
- uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) : g_maxCUSize;<br>
for (uint32_t i = 0; i < cuHeightTmp + 1; i++)<br>
{<br>
m_tmpL1[i] = rec[0];<br>
@@ -746,10 +698,13 @@<br>
<br>
memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);<br>
<br>
- for (idxX = 0; idxX < frameWidthInCU; idxX++)<br>
+ for (int idxX = 0; idxX < frameWidthInCU; idxX++)<br>
{<br>
addr = idxY * frameWidthInCU + idxX;<br>
<br>
+ int typeIdx;<br>
+ bool mergeLeftFlag;<br>
+<br>
if (oneUnitFlag)<br>
{<br>
typeIdx = saoLcuParam[0].typeIdx;<br>
@@ -766,21 +721,24 @@<br>
{<br>
if (typeIdx == SAO_BO)<br>
{<br>
- for (int i = 0; i < SAO_MAX_BO_CLASSES + 1; i++)<br>
- offset[i] = 0;<br>
+ pixel* offsetBo = m_offsetBo;<br>
+ int offset[SAO_NUM_BO_CLASSES];<br>
+ memset(offset, 0, sizeof(offset));<br>
<br>
- for (int i = 0; i < saoLcuParam[addr].length; i++)<br>
- offset[(saoLcuParam[addr].subTypeIdx + i) % SAO_MAX_BO_CLASSES + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;<br>
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)<br>
+ offset[((saoLcuParam[addr].subTypeIdx + i) & (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;<br>
<br>
for (int i = 0; i < (1 << X265_DEPTH); i++)<br>
- offsetBo[i] = m_clipTable[i + offset[m_tableBo[i]]];<br>
+ offsetBo[i] = m_clipTable[i + offset[i >> boShift]];<br>
}<br>
- if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)<br>
+ else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)<br>
{<br>
- for (int i = 0; i < saoLcuParam[addr].length; i++)<br>
+ int offset[NUM_EDGETYPE];<br>
+ offset[0] = 0;<br>
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)<br>
offset[i + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;<br>
<br>
- for (edgeType = 0; edgeType < 6; edgeType++)<br>
+ for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)<br>
m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];<br>
}<br>
}<br>
@@ -823,32 +781,25 @@<br>
<br>
if (plane)<br>
{<br>
- rec = m_pic->getPicYuvRec()->getChromaAddr(plane);<br>
+ rec = m_pic->getPicYuvRec()->getChromaAddr(plane);<br>
picWidthTmp = m_param->sourceWidth >> m_hChromaShift;<br>
}<br>
else<br>
{<br>
- rec = m_pic->getPicYuvRec()->getLumaAddr();<br>
+ rec = m_pic->getPicYuvRec()->getLumaAddr();<br>
picWidthTmp = m_param->sourceWidth;<br>
}<br>
<br>
if (!idxY)<br>
memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);<br>
<br>
- int typeIdx;<br>
-<br>
- int offset[LUMA_GROUP_NUM + 1];<br>
- int idxX;<br>
- int addr;<br>
int frameWidthInCU = m_pic->getFrameWidthInCU();<br>
int stride;<br>
bool isChroma = !!plane;<br>
- bool mergeLeftFlag;<br>
<br>
- int32_t* offsetBo = isChroma ? m_chromaOffsetBo : m_offsetBo;<br>
+ const int boShift = X265_DEPTH - SAO_BO_BITS;<br>
<br>
- offset[0] = 0;<br>
- addr = idxY * frameWidthInCU;<br>
+ int addr = idxY * frameWidthInCU;<br>
if (isChroma)<br>
{<br>
rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);<br>
@@ -872,12 +823,12 @@<br>
<br>
memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);<br>
<br>
- for (idxX = 0; idxX < frameWidthInCU; idxX++)<br>
+ for (int idxX = 0; idxX < frameWidthInCU; idxX++)<br>
{<br>
addr = idxY * frameWidthInCU + idxX;<br>
<br>
- typeIdx = saoLcuParam[addr].typeIdx;<br>
- mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;<br>
+ int typeIdx = saoLcuParam[addr].typeIdx;<br>
+ bool mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;<br>
<br>
if (typeIdx >= 0)<br>
{<br>
@@ -885,21 +836,24 @@<br>
{<br>
if (typeIdx == SAO_BO)<br>
{<br>
- for (int i = 0; i < SAO_MAX_BO_CLASSES + 1; i++)<br>
- offset[i] = 0;<br>
+ pixel* offsetBo = m_offsetBo;<br>
+ int offset[SAO_NUM_BO_CLASSES];<br>
+ memset(offset, 0, sizeof(offset));<br>
<br>
- for (int i = 0; i < saoLcuParam[addr].length; i++)<br>
- offset[(saoLcuParam[addr].subTypeIdx + i) % SAO_MAX_BO_CLASSES + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;<br>
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)<br>
+ offset[((saoLcuParam[addr].subTypeIdx + i) & (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;<br>
<br>
for (int i = 0; i < (1 << X265_DEPTH); i++)<br>
- offsetBo[i] = m_clipTable[i + offset[m_tableBo[i]]];<br>
+ offsetBo[i] = m_clipTable[i + offset[i >> boShift]];<br>
}<br>
- if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)<br>
+ else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)<br>
{<br>
- for (int i = 0; i < saoLcuParam[addr].length; i++)<br>
+ int offset[NUM_EDGETYPE];<br>
+ offset[0] = 0;<br>
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)<br>
offset[i + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;<br>
<br>
- for (uint32_t edgeType = 0; edgeType < 6; edgeType++)<br>
+ for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++)<br>
m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]];<br>
}<br>
}<br>
@@ -942,7 +896,7 @@<br>
saoLcuParam[i].partIdx = 0;<br>
saoLcuParam[i].typeIdx = -1;<br>
saoLcuParam[i].subTypeIdx = 0;<br>
- for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)<br>
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)<br>
saoLcuParam[i].offset[j] = 0;<br>
}<br>
}<br>
@@ -954,10 +908,9 @@<br>
saoUnit->partIdx = 0;<br>
saoUnit->partIdxTmp = 0;<br>
saoUnit->typeIdx = -1;<br>
- saoUnit->length = 0;<br>
saoUnit->subTypeIdx = 0;<br>
<br>
- for (int i = 0; i < 4; i++)<br>
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)<br>
saoUnit->offset[i] = 0;<br>
}<br>
<br>
@@ -966,10 +919,9 @@<br>
saoUnitDst->mergeLeftFlag = saoUnitSrc->mergeLeftFlag;<br>
saoUnitDst->mergeUpFlag = saoUnitSrc->mergeUpFlag;<br>
saoUnitDst->typeIdx = saoUnitSrc->typeIdx;<br>
- saoUnitDst->length = saoUnitSrc->length;<br>
<br>
saoUnitDst->subTypeIdx = saoUnitSrc->subTypeIdx;<br>
- for (int i = 0; i < 4; i++)<br>
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)<br>
saoUnitDst->offset[i] = saoUnitSrc->offset[i];<br>
}<br>
<br>
@@ -1008,17 +960,15 @@<br>
saoLcuParam[addr].partIdxTmp = (int)partIdx;<br>
saoLcuParam[addr].typeIdx = saoQTPart[partIdx].bestType;<br>
saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;<br>
- if (saoLcuParam[addr].typeIdx != -1)<br>
+ if (saoLcuParam[addr].typeIdx >= 0)<br>
{<br>
- saoLcuParam[addr].length = saoQTPart[partIdx].length;<br>
- for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)<br>
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)<br>
saoLcuParam[addr].offset[j] = saoQTPart[partIdx].offset[j];<br>
}<br>
else<br>
{<br>
- saoLcuParam[addr].length = 0;<br>
saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;<br>
- for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)<br>
+ for (int j = 0; j < SAO_NUM_OFFSET; j++)<br>
saoLcuParam[addr].offset[j] = 0;<br>
}<br>
}<br>
@@ -1028,12 +978,9 @@<br>
/* process SAO for one partition */<br>
void SAO::rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane)<br>
{<br>
- int typeIdx;<br>
- int numTotalType = MAX_NUM_SAO_TYPE;<br>
SAOQTPart* onePart = &(psQTPart[partIdx]);<br>
<br>
int64_t estDist;<br>
- int classIdx;<br>
<br>
m_distOrg[partIdx] = 0;<br>
<br>
@@ -1046,50 +993,20 @@<br>
int allowMergeUp;<br>
SaoLcuParam saoLcuParamRdo;<br>
<br>
- for (typeIdx = -1; typeIdx < numTotalType; typeIdx++)<br>
+ for (int typeIdx = -1; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)<br>
{<br>
m_entropyCoder.load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);<br>
m_entropyCoder.resetBits();<br>
<br>
- if (typeIdx == -1)<br>
- {<br>
- for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)<br>
- {<br>
- for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)<br>
- {<br>
- // get bits for iTypeIdx = -1<br>
- allowMergeLeft = 1;<br>
- allowMergeUp = 1;<br>
-<br>
- // reset<br>
- resetSaoUnit(&saoLcuParamRdo);<br>
-<br>
- // set merge flag<br>
- saoLcuParamRdo.mergeUpFlag = 1;<br>
- saoLcuParamRdo.mergeLeftFlag = 1;<br>
-<br>
- if (ry == onePart->startCUY)<br>
- saoLcuParamRdo.mergeUpFlag = 0;<br>
-<br>
- if (rx == onePart->startCUX)<br>
- saoLcuParamRdo.mergeLeftFlag = 0;<br>
-<br>
- m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);<br>
- }<br>
- }<br>
- }<br>
-<br>
if (typeIdx >= 0)<br>
{<br>
estDist = estSaoTypeDist(partIdx, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);<br>
if (typeIdx == SAO_BO)<br>
{<br>
// Estimate Best Position<br>
- double currentRDCost = 0.0;<br>
-<br>
- for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; i++)<br>
+ for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)<br>
{<br>
- currentRDCost = 0.0;<br>
+ double currentRDCost = 0.0;<br>
for (int j = i; j < i + SAO_BO_LEN; j++)<br>
currentRDCost += currentRdCostTableBo[j];<br>
<br>
@@ -1101,7 +1018,7 @@<br>
}<br>
<br>
// Recode all offsets<br>
- for (classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)<br>
+ for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)<br>
estDist += currentDistortionTableBo[classIdx];<br>
}<br>
<br>
@@ -1129,8 +1046,7 @@<br>
// set type and offsets<br>
saoLcuParamRdo.typeIdx = typeIdx;<br>
saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;<br>
- saoLcuParamRdo.length = s_numClass[typeIdx];<br>
- for (classIdx = 0; classIdx < saoLcuParamRdo.length; classIdx++)<br>
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
saoLcuParamRdo.offset[classIdx] = (int)m_offset[partIdx][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];<br>
<br>
m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);<br>
@@ -1152,6 +1068,30 @@<br>
}<br>
else<br>
{<br>
+ for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)<br>
+ {<br>
+ for (int rx = onePart->startCUX; rx <= onePart->endCUX; rx++)<br>
+ {<br>
+ // get bits for iTypeIdx = -1<br>
+ allowMergeLeft = 1;<br>
+ allowMergeUp = 1;<br>
+<br>
+ // reset<br>
+ resetSaoUnit(&saoLcuParamRdo);<br>
+<br>
+ // set merge flag<br>
+ saoLcuParamRdo.mergeUpFlag = 1;<br>
+ saoLcuParamRdo.mergeLeftFlag = 1;<br>
+<br>
+ if (ry == onePart->startCUY)<br>
+ saoLcuParamRdo.mergeUpFlag = 0;<br>
+<br>
+ if (rx == onePart->startCUX)<br>
+ saoLcuParamRdo.mergeLeftFlag = 0;<br>
+<br>
+ m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);<br>
+ }<br>
+ }<br>
if (m_distOrg[partIdx] < m_costPartBest[partIdx])<br>
{<br>
m_costPartBest[partIdx] = (double)m_distOrg[partIdx] + m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;<br>
@@ -1170,18 +1110,15 @@<br>
<br>
if (onePart->bestType != -1)<br>
{<br>
- onePart->length = s_numClass[onePart->bestType];<br>
int minIndex = 0;<br>
if (onePart->bestType == SAO_BO)<br>
{<br>
onePart->subTypeIdx = bestClassTableBo;<br>
minIndex = onePart->subTypeIdx;<br>
}<br>
- for (int i = 0; i < onePart->length; i++)<br>
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)<br>
onePart->offset[i] = (int)m_offset[partIdx][onePart->bestType][minIndex + i + 1];<br>
}<br>
- else<br>
- onePart->length = 0;<br>
}<br>
<br>
/* Run partition tree disable */<br>
@@ -1190,7 +1127,6 @@<br>
SAOQTPart* pOnePart = &(psQTPart[partIdx]);<br>
<br>
pOnePart->bSplit = false;<br>
- pOnePart->length = 0;<br>
pOnePart->bestType = -1;<br>
<br>
if (pOnePart->partLevel < (int)m_maxSplitLevel)<br>
@@ -1236,7 +1172,6 @@<br>
{<br>
costFinal = costSplit;<br>
onePart->bSplit = true;<br>
- onePart->length = 0;<br>
onePart->bestType = -1;<br>
m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);<br>
}<br>
@@ -1271,7 +1206,6 @@<br>
uint32_t picHeightTmp;<br>
int64_t* stats;<br>
int64_t* counts;<br>
- int classIdx;<br>
int startX;<br>
int startY;<br>
int endX;<br>
@@ -1308,6 +1242,8 @@<br>
<br>
//if(iSaoType == BO_0 || iSaoType == BO_1)<br>
{<br>
+ const int boShift = X265_DEPTH - SAO_BO_BITS;<br>
+<br>
if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)<br>
{<br>
numSkipLine = isChroma ? 3 - (2 * m_vChromaShift) : 3;<br>
@@ -1325,12 +1261,9 @@<br>
{<br>
for (x = 0; x < endX; x++)<br>
{<br>
- classIdx = m_tableBo[recon[x]];<br>
- if (classIdx)<br>
- {<br>
- stats[classIdx] += (fenc[x] - recon[x]);<br>
- counts[classIdx]++;<br>
- }<br>
+ int classIdx = 1 + (recon[x] >> boShift);<br>
+ stats[classIdx] += (fenc[x] - recon[x]);<br>
+ counts[classIdx]++;<br>
}<br>
<br>
fenc += stride;<br>
@@ -1338,12 +1271,6 @@<br>
}<br>
}<br>
<br>
- int signLeft;<br>
- int signRight;<br>
- int signDown;<br>
- int signDown1;<br>
- int signDown2;<br>
- uint32_t edgeType;<br>
int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;<br>
int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;<br>
<br>
@@ -1366,11 +1293,11 @@<br>
endX = (rpelx == picWidthTmp) ? lcuWidth - 1 : lcuWidth - numSkipLineRight;<br>
for (y = 0; y < lcuHeight - numSkipLine; y++)<br>
{<br>
- signLeft = signOf(recon[startX] - recon[startX - 1]);<br>
+ int signLeft = signOf(recon[startX] - recon[startX - 1]);<br>
for (x = startX; x < endX; x++)<br>
{<br>
- signRight = signOf(recon[x] - recon[x + 1]);<br>
- edgeType = signRight + signLeft + 2;<br>
+ int signRight = signOf(recon[x] - recon[x + 1]);<br>
+ int edgeType = signRight + signLeft + 2;<br>
signLeft = -signRight;<br>
<br>
stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);<br>
@@ -1411,8 +1338,8 @@<br>
{<br>
for (x = 0; x < endX; x++)<br>
{<br>
- signDown = signOf(recon[x] - recon[x + stride]);<br>
- edgeType = signDown + upBuff1[x] + 2;<br>
+ int signDown = signOf(recon[x] - recon[x + stride]);<br>
+ int edgeType = signDown + upBuff1[x] + 2;<br>
upBuff1[x] = -signDown;<br>
<br>
stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);<br>
@@ -1452,11 +1379,11 @@<br>
<br>
for (y = startY; y < endY; y++)<br>
{<br>
- signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);<br>
+ int signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);<br>
for (x = startX; x < endX; x++)<br>
{<br>
- signDown1 = signOf(recon[x] - recon[x + stride + 1]);<br>
- edgeType = signDown1 + upBuff1[x] + 2;<br>
+ int signDown1 = signOf(recon[x] - recon[x + stride + 1]);<br>
+ int edgeType = signDown1 + upBuff1[x] + 2;<br>
upBufft[x + 1] = -signDown1;<br>
stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);<br>
counts[s_eoTable[edgeType]]++;<br>
@@ -1500,8 +1427,8 @@<br>
{<br>
for (x = startX; x < endX; x++)<br>
{<br>
- signDown1 = signOf(recon[x] - recon[x + stride - 1]);<br>
- edgeType = signDown1 + upBuff1[x] + 2;<br>
+ int signDown1 = signOf(recon[x] - recon[x + stride - 1]);<br>
+ int edgeType = signDown1 + upBuff1[x] + 2;<br>
upBuff1[x - 1] = -signDown1;<br>
stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);<br>
counts[s_eoTable[edgeType]]++;<br>
@@ -1518,7 +1445,6 @@<br>
<br>
void SAO::calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY)<br>
{<br>
- int addr;<br>
int x, y;<br>
<br>
pixel* fenc;<br>
@@ -1528,7 +1454,6 @@<br>
uint32_t bPelY;<br>
int64_t* stats;<br>
int64_t* count;<br>
- int classIdx;<br>
int startX;<br>
int startY;<br>
int endX;<br>
@@ -1545,11 +1470,13 @@<br>
int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;<br>
int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;<br>
<br>
+ const int boShift = X265_DEPTH - SAO_BO_BITS;<br>
+<br>
// NOTE: Row<br>
{<br>
// NOTE: Col<br>
{<br>
- addr = idxX + frameWidthInCU * idxY;<br>
+ int addr = idxX + frameWidthInCU * idxY;<br>
cu = pic->getCU(addr);<br>
<br>
uint32_t picWidthTmp = m_param->sourceWidth;<br>
@@ -1606,26 +1533,15 @@<br>
if (x < startX && y < startY)<br>
continue;<br>
<br>
- classIdx = m_tableBo[recon[x]];<br>
- if (classIdx)<br>
- {<br>
- stats[classIdx] += (fenc[x] - recon[x]);<br>
- count[classIdx]++;<br>
- }<br>
+ int classIdx = 1 + (recon[x] >> boShift);<br>
+ stats[classIdx] += (fenc[x] - recon[x]);<br>
+ count[classIdx]++;<br>
}<br>
<br>
fenc += stride;<br>
recon += stride;<br>
}<br>
<br>
- int signLeft;<br>
- int signRight;<br>
- int signDown;<br>
- int signDown1;<br>
- int signDown2;<br>
-<br>
- uint32_t edgeType;<br>
-<br>
//if (iSaoType == EO_0)<br>
<br>
numSkipLine = isChroma ? 1 : 3;<br>
@@ -1644,11 +1560,11 @@<br>
<br>
for (y = 0; y < lcuHeight; y++)<br>
{<br>
- signLeft = signOf(recon[firstX] - recon[firstX - 1]);<br>
+ int signLeft = signOf(recon[firstX] - recon[firstX - 1]);<br>
for (x = firstX; x < endX; x++)<br>
{<br>
- signRight = signOf(recon[x] - recon[x + 1]);<br>
- edgeType = signRight + signLeft + 2;<br>
+ int signRight = signOf(recon[x] - recon[x + 1]);<br>
+ int edgeType = signRight + signLeft + 2;<br>
signLeft = -signRight;<br>
<br>
if (x < startX && y < startY)<br>
@@ -1690,8 +1606,8 @@<br>
{<br>
for (x = 0; x < lcuWidth; x++)<br>
{<br>
- signDown = signOf(recon[x] - recon[x + stride]);<br>
- edgeType = signDown + upBuff1[x] + 2;<br>
+ int signDown = signOf(recon[x] - recon[x + stride]);<br>
+ int edgeType = signDown + upBuff1[x] + 2;<br>
upBuff1[x] = -signDown;<br>
<br>
if (x < startX && y < startY)<br>
@@ -1733,11 +1649,11 @@<br>
<br>
for (y = firstY; y < endY; y++)<br>
{<br>
- signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);<br>
+ int signDown2 = signOf(recon[stride + startX] - recon[startX - 1]);<br>
for (x = firstX; x < endX; x++)<br>
{<br>
- signDown1 = signOf(recon[x] - recon[x + stride + 1]);<br>
- edgeType = signDown1 + upBuff1[x] + 2;<br>
+ int signDown1 = signOf(recon[x] - recon[x + stride + 1]);<br>
+ int edgeType = signDown1 + upBuff1[x] + 2;<br>
upBufft[x + 1] = -signDown1;<br>
<br>
if (x < startX && y < startY)<br>
@@ -1784,8 +1700,8 @@<br>
{<br>
for (x = firstX; x < endX; x++)<br>
{<br>
- signDown1 = signOf(recon[x] - recon[x + stride - 1]);<br>
- edgeType = signDown1 + upBuff1[x] + 2;<br>
+ int signDown1 = signOf(recon[x] - recon[x + stride - 1]);<br>
+ int edgeType = signDown1 + upBuff1[x] + 2;<br>
upBuff1[x - 1] = -signDown1;<br>
<br>
if (x < startX && y < startY)<br>
@@ -1807,12 +1723,10 @@<br>
<br>
void SAO::getSaoStats(SAOQTPart *psQTPart, int plane)<br>
{<br>
- int levelIdx, partIdx, typeIdx, classIdx;<br>
+ int levelIdx, partIdx;<br>
int i;<br>
- int numTotalType = MAX_NUM_SAO_TYPE;<br>
int lcuIdx;<br>
int lcuIdy;<br>
- int addr;<br>
int frameWidthInCU = m_pic->getFrameWidthInCU();<br>
int downPartIdx;<br>
int partStart;<br>
@@ -1827,7 +1741,7 @@<br>
{<br>
for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)<br>
{<br>
- addr = lcuIdy * frameWidthInCU + lcuIdx;<br>
+ int addr = lcuIdy * frameWidthInCU + lcuIdx;<br>
calcSaoStatsCu(addr, partIdx, plane);<br>
}<br>
}<br>
@@ -1841,7 +1755,7 @@<br>
{<br>
for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; lcuIdx++)<br>
{<br>
- addr = lcuIdy * frameWidthInCU + lcuIdx;<br>
+ int addr = lcuIdy * frameWidthInCU + lcuIdx;<br>
calcSaoStatsCu(addr, partIdx, plane);<br>
}<br>
}<br>
@@ -1858,9 +1772,9 @@<br>
for (i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)<br>
{<br>
downPartIdx = onePart->downPartsIdx[i];<br>
- for (typeIdx = 0; typeIdx < numTotalType; typeIdx++)<br>
+ for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)<br>
{<br>
- for (classIdx = 0; classIdx < (typeIdx < SAO_BO ? s_numClass[typeIdx] : SAO_MAX_BO_CLASSES) + 1; classIdx++)<br>
+ for (int classIdx = 0; classIdx < (typeIdx < SAO_BO ? SAO_EO_LEN : SAO_NUM_BO_CLASSES) + 1; classIdx++)<br>
{<br>
m_offsetOrg[partIdx][typeIdx][classIdx] += m_offsetOrg[downPartIdx][typeIdx][classIdx];<br>
m_count[partIdx][typeIdx][classIdx] += m_count[downPartIdx][typeIdx][classIdx];<br>
@@ -1923,16 +1837,15 @@<br>
/* Check merge SAO unit */<br>
void SAO::checkMerge(SaoLcuParam * saoUnitCurr, SaoLcuParam * saoUnitCheck, int dir)<br>
{<br>
- int i;<br>
int countDiff = 0;<br>
<br>
if (saoUnitCurr->partIdx != saoUnitCheck->partIdx)<br>
{<br>
- if (saoUnitCurr->typeIdx != -1)<br>
+ if (saoUnitCurr->typeIdx >= 0)<br>
{<br>
if (saoUnitCurr->typeIdx == saoUnitCheck->typeIdx)<br>
{<br>
- for (i = 0; i < saoUnitCurr->length; i++)<br>
+ for (int i = 0; i < SAO_NUM_OFFSET; i++)<br>
countDiff += (saoUnitCurr->offset[i] != saoUnitCheck->offset[i]);<br>
<br>
countDiff += (saoUnitCurr->subTypeIdx != saoUnitCheck->subTypeIdx);<br>
@@ -1979,24 +1892,22 @@<br>
oneUnitFlag = 1;<br>
else<br>
{<br>
- int i, j, addr, addrUp, addrLeft, idx, idxUp, idxLeft, idxCount;<br>
-<br>
oneUnitFlag = 0;<br>
<br>
- idxCount = -1;<br>
+ int idxCount = -1;<br>
saoLcuParam[0].mergeUpFlag = 0;<br>
saoLcuParam[0].mergeLeftFlag = 0;<br>
<br>
- for (j = 0; j < m_numCuInHeight; j++)<br>
+ for (int j = 0; j < m_numCuInHeight; j++)<br>
{<br>
- for (i = 0; i < m_numCuInWidth; i++)<br>
+ for (int i = 0; i < m_numCuInWidth; i++)<br>
{<br>
- addr = i + j * m_numCuInWidth;<br>
- addrLeft = (addr % m_numCuInWidth == 0) ? -1 : addr - 1;<br>
- addrUp = (addr < m_numCuInWidth) ? -1 : addr - m_numCuInWidth;<br>
- idx = saoLcuParam[addr].partIdxTmp;<br>
- idxLeft = (addrLeft == -1) ? -1 : saoLcuParam[addrLeft].partIdxTmp;<br>
- idxUp = (addrUp == -1) ? -1 : saoLcuParam[addrUp].partIdxTmp;<br>
+ int addr = i + j * m_numCuInWidth;<br>
+ int addrUp = (j == 0) ? -1 : addr - m_numCuInWidth;<br>
+ int addrLeft = (i == 0) ? -1 : addr - 1;<br>
+ int idx = saoLcuParam[addr].partIdxTmp;<br>
+ int idxLeft = (addrLeft == -1) ? -1 : saoLcuParam[addrLeft].partIdxTmp;<br>
+ int idxUp = (addrUp == -1) ? -1 : saoLcuParam[addrUp].partIdxTmp;<br>
<br>
if (idx != idxLeft && idx != idxUp)<br>
{<br>
@@ -2057,21 +1968,17 @@<br>
<br>
void SAO::rdoSaoUnitRow(SAOParam *saoParam, int idxY)<br>
{<br>
- int idxX;<br>
int frameWidthInCU = saoParam->numCuInWidth;<br>
int j, k;<br>
- int addr = 0;<br>
- int addrUp = -1;<br>
- int addrLeft = -1;<br>
int compIdx = 0;<br>
SaoLcuParam mergeSaoParam[3][2];<br>
double compDistortion[3];<br>
<br>
- for (idxX = 0; idxX < frameWidthInCU; idxX++)<br>
+ for (int idxX = 0; idxX < frameWidthInCU; idxX++)<br>
{<br>
- addr = idxX + frameWidthInCU * idxY;<br>
- addrUp = addr < frameWidthInCU ? -1 : idxX + frameWidthInCU * (idxY - 1);<br>
- addrLeft = idxX == 0 ? -1 : idxX - 1 + frameWidthInCU * idxY;<br>
+ int addr = idxX + idxY * frameWidthInCU;<br>
+ int addrUp = idxY == 0 ? -1 : addr - frameWidthInCU;<br>
+ int addrLeft = idxX == 0 ? -1 : addr - 1;<br>
int allowMergeLeft = 1;<br>
int allowMergeUp = 1;<br>
uint32_t rate;<br>
@@ -2111,7 +2018,7 @@<br>
}<br>
}<br>
<br>
- saoParam->saoLcuParam[compIdx][addr].typeIdx = -1;<br>
+ saoParam->saoLcuParam[compIdx][addr].typeIdx = -1;<br>
saoParam->saoLcuParam[compIdx][addr].mergeUpFlag = 0;<br>
saoParam->saoLcuParam[compIdx][addr].mergeLeftFlag = 0;<br>
saoParam->saoLcuParam[compIdx][addr].subTypeIdx = 0;<br>
@@ -2173,9 +2080,9 @@<br>
}<br>
}<br>
<br>
- if (saoParam->saoLcuParam[0][addr].typeIdx == -1)<br>
+ if (saoParam->saoLcuParam[0][addr].typeIdx < 0)<br>
m_numNoSao[0]++;<br>
- if (saoParam->saoLcuParam[1][addr].typeIdx == -1)<br>
+ if (saoParam->saoLcuParam[1][addr].typeIdx < 0)<br>
m_numNoSao[1] += 2;<br>
m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);<br>
m_entropyCoder.store(m_rdEntropyCoders[0][CI_CURR_BEST]);<br>
@@ -2187,9 +2094,8 @@<br>
inline int64_t SAO::estSaoTypeDist(int compIdx, int typeIdx, int shift, double lambda, int32_t *currentDistortionTableBo, double *currentRdCostTableBo)<br>
{<br>
int64_t estDist = 0;<br>
- int classIdx;<br>
<br>
- for (classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? s_numClass[typeIdx] + 1 : SAO_MAX_BO_CLASSES + 1); classIdx++)<br>
+ for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + 1 : SAO_NUM_BO_CLASSES + 1); classIdx++)<br>
{<br>
if (typeIdx == SAO_BO)<br>
{<br>
@@ -2200,7 +2106,7 @@<br>
{<br>
m_offset[compIdx][typeIdx][classIdx] = (int64_t)roundIDBI((double)(m_offsetOrg[compIdx][typeIdx][classIdx] << (X265_DEPTH - 8)) / (double)(m_count[compIdx][typeIdx][classIdx] << SAO_BIT_INC));<br>
m_offset[compIdx][typeIdx][classIdx] = Clip3(-OFFSET_THRESH + 1, OFFSET_THRESH - 1, (int)m_offset[compIdx][typeIdx][classIdx]);<br>
- if (typeIdx < 4)<br>
+ if (typeIdx < SAO_BO)<br>
{<br>
if (m_offset[compIdx][typeIdx][classIdx] < 0 && classIdx < 3)<br>
m_offset[compIdx][typeIdx][classIdx] = 0;<br>
@@ -2231,12 +2137,11 @@<br>
//Clean up, best_q_offset.<br>
int64_t iterOffset, tempOffset;<br>
int64_t tempDist, tempRate;<br>
- double tempCost, tempMinCost;<br>
int64_t offsetOutput = 0;<br>
<br>
iterOffset = offsetInput;<br>
// Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.<br>
- tempMinCost = lambda;<br>
+ double tempMinCost = lambda;<br>
while (iterOffset != 0)<br>
{<br>
// Calculate the bits required for signalling the offset<br>
@@ -2247,7 +2152,7 @@<br>
// Do the dequntization before distorion calculation<br>
tempOffset = iterOffset << bitIncrease;<br>
tempDist = estSaoDist(count, tempOffset, offsetOrg, shift);<br>
- tempCost = ((double)tempDist + lambda * (double)tempRate);<br>
+ double tempCost = ((double)tempDist + lambda * (double)tempRate);<br>
if (tempCost < tempMinCost)<br>
{<br>
tempMinCost = tempCost;<br>
@@ -2267,10 +2172,7 @@<br>
void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,<br>
SaoLcuParam *compSaoParam, double *compDistortion)<br>
{<br>
- int typeIdx;<br>
-<br>
int64_t estDist;<br>
- int classIdx;<br>
int64_t bestDist;<br>
<br>
SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[plane][addr]);<br>
@@ -2287,7 +2189,6 @@<br>
double currentRdCostTableBo[MAX_NUM_SAO_CLASS];<br>
<br>
SaoLcuParam saoLcuParamRdo;<br>
- double estRate = 0;<br>
<br>
resetSaoUnit(&saoLcuParamRdo);<br>
<br>
@@ -2298,18 +2199,16 @@<br>
copySaoUnit(saoLcuParam, &saoLcuParamRdo);<br>
bestDist = 0;<br>
<br>
- for (typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)<br>
+ for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)<br>
{<br>
estDist = estSaoTypeDist(plane, typeIdx, 0, m_lumaLambda, currentDistortionTableBo, currentRdCostTableBo);<br>
<br>
if (typeIdx == SAO_BO)<br>
{<br>
// Estimate Best Position<br>
- double currentRDCost = 0.0;<br>
-<br>
- for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; i++)<br>
+ for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)<br>
{<br>
- currentRDCost = 0.0;<br>
+ double currentRDCost = 0.0;<br>
for (int j = i; j < i + SAO_BO_LEN; j++)<br>
currentRDCost += currentRdCostTableBo[j];<br>
<br>
@@ -2323,23 +2222,22 @@<br>
// Re code all Offsets<br>
// Code Center<br>
estDist = 0;<br>
- for (classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)<br>
+ for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)<br>
estDist += currentDistortionTableBo[classIdx];<br>
}<br>
resetSaoUnit(&saoLcuParamRdo);<br>
- saoLcuParamRdo.length = s_numClass[typeIdx];<br>
saoLcuParamRdo.typeIdx = typeIdx;<br>
saoLcuParamRdo.mergeLeftFlag = 0;<br>
saoLcuParamRdo.mergeUpFlag = 0;<br>
saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo : 0;<br>
- for (classIdx = 0; classIdx < saoLcuParamRdo.length; classIdx++)<br>
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
saoLcuParamRdo.offset[classIdx] = (int)m_offset[plane][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];<br>
<br>
m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);<br>
m_entropyCoder.resetBits();<br>
m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);<br>
<br>
- estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
+ uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
m_cost[plane][typeIdx] = (double)((double)estDist + m_lumaLambda * (double)estRate);<br>
<br>
if (m_cost[plane][typeIdx] < dCostPartBest)<br>
@@ -2367,12 +2265,12 @@<br>
if (saoLcuParamNeighbor != NULL)<br>
{<br>
estDist = 0;<br>
- typeIdx = saoLcuParamNeighbor->typeIdx;<br>
+ int typeIdx = saoLcuParamNeighbor->typeIdx;<br>
if (typeIdx >= 0)<br>
{<br>
int mergeBandPosition = (typeIdx == SAO_BO) ? saoLcuParamNeighbor->subTypeIdx : 0;<br>
int mergeOffset;<br>
- for (classIdx = 0; classIdx < s_numClass[typeIdx]; classIdx++)<br>
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
{<br>
mergeOffset = saoLcuParamNeighbor->offset[classIdx];<br>
estDist += estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition + 1], 0);<br>
@@ -2395,8 +2293,6 @@<br>
{<br>
int64_t estDist[2];<br>
int64_t bestDist = 0;<br>
- int typeIdx;<br>
- int classIdx;<br>
<br>
SaoLcuParam* saoLcuParam[2] = { &(saoParam->saoLcuParam[1][addr]), &(saoParam->saoLcuParam[2][addr]) };<br>
SaoLcuParam* saoLcuParamNeighbor[2] = { NULL, NULL };<br>
@@ -2417,7 +2313,6 @@<br>
double costPartBest = MAX_DOUBLE;<br>
double bestRDCostTableBo;<br>
double currentRdCostTableBo[MAX_NUM_SAO_CLASS];<br>
- double estRate = 0;<br>
int bestClassTableBo[2] = { 0, 0 };<br>
int currentDistortionTableBo[MAX_NUM_SAO_CLASS];<br>
<br>
@@ -2435,19 +2330,18 @@<br>
copySaoUnit(saoLcuParam[0], &saoLcuParamRdo[0]);<br>
copySaoUnit(saoLcuParam[1], &saoLcuParamRdo[1]);<br>
<br>
- for (typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)<br>
+ for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)<br>
{<br>
if (typeIdx == SAO_BO)<br>
{<br>
// Estimate Best Position<br>
for (int compIdx = 0; compIdx < 2; compIdx++)<br>
{<br>
- double currentRDCost = 0.0;<br>
bestRDCostTableBo = MAX_DOUBLE;<br>
estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, 0, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);<br>
- for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; i++)<br>
+ for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)<br>
{<br>
- currentRDCost = 0.0;<br>
+ double currentRDCost = 0.0;<br>
for (int j = i; j < i + SAO_BO_LEN; j++)<br>
currentRDCost += currentRdCostTableBo[j];<br>
<br>
@@ -2461,7 +2355,7 @@<br>
// Re code all Offsets<br>
// Code Center<br>
estDist[compIdx] = 0;<br>
- for (classIdx = bestClassTableBo[compIdx]; classIdx < bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)<br>
+ for (int classIdx = bestClassTableBo[compIdx]; classIdx < bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)<br>
estDist[compIdx] += currentDistortionTableBo[classIdx];<br>
}<br>
}<br>
@@ -2477,18 +2371,17 @@<br>
for (int compIdx = 0; compIdx < 2; compIdx++)<br>
{<br>
resetSaoUnit(&saoLcuParamRdo[compIdx]);<br>
- saoLcuParamRdo[compIdx].length = s_numClass[typeIdx];<br>
saoLcuParamRdo[compIdx].typeIdx = typeIdx;<br>
saoLcuParamRdo[compIdx].mergeLeftFlag = 0;<br>
saoLcuParamRdo[compIdx].mergeUpFlag = 0;<br>
saoLcuParamRdo[compIdx].subTypeIdx = (typeIdx == SAO_BO) ? bestClassTableBo[compIdx] : 0;<br>
- for (classIdx = 0; classIdx < saoLcuParamRdo[compIdx].length; classIdx++)<br>
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
saoLcuParamRdo[compIdx].offset[classIdx] = (int)m_offset[compIdx + 1][typeIdx][classIdx + saoLcuParamRdo[compIdx].subTypeIdx + 1];<br>
<br>
m_entropyCoder.codeSaoOffset(&saoLcuParamRdo[compIdx], compIdx + 1);<br>
}<br>
<br>
- estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
+ uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();<br>
m_cost[1][typeIdx] = (double)((double)(estDist[0] + estDist[1]) + m_chromaLambda * (double)estRate);<br>
<br>
if (m_cost[1][typeIdx] < costPartBest)<br>
@@ -2520,11 +2413,11 @@<br>
if (saoLcuParamNeighbor[compIdx] != NULL)<br>
{<br>
estDist[compIdx] = 0;<br>
- typeIdx = saoLcuParamNeighbor[compIdx]->typeIdx;<br>
+ int typeIdx = saoLcuParamNeighbor[compIdx]->typeIdx;<br>
if (typeIdx >= 0)<br>
{<br>
int mergeBandPosition = (typeIdx == SAO_BO) ? saoLcuParamNeighbor[compIdx]->subTypeIdx : 0;<br>
- for (classIdx = 0; classIdx < s_numClass[typeIdx]; classIdx++)<br>
+ for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)<br>
{<br>
int mergeOffset = saoLcuParamNeighbor[compIdx]->offset[classIdx];<br>
estDist[compIdx] += estSaoDist(m_count[compIdx + 1][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, m_offsetOrg[compIdx + 1][typeIdx][classIdx + mergeBandPosition + 1], 0);<br>
diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.h<br>
--- a/source/encoder/sao.h Thu Sep 11 19:24:28 2014 +0530<br>
+++ b/source/encoder/sao.h Fri Sep 12 11:01:54 2014 +0900<br>
@@ -36,7 +36,7 @@<br>
{<br>
SAO_EO_LEN = 4,<br>
SAO_BO_LEN = 4,<br>
- SAO_MAX_BO_CLASSES = 32<br>
+ SAO_NUM_BO_CLASSES = 32<br>
};<br>
<br>
enum SAOType<br>
@@ -55,15 +55,13 @@<br>
<br>
enum { SAO_MAX_DEPTH = 4 };<br>
enum { SAO_BO_BITS = 5 };<br>
- enum { LUMA_GROUP_NUM = 1 << SAO_BO_BITS };<br>
- enum { MAX_NUM_SAO_OFFSETS = 4 };<br>
enum { MAX_NUM_SAO_CLASS = 33 };<br>
enum { SAO_BIT_INC = X265_MAX(X265_DEPTH - 10, 0) };<br>
enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };<br>
+ enum { NUM_EDGETYPE = 5 };<br>
<br>
static const int s_numCulPartsLevel[5];<br>
- static const int s_numClass[MAX_NUM_SAO_TYPE];<br>
- static const uint32_t s_eoTable[9];<br>
+ static const uint32_t s_eoTable[NUM_EDGETYPE];<br>
<br>
typedef int64_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);<br>
typedef int64_t (PerType[MAX_NUM_SAO_TYPE]);<br>
@@ -86,9 +84,8 @@<br>
PerPlane* m_offsetOrgPreDblk;<br>
<br>
double m_depthSaoRate[2][4];<br>
- int32_t* m_offsetBo;<br>
- int32_t* m_chromaOffsetBo;<br>
- int8_t m_offsetEo[LUMA_GROUP_NUM];<br>
+ pixel* m_offsetBo;<br>
+ int8_t m_offsetEo[NUM_EDGETYPE];<br>
<br>
int m_maxSplitLevel;<br>
<br>
@@ -100,7 +97,6 @@<br>
<br>
pixel* m_clipTable;<br>
pixel* m_clipTableBase;<br>
- pixel* m_tableBo;<br>
<br>
pixel* m_tmpU1[3];<br>
pixel* m_tmpU2[3];<br>
_______________________________________________<br>
x265-devel mailing list<br>
<a href="mailto:x265-devel@videolan.org">x265-devel@videolan.org</a><br>
<a href="https://mailman.videolan.org/listinfo/x265-devel" target="_blank">https://mailman.videolan.org/listinfo/x265-devel</a><br>
</blockquote></div><br></div>