[x265] [PATCH] calculate SSIM for each Row after deblock, sao
Steve Borho
steve at borho.org
Sun Oct 6 23:07:17 CEST 2013
On Sun, Oct 6, 2013 at 3:42 PM, Aarthi Thirumalai <
aarthi at multicorewareinc.com> wrote:
> # HG changeset patch
> # User Aarthi Thirumalai
> # Date 1381092064 -19800
> # Mon Oct 07 02:11:04 2013 +0530
> # Node ID e116b5414806b6c981998f60877b3e0314d14c48
> # Parent e58bbff11696e0f6f4f5b2975d1b449174e9e839
> calculate SSIM for each Row after deblock, sao
>
> diff -r e58bbff11696 -r e116b5414806 source/Lib/TLibEncoder/TEncTop.cpp
> --- a/source/Lib/TLibEncoder/TEncTop.cpp Mon Oct 07 02:09:36 2013
> +0530
> +++ b/source/Lib/TLibEncoder/TEncTop.cpp Mon Oct 07 02:11:04 2013
> +0530
> @@ -513,7 +513,7 @@
> int width = recon->getWidth() - getPad(0);
> int height = recon->getHeight() - getPad(1);
> int size = width * height;
> -
> + double ssim = 0;
> UInt64 ssdY = computeSSD(orig->getLumaAddr(), recon->getLumaAddr(),
> stride, width, height);
>
> height >>= 1;
> @@ -604,7 +604,14 @@
> {
> m_analyzeB.addResult(psnrY, psnrU, psnrV, (double)bits);
> }
> -
> + if (param.bEnableSsim)
> + {
> + if(pic->getSlice()->m_ssimCnt > 0)
>
White-space: add a space between "if" and "(" here, as in the "if (param.bEnableSsim)" line just above.
> + {
> + ssim += pic->getSlice()->m_ssim / pic->getSlice()->m_ssimCnt;
> + m_globalSsim += ssim;
> + }
> + }
> if (param.logLevel >= X265_LOG_DEBUG)
> {
> char c = (slice->isIntra() ? 'I' : slice->isInterP() ? 'P' : 'B');
> diff -r e58bbff11696 -r e116b5414806 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp Mon Oct 07 02:09:36 2013 +0530
> +++ b/source/encoder/frameencoder.cpp Mon Oct 07 02:11:04 2013 +0530
> @@ -92,7 +92,7 @@
> }
>
> m_frameFilter.destroy();
> -
> + X265_FREE(ssimBuf);
> // wait for worker thread to exit
> stop();
> }
> @@ -111,6 +111,15 @@
> m_rows[i].create(top);
> }
>
> + if (m_cfg->param.bEnableSsim)
> + {
> +#if !HIGH_BIT_DEPTH
> + CHECKED_MALLOC(ssimBuf, int32_t, m_cfg->param.bEnableSsim * 8 *
> (m_cfg->param.sourceWidth / 4 + 3));
> +#else
> + CHECKED_MALLOC(ssimBuf, int64_t, m_cfg->param.bEnableSsim * 8 *
> (m_cfg->param.sourceWidth / 4 + 3));
>
There is no point in including m_cfg->param.bEnableSsim in the size calculation; this code is already inside an "if (m_cfg->param.bEnableSsim)" block.
Also, it would be a lot cleaner to place code like this in your header:
#if HIGH_BIT_DEPTH
typedef uint64_t ssim_t;
#else
typedef uint32_t ssim_t;
#endif
Then you can just use ssim_t for the pointer type and malloc calls.
> +#endif
> + }
> +
> // NOTE: 2 times of numRows because both Encoder and Filter in same
> queue
> if (!WaveFront::init(m_numRows * 2))
> {
> @@ -168,6 +177,10 @@
> assert(0);
> }
> start();
> + return;
> +fail:
> + X265_FREE(ssimBuf);
> + ssimBuf = 0;
>
If the malloc failed, ssimBuf is already NULL, so X265_FREE(ssimBuf) is a NOP and so is the "ssimBuf = 0" assignment.
> }
>
> int FrameEncoder::getStreamHeaders(NALUnitEBSP **nalunits)
> @@ -540,6 +553,36 @@
> slice->setSaoEnabledFlag((saoParam->bSaoFlag[0] == 1) ? true :
> false);
> }
>
> + /*Compute SSIM if enabled*/
> + if (m_cfg->param.bEnableSsim && ssimBuf)
> + {
> + pixel *rec = (pixel*)m_pic->getPicYuvRec()->getLumaAddr();
> + pixel *org = (pixel*)m_pic->getPicYuvOrg()->getLumaAddr();
> + int stride1 = m_pic->getPicYuvOrg()->getStride();
> + int stride2 = m_pic->getPicYuvRec()->getStride();
> + for (int row = 0; row < m_numRows; row++)
> + {
> + int bEnd = ((row + 1) == (this->m_numRows - 1));
> + int bStart = (row == 0);
> + int minPixY = row * 64 - 4 * !bStart;
> + int maxPixY = (row + 1) * 64 - 4 * !bEnd;
> + int ssim_cnt;
> + x265_emms();
> +
> + /* SSIM is done for each row in blocks of 4x4 . The First
> blocks are offset by 2 pixels to the right
> + * to avoid alignment of ssim blocks with DCT blocks. */
> + minPixY += bStart ? 2 : -6;
> +#if HIGH_BIT_DEPTH
> + slice->m_ssim += calculateSSIM_int64(rec + 2 + minPixY *
> stride1, stride1, org + 2 + minPixY * stride2, stride2,
> + m_cfg->param.sourceWidth - 2, maxPixY - minPixY, ssimBuf,
> &ssim_cnt);
> +#else
> + slice->m_ssim += calculateSSIM_int32(rec + 2 + minPixY *
> stride1, stride1, org + 2 + minPixY * stride2, stride2,
> + m_cfg->param.sourceWidth - 2, maxPixY - minPixY, ssimBuf,
> &ssim_cnt);
> +#endif
> + slice->m_ssimCnt += ssim_cnt;
> + }
> + }
> +
> entropyCoder->setBitstream(NULL);
>
> // Reconstruction slice
> @@ -687,6 +730,73 @@
> delete bitstreamRedirect;
> }
>
> +/* Function to calculate SSIM for each row */
> +float FrameEncoder::calculateSSIM_int32(pixel *pix1, intptr_t stride1,
> pixel *pix2, intptr_t stride2, int width, int height, void *buf, int *cnt)
> +{
> + int z = 0;
> + float ssim = 0.0;
> +
> + int32_t(*sum0)[4] = (int32_t(*)[4])buf;
> + int32_t(*sum1)[4] = sum0 + (width >> 2) + 3;
> + width >>= 2;
> + height >>= 2;
> +
> + for (int y = 1; y < height; y++)
> + {
> + for (; z <= y; z++)
> + {
> + void* swap = sum0;
> + sum0 = sum1;
> + sum1 = (int32_t(*)[4])swap;
> + for (int x = 0; x < width; x += 2)
> + {
> + primitives.ssim_4x4x2_core_int32(&pix1[4 * (x + (z *
> stride1))], stride1, &pix2[4 * (x + (z * stride2))], stride2, &sum0[x]);
> + }
> + }
> +
> + for (int x = 0; x < width - 1; x += 4)
> + {
> + ssim += primitives.ssim_end4_int32(sum0 + x, sum1 + x,
> X265_MIN(4, width - x - 1));
> + }
> + }
> +
> + *cnt = (height - 1) * (width - 1);
> + return ssim;
> +}
> +
> +float FrameEncoder::calculateSSIM_int64(pixel *pix1, intptr_t stride1,
> pixel *pix2, intptr_t stride2, int width, int height, void *buf, int *cnt)
> +{
> + int z = 0;
> + float ssim = 0.0;
> +
> + int64_t(*sum0)[4] = (int64_t(*)[4])buf;
> + int64_t(*sum1)[4] = sum0 + (width >> 2) + 3;
> + width >>= 2;
> + height >>= 2;
> +
> + for (int y = 1; y < height; y++)
> + {
> + for (; z <= y; z++)
> + {
> + void* swap = sum0;
> + sum0 = sum1;
> + sum1 = (int64_t(*)[4])swap;
> + for (int x = 0; x < width; x += 2)
> + {
> + primitives.ssim_4x4x2_core_int64(&pix1[4 * (x + (z *
> stride1))], stride1, &pix2[4 * (x + (z * stride2))], stride2, &sum0[x]);
> + }
> + }
> +
> + for (int x = 0; x < width - 1; x += 4)
> + {
> + ssim += primitives.ssim_end4_int64(sum0 + x, sum1 + x,
> X265_MIN(4, width - x - 1));
> + }
> + }
> +
> + *cnt = (height - 1) * (width - 1);
> + return ssim;
> +}
> +
> void FrameEncoder::encodeSlice(TComOutputBitstream* substreams)
> {
> // choose entropy coder
> diff -r e58bbff11696 -r e116b5414806 source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h Mon Oct 07 02:09:36 2013 +0530
> +++ b/source/encoder/frameencoder.h Mon Oct 07 02:11:04 2013 +0530
> @@ -145,6 +145,10 @@
> /* called by compressFrame to perform wave-front compression analysis
> */
> void compressCTURows();
>
> + /* called by compressFrame to calculate SSIM for each row . */
> + float calculateSSIM_int32(pixel *pix1, intptr_t stride1, pixel *pix2,
> intptr_t stride2, int width, int height, void *buf, int *cnt);
> + float calculateSSIM_int64(pixel *pix1, intptr_t stride1, pixel *pix2,
> intptr_t stride2, int width, int height, void *buf, int *cnt);
> +
> void encodeSlice(TComOutputBitstream* substreams);
>
> /* blocks until worker thread is done, returns encoded picture and
> bitstream */
> @@ -185,6 +189,9 @@
> int m_filterRowDelay;
> CTURow* m_rows;
> Event m_completionEvent;
> +
> + /* Temp Storage for ssim computation that doesnt need repeated malloc
> */
> + void * ssimBuf;
> };
> }
>
> _______________________________________________
> x265-devel mailing list
> x265-devel at videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
--
Steve Borho
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20131006/be9e9de8/attachment-0001.html>
More information about the x265-devel
mailing list