[x265] [PATCH 10/14] Implement ASM for SSD used for motion estimation
Snehaa Giridharan
snehaa at multicorewareinc.com
Wed Oct 19 07:31:20 UTC 2022
From 0d1748cbf58e83b6357e4ff3c9696687e3c30ddd Mon Sep 17 00:00:00 2001
From: ashok2022 <ashok at multicorewareinc.com>
Date: Thu, 13 Oct 2022 20:22:07 +0530
Subject: [PATCH] Implement ASM for SSD used for motion estimation
---
source/common/temporalfilter.cpp | 47 +++++++++++++++++++++++++-------
source/common/temporalfilter.h | 31 ++++++++++++++++-----
source/encoder/frameencoder.cpp | 2 ++
source/encoder/motion.cpp | 25 +++++++++++++++++
source/encoder/motion.h | 2 +-
5 files changed, 89 insertions(+), 18 deletions(-)
diff --git a/source/common/temporalfilter.cpp b/source/common/temporalfilter.cpp
index 1d5a7d076..a937e2a67 100644
--- a/source/common/temporalfilter.cpp
+++ b/source/common/temporalfilter.cpp
@@ -1,6 +1,8 @@
/*****************************************************************************
* Copyright (C) 2013-2021 MulticoreWare, Inc
*
+ * Authors: Ashok Kumar Mishra <ashok at multicorewareinc.com>
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -18,8 +20,9 @@
* This program is also available under a commercial proprietary license.
* For more information, contact us at license @ x265.com.
*****************************************************************************/
-
+#include "common.h"
#include "temporalfilter.h"
+#include "primitives.h"
#include "frame.h"
#include "slice.h"
@@ -160,6 +163,10 @@ void TemporalFilter::init(const x265_param* param)
m_sourceHeight = param->sourceHeight;
m_internalCsp = param->internalCsp;
m_numComponents = (m_internalCsp != X265_CSP_I400) ? MAX_NUM_COMPONENT
: 1;
+
+ m_metld = new MotionEstimatorTLD;
+
+ predPUYuv.create(FENC_STRIDE, X265_CSP_I400);
}
int TemporalFilter::createRefPicInfo(MCTFReferencePicInfo* refFrame,
x265_param* param)
@@ -206,21 +213,33 @@ int TemporalFilter::motionErrorLuma(
{
dx /= s_motionVectorFactor;
dy /= s_motionVectorFactor;
+
+ const pixel* bufferRowStart = buffOrigin + (y + dy) * buffStride +
(x + dx);
+#if 0
+ const pixel* origRowStart = origOrigin + y *origStride + x;
+
for (int y1 = 0; y1 < bs; y1++)
{
- const pixel* origRowStart = origOrigin + (y + y1)*origStride +
x;
- const pixel* bufferRowStart = buffOrigin + (y + y1 +
dy)*buffStride + (x + dx);
- for (int x1 = 0; x1 < bs; x1 += 2)
+ for (int x1 = 0; x1 < bs; x1++)
{
int diff = origRowStart[x1] - bufferRowStart[x1];
error += diff * diff;
- diff = origRowStart[x1 + 1] - bufferRowStart[x1 + 1];
- error += diff * diff;
- }
- if (error > besterror)
- {
- return error;
}
+
+ origRowStart += origStride;
+ bufferRowStart += buffStride;
+ }
+#else
+ int partEnum = partitionFromSizes(bs, bs);
+ /* copy PU block into cache */
+ primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE,
bufferRowStart, buffStride);
+
+ error = primitives.cu[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0],
FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE);
+
+#endif
+ if (error > besterror)
+ {
+ return error;
}
}
else
@@ -761,6 +780,10 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, PicYuv *or
{
for (int blockX = 0; blockX + blockSize <= origWidth; blockX +=
stepSize)
{
+ const intptr_t pelOffset = blockY * orig->m_stride + blockX;
+ m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
+
+
MV best(0, 0);
int leastError = INT_MAX;
@@ -889,6 +912,10 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P
{
for (int blockX = 0; blockX + blockSize <= origWidth; blockX +=
stepSize)
{
+
+ const intptr_t pelOffset = blockY * orig->m_stride + blockX;
+ m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride,
pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1);
+
MV best(0, 0);
int leastError = INT_MAX;
diff --git a/source/common/temporalfilter.h b/source/common/temporalfilter.h
index 003630994..801359914 100644
--- a/source/common/temporalfilter.h
+++ b/source/common/temporalfilter.h
@@ -29,6 +29,7 @@
#include <deque>
#include "piclist.h"
#include "yuv.h"
+#include "motion.h"
using namespace X265_NS;
@@ -94,6 +95,19 @@ struct TemporalFilterRefPicInfo
int origOffset;
};
+struct MotionEstimatorTLD
+{
+ MotionEstimate me;
+
+ MotionEstimatorTLD()
+ {
+ me.init(X265_CSP_I400);
+ me.setQP(X265_LOOKAHEAD_QP);
+ }
+
+ ~MotionEstimatorTLD() {}
+};
+
struct MCTFReferencePicInfo
{
PicYuv* picBuffer;
@@ -103,16 +117,16 @@ struct MCTFReferencePicInfo
MV* mvs0;
MV* mvs1;
MV* mvs2;
- uint32_t mvsStride;
- uint32_t mvsStride0;
- uint32_t mvsStride1;
- uint32_t mvsStride2;
- int* error;
- int* noise;
+ uint32_t mvsStride;
+ uint32_t mvsStride0;
+ uint32_t mvsStride1;
+ uint32_t mvsStride2;
+ int* error;
+ int* noise;
int16_t origOffset;
bool isFilteredFrame;
- PicYuv* compensatedPic;
+ PicYuv* compensatedPic;
int* isSubsampled;
@@ -154,6 +168,9 @@ public:
int m_numComponents;
uint8_t m_sliceTypeConfig;
+ MotionEstimatorTLD* m_metld;
+ Yuv predPUYuv;
+
void subsampleLuma(PicYuv *input, PicYuv *output, int factor = 2);
int createRefPicInfo(MCTFReferencePicInfo* refFrame, x265_param*
param);
diff --git a/source/encoder/frameencoder.cpp b/source/encoder/frameencoder.cpp
index 0a44eb22f..ec78fc9f2 100644
--- a/source/encoder/frameencoder.cpp
+++ b/source/encoder/frameencoder.cpp
@@ -105,6 +105,8 @@ void FrameEncoder::destroy()
if (m_param->bEnableGopBasedTemporalFilter)
{
+ delete m_frameEncTF->m_metld;
+
for (int i = 0; i < (m_frameEncTF->s_range << 1); i++)
m_frameEncTF->destroyRefPicInfo(&m_mcstfRefList[i]);
diff --git a/source/encoder/motion.cpp b/source/encoder/motion.cpp
index f10db884e..2bb613ec0 100644
--- a/source/encoder/motion.cpp
+++ b/source/encoder/motion.cpp
@@ -190,6 +190,31 @@ void MotionEstimate::setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset,
X265_CHECK(!bChromaSATD, "chroma distortion measurements impossible in
this code path\n");
}
+/* Called by TemporalFilter motion estimation, luma only, no use of PicYuv */
+void MotionEstimate::setSourcePU(pixel *fencY, intptr_t stride, intptr_t
offset, int pwidth, int pheight, const int method, const int refine)
+{
+ partEnum = partitionFromSizes(pwidth, pheight);
+ X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n");
+ sad = primitives.pu[partEnum].sad;
+ ads = primitives.pu[partEnum].ads;
+ satd = primitives.pu[partEnum].satd;
+ sad_x3 = primitives.pu[partEnum].sad_x3;
+ sad_x4 = primitives.pu[partEnum].sad_x4;
+
+
+ blockwidth = pwidth;
+ blockOffset = offset;
+ absPartIdx = ctuAddr = -1;
+
+ /* Search params */
+ searchMethod = method;
+ subpelRefine = refine;
+
+ /* copy PU block into cache */
+ primitives.pu[partEnum].copy_pp(fencPUYuv.m_buf[0], FENC_STRIDE, fencY
+ offset, stride);
+ X265_CHECK(!bChromaSATD, "chroma distortion measurements impossible in
this code path\n");
+}
+
/* Called by Search::predInterSearch() or --pme equivalent, chroma
residual might be considered */
void MotionEstimate::setSourcePU(const Yuv& srcFencYuv, int _ctuAddr, int
cuPartIdx, int puPartIdx, int pwidth, int pheight, const int method, const
int refine, bool bChroma)
{
diff --git a/source/encoder/motion.h b/source/encoder/motion.h
index d306230b4..790bc5fb4 100644
--- a/source/encoder/motion.h
+++ b/source/encoder/motion.h
@@ -77,7 +77,7 @@ public:
void init(int csp);
/* Methods called at slice setup */
-
+ void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int
pwidth, int pheight, const int searchMethod, const int subpelRefine);
void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int
pwidth, int pheight, const int searchMethod, const int searchL0, const int
searchL1, const int subpelRefine);
void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx,
int puPartIdx, int pwidth, int pheight, const int searchMethod, const int
subpelRefine, bool bChroma);
--
2.34.1.windows.1
*Thanks and Regards,*
*Snehaa.G*
*Video Codec Engineer, Media & AI analytics
<https://multicorewareinc.com/>*
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20221019/608be3b4/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mcstf_patch_10.diff
Type: application/octet-stream
Size: 8790 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20221019/608be3b4/attachment-0001.obj>
More information about the x265-devel
mailing list