[x265] [PATCH 117 of 307] x86: Aligned routine encoder integration for calcresidual primitive
mythreyi at multicorewareinc.com
mythreyi at multicorewareinc.com
Sat Apr 7 04:31:55 CEST 2018
# HG changeset patch
# User Jayashri Murugan <jayashri at multicorewareinc.com>
# Date 1507182997 -19800
# Thu Oct 05 11:26:37 2017 +0530
# Node ID 1748c9a5c9b16c380f926cd5d07a69c4f13a6fab
# Parent c497cbf5c2d53ea9c47f3929eaacbb36e703bdfa
x86: Aligned routine encoder integration for calcresidual primitive
diff -r c497cbf5c2d5 -r 1748c9a5c9b1 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed Oct 04 16:33:33 2017 +0530
+++ b/source/encoder/search.cpp Thu Oct 05 11:26:37 2017 +0530
@@ -354,8 +354,10 @@
// store original entropy coding status
if (bEnableRDOQ)
m_entropyCoder.estBit(m_entropyCoder.m_estBitsSbac, log2TrSize, true);
-
- primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
+ if ((stride % 64 == 0) && (m_param->cpuid & X265_CPU_AVX512))
+ primitives.cu[sizeIdx].calcresidual_aligned(fenc, pred, residual, stride);
+ else
+ primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false);
if (numSig)
@@ -561,7 +563,10 @@
pixel* tmpRecon = (useTSkip ? m_tsRecon : reconQt);
uint32_t tmpReconStride = (useTSkip ? MAX_TS_SIZE : reconQtStride);
- primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
+ if ((stride % 64 == 0) && (m_param->cpuid & X265_CPU_AVX512))
+ primitives.cu[sizeIdx].calcresidual_aligned(fenc, pred, residual, stride);
+ else
+ primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, useTSkip);
if (numSig)
@@ -714,7 +719,10 @@
coeff_t* coeffY = cu.m_trCoeff[0] + coeffOffsetY;
uint32_t sizeIdx = log2TrSize - 2;
- primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
+ if ((stride % 64 == 0) && (m_param->cpuid & X265_CPU_AVX512))
+ primitives.cu[sizeIdx].calcresidual_aligned(fenc, pred, residual, stride);
+ else
+ primitives.cu[sizeIdx].calcresidual(fenc, pred, residual, stride);
PicYuv* reconPic = m_frame->m_reconPic;
pixel* picReconY = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);
@@ -893,7 +901,11 @@
predIntraChromaAng(chromaPredMode, pred, stride, log2TrSizeC);
cu.setTransformSkipPartRange(0, ttype, absPartIdxC, tuIterator.absPartIdxStep);
- primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
+ if ((stride % 64 == 0) && (m_param->cpuid & X265_CPU_AVX512))
+ primitives.cu[sizeIdxC].calcresidual_aligned(fenc, pred, residual, stride);
+ else
+ primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
+
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
if (numSig)
{
@@ -992,7 +1004,10 @@
pixel* recon = (useTSkip ? m_tsRecon : reconQt);
uint32_t reconStride = (useTSkip ? MAX_TS_SIZE : reconQtStride);
- primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
+ if ((stride % 64 == 0) && (m_param->cpuid & X265_CPU_AVX512))
+ primitives.cu[sizeIdxC].calcresidual_aligned(fenc, pred, residual, stride);
+ else
+ primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, useTSkip);
if (numSig)
@@ -1183,7 +1198,11 @@
X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip not supported at low RD levels\n");
- primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
+ if ((stride % 64 == 0) && (m_param->cpuid & X265_CPU_AVX512))
+ primitives.cu[sizeIdxC].calcresidual_aligned(fenc, pred, residual, stride);
+ else
+ primitives.cu[sizeIdxC].calcresidual(fenc, pred, residual, stride);
+
uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false);
if (numSig)
{
More information about the x265-devel mailing list