[x265] [PATCH 1 of 3] asm: separated pelFilterChroma function into horizontal & vertical primitives for asm
dnyaneshwar at multicorewareinc.com
dnyaneshwar at multicorewareinc.com
Fri Feb 26 10:11:29 CET 2016
# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1456466613 -19800
# Fri Feb 26 11:33:33 2016 +0530
# Node ID 5ff8ee940ad7f4d34b106ae4999b996245c87919
# Parent 01782e7f0a8cb93efbe4ff1534602ff9055c8565
asm: separated pelFilterChroma function into horizontal & vertical primitives for asm
diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/deblock.cpp
--- a/source/common/deblock.cpp Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/deblock.cpp Fri Feb 26 11:33:33 2016 +0530
@@ -319,27 +319,6 @@
}
}
-/* Deblocking of one line/column for the chrominance component
- * \param src pointer to picture data
- * \param offset offset value for picture data
- * \param tc tc value
- * \param maskP indicator to disable filtering on partP
- * \param maskQ indicator to disable filtering on partQ */
-static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
-{
- for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
- {
- int16_t m4 = (int16_t)src[0];
- int16_t m3 = (int16_t)src[-offset];
- int16_t m5 = (int16_t)src[offset];
- int16_t m2 = (int16_t)src[-offset * 2];
-
- int32_t delta = x265_clip3(-tc, tc, ((((m4 - m3) * 4) + m2 - m5 + 4) >> 3));
- src[-offset] = x265_clip(m3 + (delta & maskP));
- src[0] = x265_clip(m4 - (delta & maskQ));
- }
-}
-
void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
{
PicYuv* reconPic = cuQ->m_encData->m_reconPic;
@@ -517,7 +496,7 @@
int32_t tc = s_tcTable[indexTC] << bitdepthShift;
pixel* srcC = srcChroma[chromaIdx];
- pelFilterChroma(srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
+ primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
}
}
}
diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/loopfilter.cpp
--- a/source/common/loopfilter.cpp Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/loopfilter.cpp Fri Feb 26 11:33:33 2016 +0530
@@ -158,6 +158,27 @@
src[offset * 2] = (pixel)(x265_clip3(-tcQ, tcQ, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
}
}
+
+/* Deblocking of one line/column for the chrominance component: filters UNIT_SIZE sample pairs across one edge
+* \param src pointer to the first Q-side sample at the edge; advanced by srcStep after each filtered pair
+* \param offset distance between neighbouring samples across the edge (src[-2 * offset] .. src[offset] are read)
+* \param tc tc value, passed to x265_clip3 as the (-tc, tc) bounds applied to the correction delta
+* \param maskP bit mask ANDed with delta before it is added to src[-offset]; indicator to disable filtering on partP
+* \param maskQ bit mask ANDed with delta before it is subtracted from src[0]; indicator to disable filtering on partQ */
+static void pelFilterChroma_c(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
+{
+ for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
+ {
+ int16_t m4 = (int16_t)src[0]; // Q-side sample next to the edge (written back below)
+ int16_t m3 = (int16_t)src[-offset]; // P-side sample next to the edge (written back below)
+ int16_t m5 = (int16_t)src[offset]; // second Q-side sample, read only
+ int16_t m2 = (int16_t)src[-offset * 2]; // second P-side sample, read only
+
+ int32_t delta = x265_clip3(-tc, tc, ((((m4 - m3) * 4) + m2 - m5 + 4) >> 3)); // (4 * (m4 - m3) + m2 - m5) / 8, +4 rounds before the shift
+ src[-offset] = x265_clip(m3 + (delta & maskP)); // a zero mask leaves the P sample at its x265_clip'd input value
+ src[0] = x265_clip(m4 - (delta & maskQ)); // opposite sign on the Q side
+ }
+}
}
namespace X265_NS {
@@ -176,5 +197,7 @@
// C code is same for EDGE_VER and EDGE_HOR only asm code is different
p.pelFilterLumaStrong[0] = pelFilterLumaStrong_c;
p.pelFilterLumaStrong[1] = pelFilterLumaStrong_c;
+ p.pelFilterChroma[0] = pelFilterChroma_c;
+ p.pelFilterChroma[1] = pelFilterChroma_c;
}
}
diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/primitives.h
--- a/source/common/primitives.h Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/primitives.h Fri Feb 26 11:33:33 2016 +0530
@@ -197,6 +197,7 @@
typedef uint32_t (*costC1C2Flag_t)(uint16_t *absCoeff, intptr_t numC1Flag, uint8_t *baseCtxMod, intptr_t ctxOffset);
typedef void (*pelFilterLumaStrong_t)(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tcP, int32_t tcQ);
+typedef void (*pelFilterChroma_t)(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ);
/* Function pointers to optimized encoder primitives. Each pointer can reference
* either an assembly routine, a SIMD intrinsic primitive, or a C function */
@@ -332,6 +333,7 @@
costC1C2Flag_t costC1C2Flag;
pelFilterLumaStrong_t pelFilterLumaStrong[2]; // EDGE_VER = 0, EDGE_HOR = 1
+ pelFilterChroma_t pelFilterChroma[2]; // EDGE_VER = 0, EDGE_HOR = 1
/* There is one set of chroma primitives per color space. An encoder will
* have just a single color space and thus it will only ever use one entry
More information about the x265-devel
mailing list