[x265] [PATCH 1 of 3] asm: separated pelFilterChroma function into horizontal & vertical primitives for asm

dnyaneshwar at multicorewareinc.com dnyaneshwar at multicorewareinc.com
Fri Feb 26 10:11:29 CET 2016


# HG changeset patch
# User Dnyaneshwar G <dnyaneshwar at multicorewareinc.com>
# Date 1456466613 -19800
#      Fri Feb 26 11:33:33 2016 +0530
# Node ID 5ff8ee940ad7f4d34b106ae4999b996245c87919
# Parent  01782e7f0a8cb93efbe4ff1534602ff9055c8565
asm: separated pelFilterChroma function into horizontal & vertical primitives for asm

diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/deblock.cpp
--- a/source/common/deblock.cpp	Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/deblock.cpp	Fri Feb 26 11:33:33 2016 +0530
@@ -319,27 +319,6 @@
     }
 }
 
-/* Deblocking of one line/column for the chrominance component
- * \param src     pointer to picture data
- * \param offset  offset value for picture data
- * \param tc      tc value
- * \param maskP   indicator to disable filtering on partP
- * \param maskQ   indicator to disable filtering on partQ */
-static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
-{
-    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
-    {
-        int16_t m4  = (int16_t)src[0];
-        int16_t m3  = (int16_t)src[-offset];
-        int16_t m5  = (int16_t)src[offset];
-        int16_t m2  = (int16_t)src[-offset * 2];
-
-        int32_t delta = x265_clip3(-tc, tc, ((((m4 - m3) * 4) + m2 - m5 + 4) >> 3));
-        src[-offset] = x265_clip(m3 + (delta & maskP));
-        src[0] = x265_clip(m4 - (delta & maskQ));
-    }
-}
-
 void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
 {
     PicYuv* reconPic = cuQ->m_encData->m_reconPic;
@@ -517,7 +496,7 @@
             int32_t tc = s_tcTable[indexTC] << bitdepthShift;
             pixel* srcC = srcChroma[chromaIdx];
 
-            pelFilterChroma(srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
+            primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
         }
     }
 }
diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/loopfilter.cpp
--- a/source/common/loopfilter.cpp	Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/loopfilter.cpp	Fri Feb 26 11:33:33 2016 +0530
@@ -158,6 +158,27 @@
         src[offset * 2]  = (pixel)(x265_clip3(-tcQ, tcQ, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
     }
 }
+
+/* Deblocking of one line/column for the chrominance component
+ * \param src     pointer to picture data
+ * \param offset  offset value for picture data
+ * \param tc      tc value
+ * \param maskP   indicator to disable filtering on partP
+ * \param maskQ   indicator to disable filtering on partQ */
+static void pelFilterChroma_c(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
+{
+    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
+    {
+        int16_t m4 = (int16_t)src[0];
+        int16_t m3 = (int16_t)src[-offset];
+        int16_t m5 = (int16_t)src[offset];
+        int16_t m2 = (int16_t)src[-offset * 2];
+
+        int32_t delta = x265_clip3(-tc, tc, ((((m4 - m3) * 4) + m2 - m5 + 4) >> 3));
+        src[-offset]  = x265_clip(m3 + (delta & maskP));
+        src[0]        = x265_clip(m4 - (delta & maskQ));
+    }
+}
 }
 
 namespace X265_NS {
@@ -176,5 +197,7 @@
     // C code is same for EDGE_VER and EDGE_HOR only asm code is different
     p.pelFilterLumaStrong[0] = pelFilterLumaStrong_c;
     p.pelFilterLumaStrong[1] = pelFilterLumaStrong_c;
+    p.pelFilterChroma[0]     = pelFilterChroma_c;
+    p.pelFilterChroma[1]     = pelFilterChroma_c;
 }
 }
diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/primitives.h
--- a/source/common/primitives.h	Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/primitives.h	Fri Feb 26 11:33:33 2016 +0530
@@ -197,6 +197,7 @@
 typedef uint32_t (*costC1C2Flag_t)(uint16_t *absCoeff, intptr_t numC1Flag, uint8_t *baseCtxMod, intptr_t ctxOffset);
 
 typedef void (*pelFilterLumaStrong_t)(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tcP, int32_t tcQ);
+typedef void (*pelFilterChroma_t)(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ);
 
 /* Function pointers to optimized encoder primitives. Each pointer can reference
  * either an assembly routine, a SIMD intrinsic primitive, or a C function */
@@ -332,6 +333,7 @@
     costC1C2Flag_t        costC1C2Flag;
 
     pelFilterLumaStrong_t pelFilterLumaStrong[2]; // EDGE_VER = 0, EDGE_HOR = 1
+    pelFilterChroma_t     pelFilterChroma[2];     // EDGE_VER = 0, EDGE_HOR = 1
 
     /* There is one set of chroma primitives per color space. An encoder will
      * have just a single color space and thus it will only ever use one entry


More information about the x265-devel mailing list