[x265] [x265 patch] Adaptive Frame Duplication

Mon Sep 23 07:49:46 CEST 2019

At 2019-09-23 12:50:22, "Akil" <akil at multicorewareinc.com> wrote:

# HG changeset patch
# User Akil Ayyappan<akil at multicorewareinc.com>
# Date 1568370446 -19800
#      Fri Sep 13 15:57:26 2019 +0530
# Node ID 531f6b03eed0a40a38d3589dec03f14743293146
# Parent  c4b098f973e6b0ee4aee3bf0d7b54da4e2734d42
Adaptive Frame duplication
+    uint32_t y = 0;
+
+    /* Consume rows in ever narrower chunks of height */
+    for (int size = BLOCK_64x64; size >= BLOCK_4x4 && y < height; size--)
+    {
+        uint32_t rowHeight = 1 << (size + 2);
+
+        for (; y + rowHeight <= height; y += rowHeight)
+        {
+            uint32_t y1, x = 0;
+
+            /* Consume each row using the largest square blocks possible */
+            if (size == BLOCK_64x64 && !(stride & 31))
+                for (; x + 64 <= width; x += 64)
+                    ssd += primitives.cu[BLOCK_64x64].sse_pp(fenc + x, stride, rec + x, stride);
+
+            if (size >= BLOCK_32x32 && !(stride & 15))
+                for (; x + 32 <= width; x += 32)
+                    for (y1 = 0; y1 + 32 <= rowHeight; y1 += 32)
+                        ssd += primitives.cu[BLOCK_32x32].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
+
+            if (size >= BLOCK_16x16)
+                for (; x + 16 <= width; x += 16)
+                    for (y1 = 0; y1 + 16 <= rowHeight; y1 += 16)
+                        ssd += primitives.cu[BLOCK_16x16].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
+
+            if (size >= BLOCK_8x8)
+                for (; x + 8 <= width; x += 8)
+                    for (y1 = 0; y1 + 8 <= rowHeight; y1 += 8)
+                        ssd += primitives.cu[BLOCK_8x8].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
+
+            for (; x + 4 <= width; x += 4)
+                for (y1 = 0; y1 + 4 <= rowHeight; y1 += 4)
+                    ssd += primitives.cu[BLOCK_4x4].sse_pp(fenc + y1 * stride + x, stride, rec + y1 * stride + x, stride);
+
+            fenc += stride * rowHeight;
+            rec += stride * rowHeight;
+        }
+    }
+
+    return ssd;
+}

You try to processing block as big as possible, however, this code styles is less readable.
Suggest put trick in optimized version other than inside C model.

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20190923/10697f93/attachment.html>