[x265] '--dither' patch for 10/12-bit and for non-default bit depths

Wed Apr 6 21:48:52 CEST 2016

Fix the '--dither' option for 10/12-bit and for non-default bit depths
(fixes #255).

Mateusz
-------------- next part --------------
diff -r 5dbd6a0c8e17 source/x265-extras.cpp

--- a/source/x265-extras.cpp	Mon Mar 28 12:53:40 2016 +0530
+++ b/source/x265-extras.cpp	Sat Apr 02 21:35:11 2016 +0200
@@ -280,9 +280,9 @@
     fprintf(csvfp, " %-6u, %-6u, %s\n", stats.maxCLL, stats.maxFALL, api.version_str);
 }
 
-/* The dithering algorithm is based on Sierra-2-4A error diffusion. */
-static void ditherPlane(pixel *dst, int dstStride, uint16_t *src, int srcStride,
-                        int width, int height, int16_t *errors, int bitDepth)
+/* The dithering algorithm is based on Sierra-2-4A error diffusion.
+ * We convert planes in place (without allocating a new buffer). */
+static void ditherPlane(uint16_t *src, int srcStride, int width, int height, int16_t *errors, int bitDepth)
 {
     const int lShift = 16 - bitDepth;
     const int rShift = 16 - bitDepth + 2;
@@ -290,15 +290,34 @@
     const int pixelMax = (1 << bitDepth) - 1;
 
     memset(errors, 0, (width + 1) * sizeof(int16_t));
-    int pitch = 1;
-    for (int y = 0; y < height; y++, src += srcStride, dst += dstStride)
+
+    if (bitDepth == 8)
     {
-        int16_t err = 0;
-        for (int x = 0; x < width; x++)
+        for (int y = 0; y < height; y++, src += srcStride)
         {
-            err = err * 2 + errors[x] + errors[x + 1];
-            dst[x * pitch] = (pixel)x265_clip3(0, pixelMax, ((src[x * 1] << 2) + err + half) >> rShift);
-            errors[x] = err = src[x * pitch] - (dst[x * pitch] << lShift);
+            uint8_t *dst = (uint8_t*)src;
+            int16_t err = 0;
+            for (int x = 0; x < width; x++)
+            {
+                err = err * 2 + errors[x] + errors[x + 1];
+                int tmpDst = x265_clip3(0, pixelMax, ((src[x] << 2) + err + half) >> rShift);
+                errors[x] = err = (int16_t)(src[x] - (tmpDst << lShift));
+                dst[x] = (uint8_t)tmpDst;
+            }
+        }
+    }
+    else
+    {
+        for (int y = 0; y < height; y++, src += srcStride)
+        {
+            int16_t err = 0;
+            for (int x = 0; x < width; x++)
+            {
+                err = err * 2 + errors[x] + errors[x + 1];
+                int tmpDst = x265_clip3(0, pixelMax, ((src[x] << 2) + err + half) >> rShift);
+                errors[x] = err = (int16_t)(src[x] - (tmpDst << lShift));
+                src[x] = (uint16_t)tmpDst;
+            }
         }
     }
 }
@@ -320,7 +339,7 @@
     /* This portion of code is from readFrame in x264. */
     for (int i = 0; i < x265_cli_csps[picIn.colorSpace].planes; i++)
     {
-        if ((picIn.bitDepth & 7) && (picIn.bitDepth != 16))
+        if (picIn.bitDepth < 16)
         {
             /* upconvert non 16bit high depth planes to 16bit */
             uint16_t *plane = (uint16_t*)picIn.planes[i];
@@ -332,14 +351,10 @@
             for (uint32_t j = 0; j < pixelCount; j++)
                 plane[j] = plane[j] << lShift;
         }
-    }
 
-    for (int i = 0; i < x265_cli_csps[picIn.colorSpace].planes; i++)
-    {
         int height = (int)(picHeight >> x265_cli_csps[picIn.colorSpace].height[i]);
         int width = (int)(picWidth >> x265_cli_csps[picIn.colorSpace].width[i]);
 
-        ditherPlane(((pixel*)picIn.planes[i]), picIn.stride[i] / sizeof(pixel), ((uint16_t*)picIn.planes[i]),
-                    picIn.stride[i] / 2, width, height, errorBuf, bitDepth);
+        ditherPlane(((uint16_t*)picIn.planes[i]), picIn.stride[i] / 2, width, height, errorBuf, bitDepth);
     }
 }