[x265] [PATCH 1 of 2] Vectorized WeightUni

deepthidevaki at multicorewareinc.com deepthidevaki at multicorewareinc.com
Tue Jul 16 10:58:02 CEST 2013


# HG changeset patch
# User Deepthi Devaki
# Date 1373959114 -19800
# Node ID 9c5fc824649816f4c54b769e04ee6071cdb70d5d
# Parent  c9bb72e8cb8effc0d1d0e99f0b9abc8d341c652a
Vectorized WeightUni

diff -r c9bb72e8cb8e -r 9c5fc8246498 source/common/vec/pixel.inc
--- a/source/common/vec/pixel.inc	Mon Jul 15 23:41:11 2013 -0500
+++ b/source/common/vec/pixel.inc	Tue Jul 16 12:48:34 2013 +0530
@@ -382,6 +382,7 @@
     p.transpose[2] = transpose16;
     p.transpose[3] = transpose32;
     p.transpose[4] = transpose<64>;
+    p.weightpUni = weightUnidir;
 #endif
 }
 }
diff -r c9bb72e8cb8e -r 9c5fc8246498 source/common/vec/pixel8.inc
--- a/source/common/vec/pixel8.inc	Mon Jul 15 23:41:11 2013 -0500
+++ b/source/common/vec/pixel8.inc	Tue Jul 16 12:48:34 2013 +0530
@@ -26,6 +26,9 @@
 // Vector class versions of pixel comparison performance primitives
 /* intrinsics for when pixel type is uint8_t */
 
+#define IF_INTERNAL_PREC 14 ///< Number of bits for internal precision
+#define IF_INTERNAL_OFFS (1 << (IF_INTERNAL_PREC - 1)) ///< Offset used internally; evaluates to 1 << 13 = 8192
+
 #if defined(_MSC_VER)
 #undef ALWAYSINLINE
 #define ALWAYSINLINE  __forceinline
@@ -1935,6 +1938,32 @@
     }
 }
 
+// Uni-directional weighting: dst[x] = (((w0 * (src[x] + IF_INTERNAL_OFFS) + round) >> shift) + offset), packed to pixel.
+// bitDepth is not referenced here; range limiting is left to compress_saturated/compress_unsafe (uint8_t pixel build).
+void weightUnidir(short *src, pixel *dst, int srcStride, int dstStride, int width, int height, int w0, int round, int shift, int offset, int bitDepth)
+{
+    int x, y;
+    Vec8s   tmp;
+    Vec4i   vw0(w0), iofs(IF_INTERNAL_OFFS), ofs(offset), vround(round), vdst;
+    for (y = height - 1; y >= 0; y--)
+    {
+        for (x = 0; x <= width - 4; x += 4) // full groups of 4 columns
+        {
+            tmp  = load_partial(const_int(4), src + x); // only 4 samples are used, so do not read 8 (would run past the row end)
+            vdst = ((vw0 * (extend_low(tmp) + iofs) + vround) >> shift) + ofs;
+            store_partial(const_int(4), dst + x, compress_unsafe(compress_saturated(vdst, vdst), 0));
+        }
+        if (width > x) // 1..3 leftover columns when width is not a multiple of 4
+        {
+            tmp.load_partial(width - x, src + x); // read exactly the samples that remain in this row
+            vdst = ((vw0 * (extend_low(tmp) + iofs) + vround) >> shift) + ofs;
+            compress_unsafe(compress_saturated(vdst, vdst), 0).store_partial(width - x, dst + x); // write exactly as many pixels as were read
+        }
+        src += srcStride; // advance both pointers to the next row
+        dst += dstStride;
+    }
+}
+
 #if INSTRSET >= 8
 template<int size>
 ALWAYSINLINE void unrollFunc_32_avx2(pixel *fenc, intptr_t fencstride, pixel *fref, intptr_t frefstride, Vec16us& sad)
diff -r c9bb72e8cb8e -r 9c5fc8246498 source/test/pixelharness.cpp
--- a/source/test/pixelharness.cpp	Mon Jul 15 23:41:11 2013 -0500
+++ b/source/test/pixelharness.cpp	Tue Jul 16 12:48:34 2013 +0530
@@ -333,7 +333,7 @@
     memset(ref_dest, 0, 64 * 64 * sizeof(pixel));
     memset(opt_dest, 0, 64 * 64 * sizeof(pixel));
     int j = 0;
-    int width = 8;
+    int width = (2 * rand()) % 64;
     int height = 8;
     int w0 = rand() % 256;
     int shift = rand() % 12;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: xhevc_deepthid-1.patch
Type: text/x-patch
Size: 2810 bytes
Desc: not available
URL: <http://mailman.videolan.org/pipermail/x265-devel/attachments/20130716/3f9a2086/attachment.bin>


More information about the x265-devel mailing list